diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index c26da03a89a..7f34a55638f 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -2630,7 +2630,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_sse2_packsswb_128: case Intrinsic::x86_avx2_packssdw: case Intrinsic::x86_avx2_packsswb: - // TODO Add support for Intrinsic::x86_avx512_mask_packss* + case Intrinsic::x86_avx512_packssdw_512: + case Intrinsic::x86_avx512_packsswb_512: if (Value *V = simplifyX86pack(*II, *this, *Builder, true)) return replaceInstUsesWith(*II, V); break; @@ -2639,7 +2640,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_sse41_packusdw: case Intrinsic::x86_avx2_packusdw: case Intrinsic::x86_avx2_packuswb: - // TODO Add support for Intrinsic::x86_avx512_mask_packus* + case Intrinsic::x86_avx512_packusdw_512: + case Intrinsic::x86_avx512_packuswb_512: if (Value *V = simplifyX86pack(*II, *this, *Builder, false)) return replaceInstUsesWith(*II, V); break; diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 42ce76b7cc2..e641acbfbd9 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -1482,8 +1482,11 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, case Intrinsic::x86_avx2_packssdw: case Intrinsic::x86_avx2_packsswb: case Intrinsic::x86_avx2_packusdw: - case Intrinsic::x86_avx2_packuswb: { - // TODO Add support for Intrinsic::x86_avx512_mask_pack* + case Intrinsic::x86_avx2_packuswb: + case Intrinsic::x86_avx512_packssdw_512: + case Intrinsic::x86_avx512_packsswb_512: + case Intrinsic::x86_avx512_packusdw_512: + case Intrinsic::x86_avx512_packuswb_512: { auto *Ty0 = II->getArgOperand(0)->getType(); unsigned InnerVWidth = Ty0->getVectorNumElements(); assert(VWidth == (InnerVWidth * 2) && "Unexpected input size"); diff --git a/test/Transforms/InstCombine/x86-pack.ll b/test/Transforms/InstCombine/x86-pack.ll index 68d5521d47a..f3c41a8aa47 100644 --- a/test/Transforms/InstCombine/x86-pack.ll +++ b/test/Transforms/InstCombine/x86-pack.ll @@ -69,6 +69,38 @@ define <32 x i8> @undef_packuswb_256() { ret <32 x i8> %1 } +define <32 x i16> @undef_packssdw_512() { +; CHECK-LABEL: @undef_packssdw_512( +; CHECK-NEXT: ret <32 x i16> undef +; + %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> undef, <16 x i32> undef) + ret <32 x i16> %1 +} + +define <32 x i16> @undef_packusdw_512() { +; CHECK-LABEL: @undef_packusdw_512( +; CHECK-NEXT: ret <32 x i16> undef +; + %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> undef, <16 x i32> undef) + ret <32 x i16> %1 +} + +define <64 x i8> @undef_packsswb_512() { +; CHECK-LABEL: @undef_packsswb_512( +; CHECK-NEXT: ret <64 x i8> undef +; + %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> undef, <32 x i16> undef) + ret <64 x i8> %1 +} + +define <64 x i8> @undef_packuswb_512() { +; CHECK-LABEL: @undef_packuswb_512( +; CHECK-NEXT: ret <64 x i8> undef +; + %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> undef, <32 x i16> undef) + ret <64 x i8> %1 +} + ; ; Constant Folding ; @@ -137,13 +169,45 @@ define <32 x i8> @fold_packuswb_256() { ret <32 x i8> %1 } +define <32 x i16> @fold_packssdw_512() { +; CHECK-LABEL: @fold_packssdw_512( +; CHECK-NEXT: ret <32 x i16> +; + %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> , <16 x i32> undef) + ret <32 x i16> %1 +} + +define <32 x i16> @fold_packusdw_512() { +; CHECK-LABEL: @fold_packusdw_512( +; CHECK-NEXT: ret <32 x i16> +; + %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> , <16 x i32> ) + ret <32 x i16> %1 +} + +define <64 x i8> @fold_packsswb_512() { +; CHECK-LABEL: @fold_packsswb_512( +; CHECK-NEXT: ret <64 x i8> +; + %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> undef, <32 x i16> zeroinitializer) + ret <64 x i8> %1 +} + +define <64 x i8> @fold_packuswb_512() { +; CHECK-LABEL: @fold_packuswb_512( +; CHECK-NEXT: ret <64 x i8> +; + %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> zeroinitializer, <32 x i16> ) + ret <64 x i8> %1 +} + ; ; Demanded Elts ; define <8 x i16> @elts_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: @elts_packssdw_128( -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> undef) +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[A0:%.*]], <4 x i32> undef) ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[TMP2]] ; @@ -156,7 +220,7 @@ define <8 x i16> @elts_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) { define <8 x i16> @elts_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: @elts_packusdw_128( -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]]) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = insertelement <4 x i32> %a0, i32 0, i32 0 @@ -190,7 +254,7 @@ define <16 x i8> @elts_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) { define <16 x i16> @elts_packssdw_256(<8 x i32> %a0, <8 x i32> %a1) { ; CHECK-LABEL: @elts_packssdw_256( -; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a0, <8 x i32> undef) +; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[A0:%.*]], <8 x i32> undef) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> @@ -202,7 +266,7 @@ define <16 x i16> @elts_packssdw_256(<8 x i32> %a0, <8 x i32> %a1) { define <16 x i16> @elts_packusdw_256(<8 x i32> %a0, <8 x i32> %a1) { ; CHECK-LABEL: @elts_packusdw_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A1:%.*]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> undef, <8 x i32> [[TMP1]]) ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> undef, <16 x i32> ; CHECK-NEXT: ret <16 x i16> [[TMP3]] @@ -236,6 +300,56 @@ define <32 x i8> @elts_packuswb_256(<16 x i16> %a0, <16 x i16> %a1) { ret <32 x i8> %4 } +define <32 x i16> @elts_packssdw_512(<16 x i32> %a0, <16 x i32> %a1) { +; CHECK-LABEL: @elts_packssdw_512( +; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A0:%.*]], <16 x i32> undef) +; CHECK-NEXT: ret <32 x i16> [[TMP1]] +; + %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> + %2 = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> + %3 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %1, <16 x i32> %2) + %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> + ret <32 x i16> %4 +} + +define <32 x i16> @elts_packusdw_512(<16 x i32> %a0, <16 x i32> %a1) { +; CHECK-LABEL: @elts_packusdw_512( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A1:%.*]], <16 x i32> undef, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> undef, <16 x i32> [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i16> [[TMP2]], <32 x i16> undef, <32 x i32> +; CHECK-NEXT: ret <32 x i16> [[TMP3]] +; + %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> + %2 = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> + %3 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %1, <16 x i32> %2) + %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> + ret <32 x i16> %4 +} + +define <64 x i8> @elts_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) { +; CHECK-LABEL: @elts_packsswb_512( +; CHECK-NEXT: ret <64 x i8> zeroinitializer +; + %1 = insertelement <32 x i16> %a0, i16 0, i32 0 + %2 = insertelement <32 x i16> %a1, i16 0, i32 8 + %3 = insertelement <32 x i16> %1, i16 0, i32 16 + %4 = insertelement <32 x i16> %2, i16 0, i32 24 + %5 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %3, <32 x i16> %4) + %6 = shufflevector <64 x i8> %5, <64 x i8> undef, <64 x i32> + ret <64 x i8> %6 +} + +define <64 x i8> @elts_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) { +; CHECK-LABEL: @elts_packuswb_512( +; CHECK-NEXT: ret <64 x i8> undef +; + %1 = insertelement <32 x i16> undef, i16 0, i32 1 + %2 = insertelement <32 x i16> undef, i16 0, i32 0 + %3 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %1, <32 x i16> %2) + %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> zeroinitializer + ret <64 x i8> %4 +} + declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone @@ -245,3 +359,8 @@ declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readno declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone + +declare <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32>, <16 x i32>) nounwind readnone +declare <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32>, <16 x i32>) nounwind readnone +declare <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16>, <32 x i16>) nounwind readnone +declare <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16>, <32 x i16>) nounwind readnone