mirror of
https://github.com/RPCS3/llvm.git
synced 2025-03-01 15:19:05 +00:00
[x86][AVX512] Lowering Broadcastm intrinsics to LLVM IR
This patch, together with a matching clang patch (https://reviews.llvm.org/D38683), implements the lowering of X86 broadcastm intrinsics to IR. Differential Revision: https://reviews.llvm.org/D38684 Change-Id: I709ac0b34641095397e994c8ff7e15d1315b3540 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@317458 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
2513e7c5ce
commit
028edca122
@ -78,6 +78,7 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
|
||||
Name=="ssse3.pabs.d.128" || // Added in 6.0
|
||||
Name.startswith("avx2.pabs.") || // Added in 6.0
|
||||
Name.startswith("avx512.mask.pabs.") || // Added in 6.0
|
||||
Name.startswith("avx512.broadcastm") || // Added in 6.0
|
||||
Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
|
||||
Name.startswith("sse2.pcmpeq.") || // Added in 3.1
|
||||
Name.startswith("sse2.pcmpgt.") || // Added in 3.1
|
||||
@ -1027,7 +1028,15 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||
Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
|
||||
CI->getArgOperand(0), CI->getArgOperand(1));
|
||||
Rep = Builder.CreateSExt(Rep, CI->getType(), "");
|
||||
} else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
|
||||
} else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
|
||||
Type *ExtTy = Type::getInt32Ty(C);
|
||||
if (CI->getOperand(0)->getType()->isIntegerTy(8))
|
||||
ExtTy = Type::getInt64Ty(C);
|
||||
unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
|
||||
ExtTy->getPrimitiveSizeInBits();
|
||||
Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
|
||||
Rep = Builder.CreateVectorSplat(NumElts, Rep);
|
||||
} else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))) {
|
||||
unsigned NumElts =
|
||||
CI->getArgOperand(1)->getType()->getVectorNumElements();
|
||||
Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
|
||||
|
@ -6746,6 +6746,9 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
|
||||
assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
|
||||
"Unsupported vector type for broadcast.");
|
||||
|
||||
BitVector UndefElements;
|
||||
SDValue Ld = BVOp->getSplatValue(&UndefElements);
|
||||
|
||||
// Attempt to use VBROADCASTM
|
||||
// From this paterrn:
|
||||
// a. t0 = (zext_i64 (bitcast_i8 v2i1 X))
|
||||
@ -6753,17 +6756,23 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
|
||||
//
|
||||
// Create (VBROADCASTM v2i1 X)
|
||||
if (Subtarget.hasCDI() && (VT.is512BitVector() || Subtarget.hasVLX())) {
|
||||
MVT EltType;
|
||||
unsigned NumElts;
|
||||
MVT EltType = VT.getScalarType();
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
SDValue BOperand;
|
||||
SDValue ZeroExtended = isSplatZeroExtended(BVOp, NumElts, EltType);
|
||||
if (ZeroExtended && ZeroExtended.getOpcode() == ISD::BITCAST) {
|
||||
SDValue BOperand = ZeroExtended.getOperand(0);
|
||||
if ((ZeroExtended && ZeroExtended.getOpcode() == ISD::BITCAST) ||
|
||||
(Ld && Ld.getOpcode() == ISD::ZERO_EXTEND &&
|
||||
Ld.getOperand(0).getOpcode() == ISD::BITCAST)) {
|
||||
if (ZeroExtended)
|
||||
BOperand = ZeroExtended.getOperand(0);
|
||||
else
|
||||
BOperand = Ld.getOperand(0).getOperand(0);
|
||||
if (BOperand.getValueType().isVector() &&
|
||||
BOperand.getSimpleValueType().getVectorElementType() == MVT::i1) {
|
||||
if ((EltType == MVT::i64 &&
|
||||
VT.getVectorElementType() == MVT::i8) || // for broadcastmb2q
|
||||
(EltType == MVT::i32 &&
|
||||
VT.getVectorElementType() == MVT::i16)) { // for broadcastmw2d
|
||||
if ((EltType == MVT::i64 && (VT.getVectorElementType() == MVT::i8 ||
|
||||
NumElts == 8)) || // for broadcastmb2q
|
||||
(EltType == MVT::i32 && (VT.getVectorElementType() == MVT::i16 ||
|
||||
NumElts == 16))) { // for broadcastmw2d
|
||||
SDValue Brdcst =
|
||||
DAG.getNode(X86ISD::VBROADCASTM, dl,
|
||||
MVT::getVectorVT(EltType, NumElts), BOperand);
|
||||
@ -6773,9 +6782,6 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
|
||||
}
|
||||
}
|
||||
|
||||
BitVector UndefElements;
|
||||
SDValue Ld = BVOp->getSplatValue(&UndefElements);
|
||||
|
||||
// We need a splat of a single value to use broadcast, and it doesn't
|
||||
// make any sense if the value is only in one element of the vector.
|
||||
if (!Ld || (VT.getVectorNumElements() - UndefElements.count()) <= 1) {
|
||||
|
@ -422,12 +422,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(avx2_psubs_w, INTR_TYPE_2OP, X86ISD::SUBS, 0),
|
||||
X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
|
||||
X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_broadcastmb_128, BROADCASTM, X86ISD::VBROADCASTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_broadcastmb_256, BROADCASTM, X86ISD::VBROADCASTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_broadcastmb_512, BROADCASTM, X86ISD::VBROADCASTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_broadcastmw_128, BROADCASTM, X86ISD::VBROADCASTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_broadcastmw_256, BROADCASTM, X86ISD::VBROADCASTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_broadcastmw_512, BROADCASTM, X86ISD::VBROADCASTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_cvtb2mask_128, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
|
||||
X86_INTRINSIC_DATA(avx512_cvtb2mask_256, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
|
||||
X86_INTRINSIC_DATA(avx512_cvtb2mask_512, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
|
||||
|
37
test/CodeGen/X86/avx512cd-intrinsics-fast-isel.ll
Normal file
37
test/CodeGen/X86/avx512cd-intrinsics-fast-isel.ll
Normal file
@ -0,0 +1,37 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512cd | FileCheck %s
|
||||
|
||||
define <8 x i64> @test_mm512_broadcastmb_epi64(<8 x i64> %a, <8 x i64> %b) {
|
||||
; CHECK-LABEL: test_mm512_broadcastmb_epi64:
|
||||
; CHECK: # BB#0: # %entry
|
||||
; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
|
||||
; CHECK-NEXT: vpbroadcastmb2q %k0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%0 = icmp eq <8 x i64> %a, %b
|
||||
%1 = bitcast <8 x i1> %0 to i8
|
||||
%conv.i = zext i8 %1 to i64
|
||||
%vecinit.i.i = insertelement <8 x i64> undef, i64 %conv.i, i32 0
|
||||
%vecinit7.i.i = shufflevector <8 x i64> %vecinit.i.i, <8 x i64> undef, <8 x i32> zeroinitializer
|
||||
ret <8 x i64> %vecinit7.i.i
|
||||
}
|
||||
|
||||
define <8 x i64> @test_mm512_broadcastmw_epi32(<8 x i64> %a, <8 x i64> %b) {
|
||||
; CHECK-LABEL: test_mm512_broadcastmw_epi32:
|
||||
; CHECK: # BB#0: # %entry
|
||||
; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
|
||||
; CHECK-NEXT: vpbroadcastmw2d %k0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%0 = bitcast <8 x i64> %a to <16 x i32>
|
||||
%1 = bitcast <8 x i64> %b to <16 x i32>
|
||||
%2 = icmp eq <16 x i32> %0, %1
|
||||
%3 = bitcast <16 x i1> %2 to i16
|
||||
%conv.i = zext i16 %3 to i32
|
||||
%vecinit.i.i = insertelement <16 x i32> undef, i32 %conv.i, i32 0
|
||||
%vecinit15.i.i = shufflevector <16 x i32> %vecinit.i.i, <16 x i32> undef, <16 x i32> zeroinitializer
|
||||
%4 = bitcast <16 x i32> %vecinit15.i.i to <8 x i64>
|
||||
ret <8 x i64> %4
|
||||
}
|
||||
|
||||
|
@ -45,3 +45,26 @@ define <8 x i64> @test_mask_lzcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_x86_vbroadcastmw_512(i16 %a0) {
|
||||
; CHECK-LABEL: test_x86_vbroadcastmw_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: movzwl %di, %eax
|
||||
; CHECK-NEXT: vpbroadcastd %eax, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x i32> @llvm.x86.avx512.broadcastmw.512(i16 %a0)
|
||||
ret <16 x i32> %res
|
||||
}
|
||||
declare <16 x i32> @llvm.x86.avx512.broadcastmw.512(i16)
|
||||
|
||||
define <8 x i64> @test_x86_broadcastmb_512(i8 %a0) {
|
||||
; CHECK-LABEL: test_x86_broadcastmb_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: movzbl %dil, %eax
|
||||
; CHECK-NEXT: vpbroadcastq %rax, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x i64> @llvm.x86.avx512.broadcastmb.512(i8 %a0)
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
declare <8 x i64> @llvm.x86.avx512.broadcastmb.512(i8)
|
||||
|
||||
|
@ -1,28 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd | FileCheck %s
|
||||
|
||||
define <16 x i32> @test_x86_vbroadcastmw_512(i16 %a0) {
|
||||
; CHECK-LABEL: test_x86_vbroadcastmw_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k0
|
||||
; CHECK-NEXT: vpbroadcastmw2d %k0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x i32> @llvm.x86.avx512.broadcastmw.512(i16 %a0)
|
||||
ret <16 x i32> %res
|
||||
}
|
||||
declare <16 x i32> @llvm.x86.avx512.broadcastmw.512(i16)
|
||||
|
||||
define <8 x i64> @test_x86_broadcastmb_512(i8 %a0) {
|
||||
; CHECK-LABEL: test_x86_broadcastmb_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k0
|
||||
; CHECK-NEXT: vpbroadcastmb2q %k0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x i64> @llvm.x86.avx512.broadcastmb.512(i8 %a0)
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
declare <8 x i64> @llvm.x86.avx512.broadcastmb.512(i8)
|
||||
|
||||
declare <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
|
||||
|
||||
define <8 x i64> @test_conflict_q(<8 x i64> %a) {
|
||||
|
@ -69,3 +69,47 @@ define <4 x i64>@test_int_x86_avx512_mask_vplzcnt_q_256(<4 x i64> %x0, <4 x i64>
|
||||
ret <4 x i64> %res2
|
||||
}
|
||||
|
||||
define <8 x i32> @test_x86_vbroadcastmw_256(i16 %a0) {
|
||||
; CHECK-LABEL: test_x86_vbroadcastmw_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: movzwl %di, %eax
|
||||
; CHECK-NEXT: vpbroadcastd %eax, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x i32> @llvm.x86.avx512.broadcastmw.256(i16 %a0) ;
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
declare <8 x i32> @llvm.x86.avx512.broadcastmw.256(i16)
|
||||
|
||||
define <4 x i32> @test_x86_vbroadcastmw_128(i16 %a0) {
|
||||
; CHECK-LABEL: test_x86_vbroadcastmw_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: movzwl %di, %eax
|
||||
; CHECK-NEXT: vpbroadcastd %eax, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <4 x i32> @llvm.x86.avx512.broadcastmw.128(i16 %a0) ;
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
declare <4 x i32> @llvm.x86.avx512.broadcastmw.128(i16)
|
||||
|
||||
define <4 x i64> @test_x86_broadcastmb_256(i8 %a0) {
|
||||
; CHECK-LABEL: test_x86_broadcastmb_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: movzbl %dil, %eax
|
||||
; CHECK-NEXT: vpbroadcastq %rax, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <4 x i64> @llvm.x86.avx512.broadcastmb.256(i8 %a0) ;
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
declare <4 x i64> @llvm.x86.avx512.broadcastmb.256(i8)
|
||||
|
||||
define <2 x i64> @test_x86_broadcastmb_128(i8 %a0) {
|
||||
; CHECK-LABEL: test_x86_broadcastmb_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: movzbl %dil, %eax
|
||||
; CHECK-NEXT: vpbroadcastq %rax, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <2 x i64> @llvm.x86.avx512.broadcastmb.128(i8 %a0) ;
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.avx512.broadcastmb.128(i8)
|
||||
|
||||
|
@ -147,46 +147,3 @@ define <4 x i64>@test_int_x86_avx512_mask_vpconflict_q_256(<4 x i64> %x0, <4 x i
|
||||
ret <4 x i64> %res2
|
||||
}
|
||||
|
||||
define <8 x i32> @test_x86_vbroadcastmw_256(i16 %a0) {
|
||||
; CHECK-LABEL: test_x86_vbroadcastmw_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k0
|
||||
; CHECK-NEXT: vpbroadcastmw2d %k0, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x i32> @llvm.x86.avx512.broadcastmw.256(i16 %a0) ;
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
declare <8 x i32> @llvm.x86.avx512.broadcastmw.256(i16)
|
||||
|
||||
define <4 x i32> @test_x86_vbroadcastmw_128(i16 %a0) {
|
||||
; CHECK-LABEL: test_x86_vbroadcastmw_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k0
|
||||
; CHECK-NEXT: vpbroadcastmw2d %k0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <4 x i32> @llvm.x86.avx512.broadcastmw.128(i16 %a0) ;
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
declare <4 x i32> @llvm.x86.avx512.broadcastmw.128(i16)
|
||||
|
||||
define <4 x i64> @test_x86_broadcastmb_256(i8 %a0) {
|
||||
; CHECK-LABEL: test_x86_broadcastmb_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k0
|
||||
; CHECK-NEXT: vpbroadcastmb2q %k0, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <4 x i64> @llvm.x86.avx512.broadcastmb.256(i8 %a0) ;
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
declare <4 x i64> @llvm.x86.avx512.broadcastmb.256(i8)
|
||||
|
||||
define <2 x i64> @test_x86_broadcastmb_128(i8 %a0) {
|
||||
; CHECK-LABEL: test_x86_broadcastmb_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k0
|
||||
; CHECK-NEXT: vpbroadcastmb2q %k0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <2 x i64> @llvm.x86.avx512.broadcastmb.128(i8 %a0) ;
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.avx512.broadcastmb.128(i8)
|
||||
|
75
test/CodeGen/X86/avx512vlcd-intrinsics-fast-isel.ll
Normal file
75
test/CodeGen/X86/avx512vlcd-intrinsics-fast-isel.ll
Normal file
@ -0,0 +1,75 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512cd,+avx512vl | FileCheck %s
|
||||
|
||||
define <2 x i64> @test_mm_broadcastmb_epi64(<2 x i64> %a, <2 x i64> %b) {
|
||||
; CHECK-LABEL: test_mm_broadcastmb_epi64:
|
||||
; CHECK: # BB#0: # %entry
|
||||
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
|
||||
; CHECK-NEXT: vpbroadcastmb2q %k0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%0 = bitcast <2 x i64> %a to <4 x i32>
|
||||
%1 = bitcast <2 x i64> %b to <4 x i32>
|
||||
%2 = icmp eq <4 x i32> %0, %1
|
||||
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
%4 = bitcast <8 x i1> %3 to i8
|
||||
%conv.i = zext i8 %4 to i64
|
||||
%vecinit.i.i = insertelement <2 x i64> undef, i64 %conv.i, i32 0
|
||||
%vecinit1.i.i = shufflevector <2 x i64> %vecinit.i.i, <2 x i64> undef, <2 x i32> zeroinitializer
|
||||
ret <2 x i64> %vecinit1.i.i
|
||||
}
|
||||
|
||||
define <4 x i64> @test_mm256_broadcastmb_epi64(<4 x i64> %a, <4 x i64> %b) {
|
||||
; CHECK-LABEL: test_mm256_broadcastmb_epi64:
|
||||
; CHECK: # BB#0: # %entry
|
||||
; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
|
||||
; CHECK-NEXT: vpbroadcastmb2q %k0, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%0 = icmp eq <4 x i64> %a, %b
|
||||
%1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
%2 = bitcast <8 x i1> %1 to i8
|
||||
%conv.i = zext i8 %2 to i64
|
||||
%vecinit.i.i = insertelement <4 x i64> undef, i64 %conv.i, i32 0
|
||||
%vecinit3.i.i = shufflevector <4 x i64> %vecinit.i.i, <4 x i64> undef, <4 x i32> zeroinitializer
|
||||
ret <4 x i64> %vecinit3.i.i
|
||||
}
|
||||
|
||||
define <2 x i64> @test_mm_broadcastmw_epi32(<8 x i64> %a, <8 x i64> %b) {
|
||||
; CHECK-LABEL: test_mm_broadcastmw_epi32:
|
||||
; CHECK: # BB#0: # %entry
|
||||
; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
|
||||
; CHECK-NEXT: vpbroadcastmw2d %k0, %xmm0
|
||||
; CHECK-NEXT: vzeroupper
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%0 = bitcast <8 x i64> %a to <16 x i32>
|
||||
%1 = bitcast <8 x i64> %b to <16 x i32>
|
||||
%2 = icmp eq <16 x i32> %0, %1
|
||||
%3 = bitcast <16 x i1> %2 to i16
|
||||
%conv.i = zext i16 %3 to i32
|
||||
%vecinit.i.i = insertelement <4 x i32> undef, i32 %conv.i, i32 0
|
||||
%vecinit3.i.i = shufflevector <4 x i32> %vecinit.i.i, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
%4 = bitcast <4 x i32> %vecinit3.i.i to <2 x i64>
|
||||
ret <2 x i64> %4
|
||||
}
|
||||
|
||||
define <4 x i64> @test_mm256_broadcastmw_epi32(<8 x i64> %a, <8 x i64> %b) {
|
||||
; CHECK-LABEL: test_mm256_broadcastmw_epi32:
|
||||
; CHECK: # BB#0: # %entry
|
||||
; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
|
||||
; CHECK-NEXT: vpbroadcastmw2d %k0, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%0 = bitcast <8 x i64> %a to <16 x i32>
|
||||
%1 = bitcast <8 x i64> %b to <16 x i32>
|
||||
%2 = icmp eq <16 x i32> %0, %1
|
||||
%3 = bitcast <16 x i1> %2 to i16
|
||||
%conv.i = zext i16 %3 to i32
|
||||
%vecinit.i.i = insertelement <8 x i32> undef, i32 %conv.i, i32 0
|
||||
%vecinit7.i.i = shufflevector <8 x i32> %vecinit.i.i, <8 x i32> undef, <8 x i32> zeroinitializer
|
||||
%4 = bitcast <8 x i32> %vecinit7.i.i to <4 x i64>
|
||||
ret <4 x i64> %4
|
||||
}
|
||||
|
||||
|
@ -80,8 +80,7 @@ define <16 x i32> @test_mm512_epi32(<16 x i32> %a, <16 x i32> %b) {
|
||||
; AVX512CD-LABEL: test_mm512_epi32:
|
||||
; AVX512CD: # BB#0: # %entry
|
||||
; AVX512CD-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
|
||||
; AVX512CD-NEXT: kmovw %k0, %eax
|
||||
; AVX512CD-NEXT: vpbroadcastd %eax, %zmm0
|
||||
; AVX512CD-NEXT: vpbroadcastmw2d %k0, %zmm0
|
||||
; AVX512CD-NEXT: retq
|
||||
;
|
||||
; AVX512VLCDBW-LABEL: test_mm512_epi32:
|
||||
@ -110,9 +109,7 @@ define <8 x i64> @test_mm512_epi64(<8 x i32> %a, <8 x i32> %b) {
|
||||
; AVX512CD-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
|
||||
; AVX512CD-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
||||
; AVX512CD-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
|
||||
; AVX512CD-NEXT: kmovw %k0, %eax
|
||||
; AVX512CD-NEXT: movzbl %al, %eax
|
||||
; AVX512CD-NEXT: vpbroadcastq %rax, %zmm0
|
||||
; AVX512CD-NEXT: vpbroadcastmb2q %k0, %zmm0
|
||||
; AVX512CD-NEXT: retq
|
||||
;
|
||||
; AVX512VLCDBW-LABEL: test_mm512_epi64:
|
||||
|
Loading…
x
Reference in New Issue
Block a user