[X86] Remove masking from 512-bit VPERMIL intrinsics in preparation for being able to constant fold them in InstCombineCalls like we do for 128/256-bit.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@289350 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in: parent a6dba14e5c, commit 2d270b3115
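The masked 512-bit form carried its pass-through vector and mask as extra intrinsic operands; after this change the intrinsic only performs the permute, and masking is expressed in IR as a separate select, which is the pattern the updated tests below check for. A minimal LLVM IR sketch of the two forms follows (both declarations are taken from the diff; the function name @sketch_vpermilvar_pd_masked is illustrative only, not part of this patch):

; Old form: pass-through (3rd operand) and mask (4th operand) are baked into the intrinsic.
declare <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)

; New form: the intrinsic only permutes; merge-masking becomes an explicit select.
declare <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double>, <8 x i64>)

define <8 x double> @sketch_vpermilvar_pd_masked(<8 x double> %x0, <8 x i64> %x1, <8 x double> %passthru, i8 %mask) {
  %res = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %merged = select <8 x i1> %mask.cast, <8 x double> %res, <8 x double> %passthru
  ret <8 x double> %merged
}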
@@ -1316,16 +1316,14 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
                    llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty],
                   [IntrNoMem]>;
 
-  def int_x86_avx512_mask_vpermilvar_pd_512 :
-        GCCBuiltin<"__builtin_ia32_vpermilvarpd512_mask">,
-        Intrinsic<[llvm_v8f64_ty],
-                  [llvm_v8f64_ty, llvm_v8i64_ty, llvm_v8f64_ty, llvm_i8_ty],
+  def int_x86_avx512_vpermilvar_pd_512 :
+        GCCBuiltin<"__builtin_ia32_vpermilvarpd512">,
+        Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8i64_ty],
                   [IntrNoMem]>;
 
-  def int_x86_avx512_mask_vpermilvar_ps_512 :
-        GCCBuiltin<"__builtin_ia32_vpermilvarps512_mask">,
-        Intrinsic<[llvm_v16f32_ty],
-                  [llvm_v16f32_ty, llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty],
+  def int_x86_avx512_vpermilvar_ps_512 :
+        GCCBuiltin<"__builtin_ia32_vpermilvarps512">,
+        Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16i32_ty],
                   [IntrNoMem]>;
 
   def int_x86_avx512_pshuf_b_512 :
@@ -312,10 +312,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
          Name == "avx512.mask.sub.pd.256" || // Added in 4.0
          Name == "avx512.mask.sub.ps.128" || // Added in 4.0
          Name == "avx512.mask.sub.ps.256" || // Added in 4.0
-         Name == "avx512.mask.vpermilvar.ps.128" || // Added in 4.0
-         Name == "avx512.mask.vpermilvar.ps.256" || // Added in 4.0
-         Name == "avx512.mask.vpermilvar.pd.128" || // Added in 4.0
-         Name == "avx512.mask.vpermilvar.pd.256" || // Added in 4.0
+         Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
          Name.startswith("avx512.mask.psll.d") || // Added in 4.0
          Name.startswith("avx512.mask.psll.q") || // Added in 4.0
          Name.startswith("avx512.mask.psll.w") || // Added in 4.0
@@ -1673,6 +1670,10 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
         IID = Intrinsic::x86_avx_vpermilvar_ps_256;
       else if (Name.endswith("pd.256"))
         IID = Intrinsic::x86_avx_vpermilvar_pd_256;
+      else if (Name.endswith("ps.512"))
+        IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
+      else if (Name.endswith("pd.512"))
+        IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
       else
         llvm_unreachable("Unexpected vpermilvar intrinsic");
@@ -1255,10 +1255,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
                      X86ISD::VPERMIV3, 0),
   X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_512, VPERM_3OP_MASK,
                      X86ISD::VPERMIV3, 0),
-  X86_INTRINSIC_DATA(avx512_mask_vpermilvar_pd_512, INTR_TYPE_2OP_MASK,
-                     X86ISD::VPERMILPV, 0),
-  X86_INTRINSIC_DATA(avx512_mask_vpermilvar_ps_512, INTR_TYPE_2OP_MASK,
-                     X86ISD::VPERMILPV, 0),
   X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_128, VPERM_3OP_MASK,
                      X86ISD::VPERMV3, 0),
   X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_256, VPERM_3OP_MASK,
@@ -1552,6 +1548,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_vcvtss2si64, INTR_TYPE_2OP, X86ISD::CVTS2SI_RND, 0),
   X86_INTRINSIC_DATA(avx512_vcvtss2usi32, INTR_TYPE_2OP, X86ISD::CVTS2UI_RND, 0),
   X86_INTRINSIC_DATA(avx512_vcvtss2usi64, INTR_TYPE_2OP, X86ISD::CVTS2UI_RND, 0),
+  X86_INTRINSIC_DATA(avx512_vpermilvar_pd_512, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
+  X86_INTRINSIC_DATA(avx512_vpermilvar_ps_512, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
   X86_INTRINSIC_DATA(fma_vfmadd_pd, INTR_TYPE_3OP, X86ISD::FMADD, 0),
   X86_INTRINSIC_DATA(fma_vfmadd_pd_256, INTR_TYPE_3OP, X86ISD::FMADD, 0),
   X86_INTRINSIC_DATA(fma_vfmadd_ps, INTR_TYPE_3OP, X86ISD::FMADD, 0),
@@ -2596,3 +2596,62 @@ define <16 x i32> @test_maskz_valign_d(<16 x i32> %a, <16 x i32> %b, i16 %mask)
 declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)
+
+declare <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)
+
+define <8 x double>@test_int_x86_avx512_mask_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpermilpd %zmm1, %zmm0, %zmm3
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vpermilpd %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT:    vpermilpd %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    vaddpd %zmm0, %zmm2, %zmm0
+; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
+  %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3)
+  %res2 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
+  %res3 = fadd <8 x double> %res, %res1
+  %res4 = fadd <8 x double> %res2, %res3
+  ret <8 x double> %res4
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)
+
+define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpermilps %zmm1, %zmm0, %zmm3
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vpermilps %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT:    vpermilps %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    vaddps %zmm0, %zmm2, %zmm0
+; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
+  %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3)
+  %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
+  %res3 = fadd <16 x float> %res, %res1
+  %res4 = fadd <16 x float> %res2, %res3
+  ret <16 x float> %res4
+}
+
+; Test case to make sure we can print shuffle decode comments for constant pool loads.
+define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512_constant_pool(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512_constant_pool:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vpermilps {{.*#+}} zmm2 {%k1} = zmm0[2,3,0,1,7,6,5,4,9,8,11,10,12,13,14,15]
+; CHECK-NEXT:    vpermilps {{.*#+}} zmm1 {%k1} {z} = zmm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
+; CHECK-NEXT:    vpermilps {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,7,10,11,8,9,14,15,13,12]
+; CHECK-NEXT:    vaddps %zmm1, %zmm2, %zmm1
+; CHECK-NEXT:    vaddps %zmm1, %zmm0, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 3, i32 2, i32 1, i32 0, i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3>, <16 x float> %x2, i16 %x3)
+  %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3>, <16 x float> zeroinitializer, i16 %x3)
+  %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>, <16 x float> %x2, i16 -1)
+  %res3 = fadd <16 x float> %res, %res1
+  %res4 = fadd <16 x float> %res2, %res3
+  ret <16 x float> %res4
+}
@@ -3853,63 +3853,111 @@ define <4 x float>@test_int_x86_avx512_mask_getmant_ss(<4 x float> %x0, <4 x flo
   ret <4 x float> %res13
 }
 
-declare <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)
+declare <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double>, <8 x i64>)
 
-define <8 x double>@test_int_x86_avx512_mask_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
-; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_512:
+define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1) {
+; CHECK-LABEL: test_int_x86_avx512_vpermilvar_pd_512:
 ; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpermilpd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1)
+  ret <8 x double> %res
+}
+
+define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512_mask(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_vpermilvar_pd_512_mask:
+; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovw %edi, %k1
 ; CHECK-NEXT:    vpermilpd %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT:    vpermilpd %zmm1, %zmm0, %zmm3 {%k1} {z}
-; CHECK-NEXT:    vpermilpd %zmm1, %zmm0, %zmm0
-; CHECK-NEXT:    vaddpd %zmm3, %zmm2, %zmm1
-; CHECK-NEXT:    vaddpd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT:    vmovapd %zmm2, %zmm0
 ; CHECK-NEXT:    retq
-  %res = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
-  %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3)
-  %res2 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
-  %res3 = fadd <8 x double> %res, %res1
-  %res4 = fadd <8 x double> %res2, %res3
-  ret <8 x double> %res4
+  %res = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1)
+  %mask.cast = bitcast i8 %mask to <8 x i1>
+  %res2 = select <8 x i1> %mask.cast, <8 x double> %res, <8 x double> %x2
+  ret <8 x double> %res2
 }
 
-declare <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)
+define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512_maskz(<8 x double> %x0, <8 x i64> %x1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_vpermilvar_pd_512_maskz:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vpermilpd %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1)
+  %mask.cast = bitcast i8 %mask to <8 x i1>
+  %res2 = select <8 x i1> %mask.cast, <8 x double> %res, <8 x double> zeroinitializer
+  ret <8 x double> %res2
+}
+
-define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
-; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512:
+declare <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float>, <16 x i32>)
+
+define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1) {
+; CHECK-LABEL: test_int_x86_avx512_vpermilvar_ps_512:
 ; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpermilps %zmm1, %zmm0, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1)
+  ret <16 x float> %res
+}
+
+define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_mask(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_vpermilvar_ps_512_mask:
+; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovw %edi, %k1
 ; CHECK-NEXT:    vpermilps %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT:    vpermilps %zmm1, %zmm0, %zmm3 {%k1} {z}
-; CHECK-NEXT:    vpermilps %zmm1, %zmm0, %zmm0
-; CHECK-NEXT:    vaddps %zmm3, %zmm2, %zmm1
-; CHECK-NEXT:    vaddps %zmm1, %zmm0, %zmm0
+; CHECK-NEXT:    vmovaps %zmm2, %zmm0
 ; CHECK-NEXT:    retq
-  %res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
-  %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3)
-  %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
-  %res3 = fadd <16 x float> %res, %res1
-  %res4 = fadd <16 x float> %res2, %res3
-  ret <16 x float> %res4
+  %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1)
+  %mask.cast = bitcast i16 %mask to <16 x i1>
+  %res2 = select <16 x i1> %mask.cast, <16 x float> %res, <16 x float> %x2
+  ret <16 x float> %res2
 }
 
+define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_maskz(<16 x float> %x0, <16 x i32> %x1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_vpermilvar_ps_512_maskz:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vpermilps %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1)
+  %mask.cast = bitcast i16 %mask to <16 x i1>
+  %res2 = select <16 x i1> %mask.cast, <16 x float> %res, <16 x float> zeroinitializer
+  ret <16 x float> %res2
+}
+
 ; Test case to make sure we can print shuffle decode comments for constant pool loads.
-define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512_constant_pool(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
-; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512_constant_pool:
+define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_constant_pool(<16 x float> %x0, <16 x i32> %x1) {
+; CHECK-LABEL: test_int_x86_avx512_vpermilvar_ps_512_constant_pool:
 ; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpermilps {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,7,10,11,8,9,14,15,13,12]
+; CHECK-NEXT:    retq
+  %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>)
+  ret <16 x float> %res
+}
+
+define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_constant_pool_mask(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_vpermilvar_ps_512_constant_pool_mask:
+; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovw %edi, %k1
-; CHECK-NEXT:    vpermilps {{.*#+}} zmm2 {%k1} = zmm0[2,3,0,1,7,6,5,4,9,8,11,10,12,13,14,15]
-; CHECK-NEXT:    vpermilps {{.*#+}} zmm1 {%k1} {z} = zmm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
-; CHECK-NEXT:    vpermilps {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,7,10,11,8,9,14,15,13,12]
-; CHECK-NEXT:    vaddps %zmm1, %zmm2, %zmm1
-; CHECK-NEXT:    vaddps %zmm1, %zmm0, %zmm0
+; CHECK-NEXT:    vpermilps {{.*#+}} zmm2 {%k1} = zmm0[1,0,3,2,4,5,6,7,10,11,8,9,14,15,13,12]
+; CHECK-NEXT:    vmovaps %zmm2, %zmm0
 ; CHECK-NEXT:    retq
-  %res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 3, i32 2, i32 1, i32 0, i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3>, <16 x float> %x2, i16 %x3)
-  %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3>, <16 x float> zeroinitializer, i16 %x3)
-  %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>, <16 x float> %x2, i16 -1)
-  %res3 = fadd <16 x float> %res, %res1
-  %res4 = fadd <16 x float> %res2, %res3
-  ret <16 x float> %res4
+  %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>)
+  %mask.cast = bitcast i16 %mask to <16 x i1>
+  %res2 = select <16 x i1> %mask.cast, <16 x float> %res, <16 x float> %x2
+  ret <16 x float> %res2
 }
 
+define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_constant_pool_maskz(<16 x float> %x0, <16 x i32> %x1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_vpermilvar_ps_512_constant_pool_maskz:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,3,2,4,5,6,7,10,11,8,9,14,15,13,12]
+; CHECK-NEXT:    retq
+  %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>)
+  %mask.cast = bitcast i16 %mask to <16 x i1>
+  %res2 = select <16 x i1> %mask.cast, <16 x float> %res, <16 x float> zeroinitializer
+  ret <16 x float> %res2
+}
+
 declare <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float>, <4 x float>, i32, <16 x float>, i16)
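With the mask out of the intrinsic, a 512-bit vpermilvar call whose index operand is constant can be folded to a plain shufflevector in InstCombineCalls, the same way the 128/256-bit variants already are, while the masking select stays behind and is handled separately. A hedged LLVM IR sketch of the intended fold, using the constant index vector from the tests above (the folded form is what such a transform would be expected to produce, not output taken from this patch):

; Before: variable permute with a constant index vector
; (each i32 selects an element within its own 128-bit lane).
%res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0,
         <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3,
                     i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>)

; After the intended fold: the same per-lane permutation as a shufflevector,
; matching the shuffle decode comment zmm0[1,0,3,2,4,5,6,7,10,11,8,9,14,15,13,12].
%res.folded = shufflevector <16 x float> %x0, <16 x float> undef,
         <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7,
                     i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 13, i32 12>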