[X86] Remove masking from 512-bit PSHUFB intrinsics in preparation for being able to constant fold it in InstCombineCalls like we do for 128/256-bit.

llvm-svn: 289344
This commit is contained in:
Craig Topper 2016-12-10 23:09:43 +00:00
parent d01a59dea2
commit a134317d69
6 changed files with 77 additions and 32 deletions

View File

@ -1328,10 +1328,9 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
[llvm_v16f32_ty, llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty], [llvm_v16f32_ty, llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty],
[IntrNoMem]>; [IntrNoMem]>;
def int_x86_avx512_mask_pshuf_b_512 : def int_x86_avx512_pshuf_b_512 :
GCCBuiltin<"__builtin_ia32_pshufb512_mask">, GCCBuiltin<"__builtin_ia32_pshufb512">,
Intrinsic<[llvm_v64i8_ty], Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty],
[llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty],
[IntrNoMem]>; [IntrNoMem]>;
def int_x86_avx512_mask_shuf_f32x4_256 : def int_x86_avx512_mask_shuf_f32x4_256 :

View File

@ -258,8 +258,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Name == "sse2.pminu.b" || // Added in 3.9 Name == "sse2.pminu.b" || // Added in 3.9
Name == "sse41.pminuw" || // Added in 3.9 Name == "sse41.pminuw" || // Added in 3.9
Name == "sse41.pminud" || // Added in 3.9 Name == "sse41.pminud" || // Added in 3.9
Name == "avx512.mask.pshuf.b.128" || // Added in 4.0 Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
Name == "avx512.mask.pshuf.b.256" || // Added in 4.0
Name.startswith("avx2.pmax") || // Added in 3.9 Name.startswith("avx2.pmax") || // Added in 3.9
Name.startswith("avx2.pmin") || // Added in 3.9 Name.startswith("avx2.pmin") || // Added in 3.9
Name.startswith("avx512.mask.pmax") || // Added in 4.0 Name.startswith("avx512.mask.pmax") || // Added in 4.0
@ -1451,6 +1450,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
IID = Intrinsic::x86_ssse3_pshuf_b_128; IID = Intrinsic::x86_ssse3_pshuf_b_128;
else if (VecTy->getPrimitiveSizeInBits() == 256) else if (VecTy->getPrimitiveSizeInBits() == 256)
IID = Intrinsic::x86_avx2_pshuf_b; IID = Intrinsic::x86_avx2_pshuf_b;
else if (VecTy->getPrimitiveSizeInBits() == 512)
IID = Intrinsic::x86_avx512_pshuf_b_512;
else else
llvm_unreachable("Unexpected intrinsic"); llvm_unreachable("Unexpected intrinsic");

View File

@ -1055,8 +1055,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_prorv_q_128, INTR_TYPE_2OP_MASK, ISD::ROTR, 0), X86_INTRINSIC_DATA(avx512_mask_prorv_q_128, INTR_TYPE_2OP_MASK, ISD::ROTR, 0),
X86_INTRINSIC_DATA(avx512_mask_prorv_q_256, INTR_TYPE_2OP_MASK, ISD::ROTR, 0), X86_INTRINSIC_DATA(avx512_mask_prorv_q_256, INTR_TYPE_2OP_MASK, ISD::ROTR, 0),
X86_INTRINSIC_DATA(avx512_mask_prorv_q_512, INTR_TYPE_2OP_MASK, ISD::ROTR, 0), X86_INTRINSIC_DATA(avx512_mask_prorv_q_512, INTR_TYPE_2OP_MASK, ISD::ROTR, 0),
X86_INTRINSIC_DATA(avx512_mask_pshuf_b_512, INTR_TYPE_2OP_MASK,
X86ISD::PSHUFB, 0),
X86_INTRINSIC_DATA(avx512_mask_psubs_b_128, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0), X86_INTRINSIC_DATA(avx512_mask_psubs_b_128, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
X86_INTRINSIC_DATA(avx512_mask_psubs_b_256, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0), X86_INTRINSIC_DATA(avx512_mask_psubs_b_256, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
X86_INTRINSIC_DATA(avx512_mask_psubs_b_512, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0), X86_INTRINSIC_DATA(avx512_mask_psubs_b_512, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
@ -1456,6 +1454,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_maskz_vpmadd52l_uq_512, FMA_OP_MASKZ, X86_INTRINSIC_DATA(avx512_maskz_vpmadd52l_uq_512, FMA_OP_MASKZ,
X86ISD::VPMADD52L, 0), X86ISD::VPMADD52L, 0),
X86_INTRINSIC_DATA(avx512_psad_bw_512, INTR_TYPE_2OP, X86ISD::PSADBW, 0), X86_INTRINSIC_DATA(avx512_psad_bw_512, INTR_TYPE_2OP, X86ISD::PSADBW, 0),
X86_INTRINSIC_DATA(avx512_pshuf_b_512, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
X86_INTRINSIC_DATA(avx512_psll_d_512, INTR_TYPE_2OP, X86ISD::VSHL, 0), X86_INTRINSIC_DATA(avx512_psll_d_512, INTR_TYPE_2OP, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx512_psll_q_512, INTR_TYPE_2OP, X86ISD::VSHL, 0), X86_INTRINSIC_DATA(avx512_psll_q_512, INTR_TYPE_2OP, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx512_psll_w_512, INTR_TYPE_2OP, X86ISD::VSHL, 0), X86_INTRINSIC_DATA(avx512_psll_w_512, INTR_TYPE_2OP, X86ISD::VSHL, 0),

View File

@ -968,3 +968,27 @@ define <32 x i16>@test_int_x86_avx512_mask_psll_wi_512(<32 x i16> %x0, i32 %x1,
ret <32 x i16> %res4 ret <32 x i16> %res4
} }
declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
define <64 x i8>@test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pshuf_b_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm3
; AVX512BW-NEXT: kmovq %rdi, %k1
; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vpaddb %zmm3, %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pshuf_b_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm3
; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vpaddb %zmm3, %zmm2, %zmm0
; AVX512F-32-NEXT: retl
%res = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
%res1 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
%res2 = add <64 x i8> %res, %res1
ret <64 x i8> %res2
}

View File

@ -1747,29 +1747,57 @@ define <32 x i16>@test_int_x86_avx512_mask_pavg_w_512(<32 x i16> %x0, <32 x i16>
ret <32 x i16> %res2 ret <32 x i16> %res2
} }
declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) declare <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8>, <64 x i8>)
define <64 x i8>@test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { define <64 x i8>@test_int_x86_avx512_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pshuf_b_512: ; AVX512BW-LABEL: test_int_x86_avx512_pshuf_b_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_pshuf_b_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
%res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1)
ret <64 x i8> %res
}
define <64 x i8>@test_int_x86_avx512_pshuf_b_512_mask(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %mask) {
; AVX512BW-LABEL: test_int_x86_avx512_pshuf_b_512_mask:
; AVX512BW: ## BB#0: ; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rdi, %k1 ; AVX512BW-NEXT: kmovq %rdi, %k1
; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1} ; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: retq ; AVX512BW-NEXT: retq
; ;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pshuf_b_512: ; AVX512F-32-LABEL: test_int_x86_avx512_pshuf_b_512_mask:
; AVX512F-32: # BB#0: ; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 ; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1} ; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm0 ; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
; AVX512F-32-NEXT: retl ; AVX512F-32-NEXT: retl
%res = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1)
%res1 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) %mask.cast = bitcast i64 %mask to <64 x i1>
%res2 = add <64 x i8> %res, %res1 %res2 = select <64 x i1> %mask.cast, <64 x i8> %res, <64 x i8> %x2
ret <64 x i8> %res2
}
define <64 x i8>@test_int_x86_avx512_pshuf_b_512_maskz(<64 x i8> %x0, <64 x i8> %x1, i64 %mask) {
; AVX512BW-LABEL: test_int_x86_avx512_pshuf_b_512_maskz:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rdi, %k1
; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_pshuf_b_512_maskz:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT: retl
%res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1)
%mask.cast = bitcast i64 %mask to <64 x i1>
%res2 = select <64 x i1> %mask.cast, <64 x i8> %res, <64 x i8> zeroinitializer
ret <64 x i8> %res2 ret <64 x i8> %res2
} }

View File

@ -593,9 +593,7 @@ define <64 x i8> @combine_pshufb_identity_mask(<64 x i8> %x0, i64 %m) {
; X32: # BB#0: ; X32: # BB#0:
; X32-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 ; X32-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; X32-NEXT: vmovdqu8 {{.*#+}} zmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0] ; X32-NEXT: vmovdqu8 {{.*#+}} zmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 ; X32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X32-NEXT: kunpckdq %k0, %k1, %k1
; X32-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 ; X32-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3
; X32-NEXT: vpshufb %zmm2, %zmm0, %zmm3 {%k1} ; X32-NEXT: vpshufb %zmm2, %zmm0, %zmm3 {%k1}
; X32-NEXT: vpshufb %zmm2, %zmm3, %zmm1 {%k1} ; X32-NEXT: vpshufb %zmm2, %zmm3, %zmm1 {%k1}
@ -604,9 +602,9 @@ define <64 x i8> @combine_pshufb_identity_mask(<64 x i8> %x0, i64 %m) {
; ;
; X64-LABEL: combine_pshufb_identity_mask: ; X64-LABEL: combine_pshufb_identity_mask:
; X64: # BB#0: ; X64: # BB#0:
; X64-NEXT: kmovq %rdi, %k1
; X64-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 ; X64-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; X64-NEXT: vmovdqu8 {{.*#+}} zmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0] ; X64-NEXT: vmovdqu8 {{.*#+}} zmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; X64-NEXT: kmovq %rdi, %k1
; X64-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 ; X64-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3
; X64-NEXT: vpshufb %zmm2, %zmm0, %zmm3 {%k1} ; X64-NEXT: vpshufb %zmm2, %zmm0, %zmm3 {%k1}
; X64-NEXT: vpshufb %zmm2, %zmm3, %zmm1 {%k1} ; X64-NEXT: vpshufb %zmm2, %zmm3, %zmm1 {%k1}
@ -759,9 +757,7 @@ define <64 x i8> @combine_pshufb_as_pslldq(<64 x i8> %a0) {
define <64 x i8> @combine_pshufb_as_pslldq_mask(<64 x i8> %a0, i64 %m) { define <64 x i8> @combine_pshufb_as_pslldq_mask(<64 x i8> %a0, i64 %m) {
; X32-LABEL: combine_pshufb_as_pslldq_mask: ; X32-LABEL: combine_pshufb_as_pslldq_mask:
; X32: # BB#0: ; X32: # BB#0:
; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 ; X32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X32-NEXT: kunpckdq %k0, %k1, %k1
; X32-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,17,18,19,20,21],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,33,34,35,36,37],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,49,50,51,52,53] ; X32-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,17,18,19,20,21],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,33,34,35,36,37],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,49,50,51,52,53]
; X32-NEXT: retl ; X32-NEXT: retl
; ;
@ -790,9 +786,7 @@ define <64 x i8> @combine_pshufb_as_psrldq(<64 x i8> %a0) {
define <64 x i8> @combine_pshufb_as_psrldq_mask(<64 x i8> %a0, i64 %m) { define <64 x i8> @combine_pshufb_as_psrldq_mask(<64 x i8> %a0, i64 %m) {
; X32-LABEL: combine_pshufb_as_psrldq_mask: ; X32-LABEL: combine_pshufb_as_psrldq_mask:
; X32: # BB#0: ; X32: # BB#0:
; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 ; X32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X32-NEXT: kunpckdq %k0, %k1, %k1
; X32-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[47],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; X32-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[47],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X32-NEXT: retl ; X32-NEXT: retl
; ;