mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-11 15:07:58 +00:00
[AVX-512] Remove 128/256 masked pshufb intrinsics. Autoupgrade them to legacy intrinsics and a select.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@286089 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
cb096f0a4e
commit
9fd28c59b7
@ -1376,18 +1376,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||||||
[llvm_v4f32_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty],
|
[llvm_v4f32_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty],
|
||||||
[IntrNoMem]>;
|
[IntrNoMem]>;
|
||||||
|
|
||||||
def int_x86_avx512_mask_pshuf_b_128 :
|
|
||||||
GCCBuiltin<"__builtin_ia32_pshufb128_mask">,
|
|
||||||
Intrinsic<[llvm_v16i8_ty],
|
|
||||||
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask_pshuf_b_256 :
|
|
||||||
GCCBuiltin<"__builtin_ia32_pshufb256_mask">,
|
|
||||||
Intrinsic<[llvm_v32i8_ty],
|
|
||||||
[llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask_pshuf_b_512 :
|
def int_x86_avx512_mask_pshuf_b_512 :
|
||||||
GCCBuiltin<"__builtin_ia32_pshufb512_mask">,
|
GCCBuiltin<"__builtin_ia32_pshufb512_mask">,
|
||||||
Intrinsic<[llvm_v64i8_ty],
|
Intrinsic<[llvm_v64i8_ty],
|
||||||
|
@ -244,6 +244,8 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
|||||||
Name == "sse2.pminu.b" ||
|
Name == "sse2.pminu.b" ||
|
||||||
Name == "sse41.pminuw" ||
|
Name == "sse41.pminuw" ||
|
||||||
Name == "sse41.pminud" ||
|
Name == "sse41.pminud" ||
|
||||||
|
Name == "avx512.mask.pshuf.b.128" ||
|
||||||
|
Name == "avx512.mask.pshuf.b.256" ||
|
||||||
Name.startswith("avx2.pmax") ||
|
Name.startswith("avx2.pmax") ||
|
||||||
Name.startswith("avx2.pmin") ||
|
Name.startswith("avx2.pmin") ||
|
||||||
Name.startswith("avx512.mask.pmax") ||
|
Name.startswith("avx512.mask.pmax") ||
|
||||||
@ -1376,6 +1378,20 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
|||||||
Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
|
Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
|
||||||
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
|
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
|
||||||
CI->getArgOperand(2));
|
CI->getArgOperand(2));
|
||||||
|
} else if (IsX86 && Name.startswith("avx512.mask.pshuf.b.")) {
|
||||||
|
VectorType *VecTy = cast<VectorType>(CI->getType());
|
||||||
|
Intrinsic::ID IID;
|
||||||
|
if (VecTy->getPrimitiveSizeInBits() == 128)
|
||||||
|
IID = Intrinsic::x86_ssse3_pshuf_b_128;
|
||||||
|
else if (VecTy->getPrimitiveSizeInBits() == 256)
|
||||||
|
IID = Intrinsic::x86_avx2_pshuf_b;
|
||||||
|
else
|
||||||
|
llvm_unreachable("Unexpected intrinsic");
|
||||||
|
|
||||||
|
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
|
||||||
|
{ CI->getArgOperand(0), CI->getArgOperand(1) });
|
||||||
|
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
|
||||||
|
CI->getArgOperand(2));
|
||||||
} else if (IsX86 && Name == "avx512.mask.psll.d.128") {
|
} else if (IsX86 && Name == "avx512.mask.psll.d.128") {
|
||||||
Rep = UpgradeX86MaskedShift(Builder, *CI, Intrinsic::x86_sse2_psll_d);
|
Rep = UpgradeX86MaskedShift(Builder, *CI, Intrinsic::x86_sse2_psll_d);
|
||||||
} else if (IsX86 && Name == "avx512.mask.psll.d.256") {
|
} else if (IsX86 && Name == "avx512.mask.psll.d.256") {
|
||||||
|
@ -1140,10 +1140,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
|||||||
X86_INTRINSIC_DATA(avx512_mask_prorv_q_128, INTR_TYPE_2OP_MASK, ISD::ROTR, 0),
|
X86_INTRINSIC_DATA(avx512_mask_prorv_q_128, INTR_TYPE_2OP_MASK, ISD::ROTR, 0),
|
||||||
X86_INTRINSIC_DATA(avx512_mask_prorv_q_256, INTR_TYPE_2OP_MASK, ISD::ROTR, 0),
|
X86_INTRINSIC_DATA(avx512_mask_prorv_q_256, INTR_TYPE_2OP_MASK, ISD::ROTR, 0),
|
||||||
X86_INTRINSIC_DATA(avx512_mask_prorv_q_512, INTR_TYPE_2OP_MASK, ISD::ROTR, 0),
|
X86_INTRINSIC_DATA(avx512_mask_prorv_q_512, INTR_TYPE_2OP_MASK, ISD::ROTR, 0),
|
||||||
X86_INTRINSIC_DATA(avx512_mask_pshuf_b_128, INTR_TYPE_2OP_MASK,
|
|
||||||
X86ISD::PSHUFB, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask_pshuf_b_256, INTR_TYPE_2OP_MASK,
|
|
||||||
X86ISD::PSHUFB, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask_pshuf_b_512, INTR_TYPE_2OP_MASK,
|
X86_INTRINSIC_DATA(avx512_mask_pshuf_b_512, INTR_TYPE_2OP_MASK,
|
||||||
X86ISD::PSHUFB, 0),
|
X86ISD::PSHUFB, 0),
|
||||||
X86_INTRINSIC_DATA(avx512_mask_psll_d, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
|
X86_INTRINSIC_DATA(avx512_mask_psll_d, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
|
||||||
|
@ -1704,3 +1704,36 @@ define <16 x i16>@test_int_x86_avx512_mask_psll_wi_256(<16 x i16> %x0, i32 %x1,
|
|||||||
ret <16 x i16> %res4
|
ret <16 x i16> %res4
|
||||||
}
|
}
|
||||||
|
|
||||||
|
declare <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
|
||||||
|
|
||||||
|
define <16 x i8>@test_int_x86_avx512_mask_pshuf_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
|
||||||
|
; CHECK-LABEL: test_int_x86_avx512_mask_pshuf_b_128:
|
||||||
|
; CHECK: ## BB#0:
|
||||||
|
; CHECK-NEXT: vpshufb %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0x7d,0x08,0x00,0xd9]
|
||||||
|
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||||
|
; CHECK-NEXT: vpshufb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x00,0xd1]
|
||||||
|
; CHECK-NEXT: vpaddb %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xc3]
|
||||||
|
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||||
|
%res = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
|
||||||
|
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
|
||||||
|
%res2 = add <16 x i8> %res, %res1
|
||||||
|
ret <16 x i8> %res2
|
||||||
|
}
|
||||||
|
|
||||||
|
declare <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
|
||||||
|
|
||||||
|
define <32 x i8>@test_int_x86_avx512_mask_pshuf_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
|
||||||
|
; CHECK-LABEL: test_int_x86_avx512_mask_pshuf_b_256:
|
||||||
|
; CHECK: ## BB#0:
|
||||||
|
; CHECK-NEXT: vpshufb %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0x7d,0x28,0x00,0xd9]
|
||||||
|
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||||
|
; CHECK-NEXT: vpshufb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x00,0xd1]
|
||||||
|
; CHECK-NEXT: vpaddb %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xc3]
|
||||||
|
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||||
|
%res = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
|
||||||
|
%res1 = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
|
||||||
|
%res2 = add <32 x i8> %res, %res1
|
||||||
|
ret <32 x i8> %res2
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -3871,38 +3871,6 @@ define <16 x i16>@test_int_x86_avx512_mask_pavg_w_256(<16 x i16> %x0, <16 x i16>
|
|||||||
ret <16 x i16> %res2
|
ret <16 x i16> %res2
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
|
|
||||||
|
|
||||||
define <16 x i8>@test_int_x86_avx512_mask_pshuf_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
|
|
||||||
; CHECK-LABEL: test_int_x86_avx512_mask_pshuf_b_128:
|
|
||||||
; CHECK: ## BB#0:
|
|
||||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
|
||||||
; CHECK-NEXT: vpshufb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x00,0xd1]
|
|
||||||
; CHECK-NEXT: vpshufb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x00,0xc1]
|
|
||||||
; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xc0]
|
|
||||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
|
||||||
%res = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
|
|
||||||
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
|
|
||||||
%res2 = add <16 x i8> %res, %res1
|
|
||||||
ret <16 x i8> %res2
|
|
||||||
}
|
|
||||||
|
|
||||||
declare <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
|
|
||||||
|
|
||||||
define <32 x i8>@test_int_x86_avx512_mask_pshuf_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
|
|
||||||
; CHECK-LABEL: test_int_x86_avx512_mask_pshuf_b_256:
|
|
||||||
; CHECK: ## BB#0:
|
|
||||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
|
||||||
; CHECK-NEXT: vpshufb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x00,0xd1]
|
|
||||||
; CHECK-NEXT: vpshufb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x00,0xc1]
|
|
||||||
; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xc0]
|
|
||||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
|
||||||
%res = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
|
|
||||||
%res1 = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
|
|
||||||
%res2 = add <32 x i8> %res, %res1
|
|
||||||
ret <32 x i8> %res2
|
|
||||||
}
|
|
||||||
|
|
||||||
declare <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8>, <16 x i8>, i16)
|
declare <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8>, <16 x i8>, i16)
|
||||||
|
|
||||||
define <16 x i8>@test_int_x86_avx512_mask_pabs_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) {
|
define <16 x i8>@test_int_x86_avx512_mask_pabs_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user