Mirror of https://github.com/RPCSX/llvm.git (synced 2025-02-03 02:53:06 +00:00)
[X86] Remove sse2 pshufd/pshuflw/pshufhw intrinsics and upgrade them to shufflevector.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@272510 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent: de9d1e0d13
commit: b2cfb64e72
@ -662,15 +662,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
def int_x86_ssse3_pshuf_b_128 : GCCBuiltin<"__builtin_ia32_pshufb128">,
|
||||
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
|
||||
llvm_v16i8_ty], [IntrNoMem]>;
|
||||
def int_x86_sse2_pshuf_d :
|
||||
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_sse2_pshufl_w :
|
||||
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_sse2_pshufh_w :
|
||||
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_sse_pshuf_w : GCCBuiltin<"__builtin_ia32_pshufw">,
|
||||
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
@ -177,6 +177,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
||||
Name.startswith("x86.avx2.vbroadcast") ||
|
||||
Name.startswith("x86.avx2.pbroadcast") ||
|
||||
Name.startswith("x86.avx.vpermil.") ||
|
||||
Name.startswith("x86.sse2.pshuf") ||
|
||||
Name.startswith("x86.sse41.pmovsx") ||
|
||||
Name.startswith("x86.sse41.pmovzx") ||
|
||||
Name.startswith("x86.avx2.pmovsx") ||
|
||||
@ -880,7 +881,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||
Rep = Builder.CreateShuffleVector(Op0, UndefV, Idxs);
|
||||
} else if (Name == "llvm.stackprotectorcheck") {
|
||||
Rep = nullptr;
|
||||
} else if (Name.startswith("llvm.x86.avx.vpermil.")) {
|
||||
} else if (Name.startswith("llvm.x86.avx.vpermil.") ||
|
||||
Name == "llvm.x86.sse2.pshuf.d") {
|
||||
Value *Op0 = CI->getArgOperand(0);
|
||||
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
|
||||
VectorType *VecTy = cast<VectorType>(CI->getType());
|
||||
@ -896,6 +898,34 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||
for (unsigned i = 0; i != NumElts; ++i)
|
||||
Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
|
||||
|
||||
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
|
||||
} else if (Name == "llvm.x86.sse2.pshufl.w") {
|
||||
Value *Op0 = CI->getArgOperand(0);
|
||||
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
|
||||
unsigned NumElts = CI->getType()->getVectorNumElements();
|
||||
|
||||
SmallVector<uint32_t, 16> Idxs(NumElts);
|
||||
for (unsigned l = 0; l != NumElts; l += 8) {
|
||||
for (unsigned i = 0; i != 4; ++i)
|
||||
Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
|
||||
for (unsigned i = 4; i != 8; ++i)
|
||||
Idxs[i + l] = i + l;
|
||||
}
|
||||
|
||||
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
|
||||
} else if (Name == "llvm.x86.sse2.pshufh.w") {
|
||||
Value *Op0 = CI->getArgOperand(0);
|
||||
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
|
||||
unsigned NumElts = CI->getType()->getVectorNumElements();
|
||||
|
||||
SmallVector<uint32_t, 16> Idxs(NumElts);
|
||||
for (unsigned l = 0; l != NumElts; l += 8) {
|
||||
for (unsigned i = 0; i != 4; ++i)
|
||||
Idxs[i + l] = i + l;
|
||||
for (unsigned i = 0; i != 4; ++i)
|
||||
Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
|
||||
}
|
||||
|
||||
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
|
||||
} else {
|
||||
llvm_unreachable("Unknown function for CallInst upgrade.");
|
||||
|
@ -2133,9 +2133,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(sse2_pmulhu_w, INTR_TYPE_2OP, ISD::MULHU, 0),
|
||||
X86_INTRINSIC_DATA(sse2_pmulu_dq, INTR_TYPE_2OP, X86ISD::PMULUDQ, 0),
|
||||
X86_INTRINSIC_DATA(sse2_psad_bw, INTR_TYPE_2OP, X86ISD::PSADBW, 0),
|
||||
X86_INTRINSIC_DATA(sse2_pshuf_d, INTR_TYPE_2OP, X86ISD::PSHUFD, 0),
|
||||
X86_INTRINSIC_DATA(sse2_pshufh_w, INTR_TYPE_2OP, X86ISD::PSHUFHW, 0),
|
||||
X86_INTRINSIC_DATA(sse2_pshufl_w, INTR_TYPE_2OP, X86ISD::PSHUFLW, 0),
|
||||
X86_INTRINSIC_DATA(sse2_psll_d, INTR_TYPE_2OP, X86ISD::VSHL, 0),
|
||||
X86_INTRINSIC_DATA(sse2_psll_q, INTR_TYPE_2OP, X86ISD::VSHL, 0),
|
||||
X86_INTRINSIC_DATA(sse2_psll_w, INTR_TYPE_2OP, X86ISD::VSHL, 0),
|
||||
|
@ -138,4 +138,35 @@ define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
|
||||
}
|
||||
declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
|
||||
|
||||
define <4 x i32> @test_x86_sse2_pshuf_d(<4 x i32> %a) {
|
||||
; CHECK-LABEL: test_x86_sse2_pshuf_d:
|
||||
; CHECK: ## BB#0: ## %entry
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
|
||||
; CHECK-NEXT: retl
|
||||
entry:
|
||||
%res = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27) nounwind readnone
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
declare <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32>, i8) nounwind readnone
|
||||
|
||||
define <8 x i16> @test_x86_sse2_pshufl_w(<8 x i16> %a) {
|
||||
; CHECK-LABEL: test_x86_sse2_pshufl_w:
|
||||
; CHECK: ## BB#0: ## %entry
|
||||
; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
|
||||
; CHECK-NEXT: retl
|
||||
entry:
|
||||
%res = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27) nounwind readnone
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8) nounwind readnone
|
||||
|
||||
define <8 x i16> @test_x86_sse2_pshufh_w(<8 x i16> %a) {
|
||||
; CHECK-LABEL: test_x86_sse2_pshufh_w:
|
||||
; CHECK: ## BB#0: ## %entry
|
||||
; CHECK-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
|
||||
; CHECK-NEXT: retl
|
||||
entry:
|
||||
%res = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27) nounwind readnone
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16>, i8) nounwind readnone
|
||||
|
@ -1266,51 +1266,3 @@ define void @test_x86_sse2_pause() {
|
||||
ret void
|
||||
}
|
||||
declare void @llvm.x86.sse2.pause() nounwind
|
||||
|
||||
define <4 x i32> @test_x86_sse2_pshuf_d(<4 x i32> %a) {
|
||||
; SSE-LABEL: test_x86_sse2_pshuf_d:
|
||||
; SSE: ## BB#0: ## %entry
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
|
||||
; SSE-NEXT: retl
|
||||
;
|
||||
; KNL-LABEL: test_x86_sse2_pshuf_d:
|
||||
; KNL: ## BB#0: ## %entry
|
||||
; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
|
||||
; KNL-NEXT: retl
|
||||
entry:
|
||||
%res = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27) nounwind readnone
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
declare <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32>, i8) nounwind readnone
|
||||
|
||||
define <8 x i16> @test_x86_sse2_pshufl_w(<8 x i16> %a) {
|
||||
; SSE-LABEL: test_x86_sse2_pshufl_w:
|
||||
; SSE: ## BB#0: ## %entry
|
||||
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
|
||||
; SSE-NEXT: retl
|
||||
;
|
||||
; KNL-LABEL: test_x86_sse2_pshufl_w:
|
||||
; KNL: ## BB#0: ## %entry
|
||||
; KNL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
|
||||
; KNL-NEXT: retl
|
||||
entry:
|
||||
%res = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27) nounwind readnone
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8) nounwind readnone
|
||||
|
||||
define <8 x i16> @test_x86_sse2_pshufh_w(<8 x i16> %a) {
|
||||
; SSE-LABEL: test_x86_sse2_pshufh_w:
|
||||
; SSE: ## BB#0: ## %entry
|
||||
; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
|
||||
; SSE-NEXT: retl
|
||||
;
|
||||
; KNL-LABEL: test_x86_sse2_pshufh_w:
|
||||
; KNL: ## BB#0: ## %entry
|
||||
; KNL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
|
||||
; KNL-NEXT: retl
|
||||
entry:
|
||||
%res = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27) nounwind readnone
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16>, i8) nounwind readnone
|
||||
|
@ -96,10 +96,15 @@ define <4 x i32> @combine_pshufd6(<4 x i32> %a) {
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: combine_pshufd6:
|
||||
; AVX: # BB#0: # %entry
|
||||
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
||||
; AVX-NEXT: retq
|
||||
; AVX1-LABEL: combine_pshufd6:
|
||||
; AVX1: # BB#0: # %entry
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: combine_pshufd6:
|
||||
; AVX2: # BB#0: # %entry
|
||||
; AVX2-NEXT: vbroadcastss %xmm0, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
entry:
|
||||
%b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 0)
|
||||
%c = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %b, i8 8)
|
||||
|
Loading…
x
Reference in New Issue
Block a user