mirror of
https://github.com/RPCS3/llvm.git
synced 2025-03-09 05:11:39 +00:00
Two fixes to the vpermilvar optimization.
The instcombine logic to handle vpermilvar's pd and 256 variants was incorrect. The _256 variants index into the individual 128-bit lanes, and in all cases the unused mask bits also have to be masked out. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@207577 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
6149bc1e10
commit
984f2fc09e
@ -725,9 +725,32 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
|||||||
// Convert vpermil* to shufflevector if the mask is constant.
|
// Convert vpermil* to shufflevector if the mask is constant.
|
||||||
Value *V = II->getArgOperand(1);
|
Value *V = II->getArgOperand(1);
|
||||||
if (auto C = dyn_cast<ConstantDataVector>(V)) {
|
if (auto C = dyn_cast<ConstantDataVector>(V)) {
|
||||||
|
unsigned Size = C->getNumElements();
|
||||||
|
assert(Size == 8 || Size == 4 || Size == 2);
|
||||||
|
uint32_t Indexes[8];
|
||||||
|
|
||||||
|
// The intrinsics only read one or two bits, clear the rest.
|
||||||
|
for (unsigned I = 0; I < Size; ++I) {
|
||||||
|
uint32_t Index = C->getElementAsInteger(I) & 0x3;
|
||||||
|
if (II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd ||
|
||||||
|
II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256)
|
||||||
|
Index >>= 1;
|
||||||
|
Indexes[I] = Index;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The _256 variants are a bit trickier since the mask bits always index
|
||||||
|
// into the corresponding 128-bit half. In order to convert to a generic
|
||||||
|
// shuffle, we have to make that explicit.
|
||||||
|
if (II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_ps_256 ||
|
||||||
|
II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256) {
|
||||||
|
for (unsigned I = Size / 2; I < Size; ++I)
|
||||||
|
Indexes[I] += Size / 2;
|
||||||
|
}
|
||||||
|
auto NewC =
|
||||||
|
ConstantDataVector::get(C->getContext(), makeArrayRef(Indexes, Size));
|
||||||
auto V1 = II->getArgOperand(0);
|
auto V1 = II->getArgOperand(0);
|
||||||
auto V2 = UndefValue::get(V1->getType());
|
auto V2 = UndefValue::get(V1->getType());
|
||||||
auto Shuffle = Builder->CreateShuffleVector(V1, V2, C);
|
auto Shuffle = Builder->CreateShuffleVector(V1, V2, NewC);
|
||||||
return ReplaceInstUsesWith(CI, Shuffle);
|
return ReplaceInstUsesWith(CI, Shuffle);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -318,7 +318,7 @@ define <4 x float> @test_vpermilvar_ps(<4 x float> %v) {
|
|||||||
declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>)
|
declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>)
|
||||||
define <8 x float> @test_vpermilvar_ps_256(<8 x float> %v) {
|
define <8 x float> @test_vpermilvar_ps_256(<8 x float> %v) {
|
||||||
; CHECK-LABEL: @test_vpermilvar_ps_256(
|
; CHECK-LABEL: @test_vpermilvar_ps_256(
|
||||||
; CHECK: shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
|
; CHECK: shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
|
||||||
%a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
|
%a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
|
||||||
ret <8 x float> %a
|
ret <8 x float> %a
|
||||||
}
|
}
|
||||||
@ -327,15 +327,15 @@ declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i32>)
|
|||||||
define <2 x double> @test_vpermilvar_pd(<2 x double> %v) {
|
define <2 x double> @test_vpermilvar_pd(<2 x double> %v) {
|
||||||
; CHECK-LABEL: @test_vpermilvar_pd(
|
; CHECK-LABEL: @test_vpermilvar_pd(
|
||||||
; CHECK: shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 0>
|
; CHECK: shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 0>
|
||||||
%a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i32> <i32 1, i32 0>)
|
%a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i32> <i32 2, i32 0>)
|
||||||
ret <2 x double> %a
|
ret <2 x double> %a
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i32>)
|
declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i32>)
|
||||||
define <4 x double> @test_vpermilvar_pd_256(<4 x double> %v) {
|
define <4 x double> @test_vpermilvar_pd_256(<4 x double> %v) {
|
||||||
; CHECK-LABEL: @test_vpermilvar_pd_256(
|
; CHECK-LABEL: @test_vpermilvar_pd_256(
|
||||||
; CHECK: shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
; CHECK: shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
|
||||||
%a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
|
%a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i32> <i32 3, i32 1, i32 2, i32 0>)
|
||||||
ret <4 x double> %a
|
ret <4 x double> %a
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user