mirror of https://github.com/RPCSX/llvm.git
[InstCombine][SSE] Added support to the VPERMILVAR shuffle combine to accept UNDEF elements.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@268204 91177308-0d34-0410-b5e6-96231b3b80d8
commit c244765b2c
parent 6f6c7b0654
lib/Transforms/InstCombine/InstCombineCalls.cpp

@@ -604,7 +604,7 @@ static Value *simplifyX86pshufb(const IntrinsicInst &II,
          "Unexpected number of elements in shuffle mask!");
 
   // Construct a shuffle mask from constant integers or UNDEFs.
-  Constant *Indexes[32] = { NULL };
+  Constant *Indexes[32] = {NULL};
 
   // Each byte in the shuffle control mask forms an index to permute the
   // corresponding byte in the destination operand.
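For context, the comment above summarizes the PSHUFB control-byte encoding. A minimal scalar model of one 128-bit lane (an illustrative sketch, not code from this patch; pshufb128 is a made-up name):

#include <cstdint>

// Scalar model of a 16-byte PSHUFB lane: if bit 7 of a control byte is set
// the destination byte is zeroed, otherwise the low four bits select the
// source byte within the lane.
static void pshufb128(const uint8_t Src[16], const uint8_t Ctl[16],
                      uint8_t Dst[16]) {
  for (int I = 0; I != 16; ++I)
    Dst[I] = (Ctl[I] & 0x80) ? 0 : Src[Ctl[I] & 0x0F];
}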
@@ -644,39 +644,46 @@ static Value *simplifyX86vpermilvar(const IntrinsicInst &II,
   if (!V)
     return nullptr;
 
-  unsigned Size = cast<VectorType>(V->getType())->getNumElements();
-  assert(Size == 8 || Size == 4 || Size == 2);
+  auto *MaskEltTy = Type::getInt32Ty(II.getContext());
+  unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
+  assert(NumElts == 8 || NumElts == 4 || NumElts == 2);
 
-  // Initialize the resulting shuffle mask to all zeroes.
-  uint32_t Indexes[8] = { 0 };
+  // Construct a shuffle mask from constant integers or UNDEFs.
+  Constant *Indexes[8] = {NULL};
 
-  for (unsigned I = 0; I < Size; ++I) {
+  // The intrinsics only read one or two bits, clear the rest.
+  for (unsigned I = 0; I < NumElts; ++I) {
     Constant *COp = V->getAggregateElement(I);
-    if (!COp || !isa<ConstantInt>(COp))
+    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
       return nullptr;
 
-    // The intrinsics only read one or two bits, clear the rest.
-    int32_t Index = cast<ConstantInt>(COp)->getValue().getZExtValue() & 0x3;
+    if (isa<UndefValue>(COp)) {
+      Indexes[I] = UndefValue::get(MaskEltTy);
+      continue;
+    }
+
+    APInt Index = cast<ConstantInt>(COp)->getValue();
+    Index = Index.zextOrTrunc(32).getLoBits(2);
 
     // The PD variants uses bit 1 to select per-lane element index, so
     // shift down to convert to generic shuffle mask index.
     if (II.getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd ||
         II.getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256)
-      Index >>= 1;
-    Indexes[I] = Index;
-  }
-
-  // The _256 variants are a bit trickier since the mask bits always index
-  // into the corresponding 128 half. In order to convert to a generic
-  // shuffle, we have to make that explicit.
-  if (II.getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_ps_256 ||
-      II.getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256) {
-    for (unsigned I = Size / 2; I < Size; ++I)
-      Indexes[I] += Size / 2;
+      Index = Index.lshr(1);
+
+    // The _256 variants are a bit trickier since the mask bits always index
+    // into the corresponding 128 half. In order to convert to a generic
+    // shuffle, we have to make that explicit.
+    if ((II.getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_ps_256 ||
+         II.getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256) &&
+        ((NumElts / 2) <= I)) {
+      Index += APInt(32, NumElts / 2);
+    }
+
+    Indexes[I] = ConstantInt::get(MaskEltTy, Index);
   }
 
-  auto ShuffleMask =
-      ConstantDataVector::get(V->getContext(), makeArrayRef(Indexes, Size));
+  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, NumElts));
   auto V1 = II.getArgOperand(0);
   auto V2 = UndefValue::get(V1->getType());
   return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
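The representational change worth noting: the old code accumulated raw uint32_t indices and built the mask with ConstantDataVector::get, which has no way to encode undef lanes; the new code builds Constant* elements so an UndefValue can flow straight into the mask via ConstantVector::get. A minimal sketch of that pattern (buildMaskWithUndef is a hypothetical helper, not part of the patch):

#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"

using namespace llvm;

// Hypothetical helper: builds the <4 x i32> mask <undef, 2, 1, undef> seen in
// the tests below. ConstantVector::get accepts undef lanes and still folds to
// a ConstantDataVector when every lane is a plain integer.
static Constant *buildMaskWithUndef(LLVMContext &Ctx) {
  Type *I32 = Type::getInt32Ty(Ctx);
  Constant *Elts[4] = {UndefValue::get(I32), ConstantInt::get(I32, 2),
                       ConstantInt::get(I32, 1), UndefValue::get(I32)};
  return ConstantVector::get(Elts);
}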
test/Transforms/InstCombine/x86-vpermil.ll

@@ -117,8 +117,8 @@ define <4 x double> @test_vpermilvar_pd_256(<4 x double> %v) {
 
 define <4 x float> @undef_test_vpermilvar_ps(<4 x float> %v) {
 ; CHECK-LABEL: @undef_test_vpermilvar_ps(
-; CHECK-NEXT:    [[A:%.*]] = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> <i32 undef, i32 2, i32 1, i32 undef>)
-; CHECK-NEXT:    ret <4 x float> [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 undef>
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> <i32 undef, i32 2, i32 1, i32 undef>)
   ret <4 x float> %a
@@ -126,8 +126,8 @@ define <4 x float> @undef_test_vpermilvar_ps(<4 x float> %v) {
 
 define <8 x float> @undef_test_vpermilvar_ps_256(<8 x float> %v) {
 ; CHECK-LABEL: @undef_test_vpermilvar_ps_256(
-; CHECK-NEXT:    [[A:%.*]] = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> <i32 undef, i32 6, i32 5, i32 undef, i32 3, i32 2, i32 1, i32 0>)
-; CHECK-NEXT:    ret <8 x float> [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 undef, i32 2, i32 1, i32 undef, i32 7, i32 6, i32 5, i32 4>
+; CHECK-NEXT:    ret <8 x float> [[TMP1]]
 ;
   %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> <i32 undef, i32 6, i32 5, i32 undef, i32 3, i32 2, i32 1, i32 0>)
   ret <8 x float> %a
@@ -135,8 +135,8 @@ define <8 x float> @undef_test_vpermilvar_ps_256(<8 x float> %v) {
 
 define <2 x double> @undef_test_vpermilvar_pd(<2 x double> %v) {
 ; CHECK-LABEL: @undef_test_vpermilvar_pd(
-; CHECK-NEXT:    [[A:%.*]] = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> <i64 undef, i64 0>)
-; CHECK-NEXT:    ret <2 x double> [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 undef, i32 0>
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> <i64 undef, i64 0>)
   ret <2 x double> %a
@@ -144,8 +144,8 @@ define <2 x double> @undef_test_vpermilvar_pd(<2 x double> %v) {
 
 define <4 x double> @undef_test_vpermilvar_pd_256(<4 x double> %v) {
 ; CHECK-LABEL: @undef_test_vpermilvar_pd_256(
-; CHECK-NEXT:    [[A:%.*]] = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> <i64 undef, i64 1, i64 2, i64 undef>)
-; CHECK-NEXT:    ret <4 x double> [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 undef, i32 0, i32 3, i32 undef>
+; CHECK-NEXT:    ret <4 x double> [[TMP1]]
 ;
   %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> <i64 undef, i64 1, i64 2, i64 undef>)
   ret <4 x double> %a
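As a worked check of the pd_256 case above, the decode steps from the patch can be replayed by hand (a standalone sketch for this one mask, not part of the patch; -1 stands in for an undef element):

#include <cstdint>
#include <cstdio>

int main() {
  // Mask operand from @undef_test_vpermilvar_pd_256; -1 marks undef lanes.
  const int64_t Mask[4] = {-1, 1, 2, -1};
  for (unsigned I = 0; I != 4; ++I) {
    if (Mask[I] < 0) {                // undef mask element stays undef
      std::printf("undef ");
      continue;
    }
    uint32_t Index = Mask[I] & 0x3;   // only the low two bits are read
    Index >>= 1;                      // PD variants select with mask bit 1
    if (I >= 2)                       // the high 128-bit lane indexes itself
      Index += 2;
    std::printf("%u ", Index);
  }
  std::printf("\n");                  // prints: undef 0 3 undef
  return 0;
}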