mirror of
https://github.com/RPCSX/llvm.git
synced 2025-02-04 11:27:34 +00:00
Generalize strided store pattern in interleave access pass
Summary: This patch generalizes the matching of strided store accesses to a broader class of shuffle masks. The more general rule is that each stride position must hold consecutive accesses: [x, y, ... z, x+1, y+1, ...z+1, x+2, y+2, ...z+2, ...]. The mask elements need not form a single contiguous range; there may be gaps between the starting values x, y, ... z. As before, undefs are allowed and are filled in with the adjacent elements. Reviewers: HaoLiu, mssimpso Subscribers: mkuper, delena, llvm-commits Differential Revision: https://reviews.llvm.org/D23646 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@289573 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
209c7e542d
commit
068dd02393
@ -162,12 +162,17 @@ static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
|
||||
return false;
|
||||
}
|
||||
|
||||
/// \brief Check if the mask is RE-interleave mask for an interleaved store.
|
||||
/// \brief Check if the mask can be used in an interleaved store.
|
||||
//
|
||||
/// It checks for a more general pattern than the RE-interleave mask.
|
||||
/// I.e. <x, y, ... z, x+1, y+1, ...z+1, x+2, y+2, ...z+2, ...>
|
||||
/// E.g. For a Factor of 2 (LaneLen=4): <4, 32, 5, 33, 6, 34, 7, 35>
|
||||
/// E.g. For a Factor of 3 (LaneLen=4): <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
|
||||
/// E.g. For a Factor of 4 (LaneLen=2): <8, 2, 12, 4, 9, 3, 13, 5>
|
||||
///
|
||||
/// I.e. <0, NumSubElts, ... , NumSubElts*(Factor - 1), 1, NumSubElts + 1, ...>
|
||||
///
|
||||
/// E.g. The RE-interleave mask (Factor = 2) could be:
|
||||
/// <0, 4, 1, 5, 2, 6, 3, 7>
|
||||
/// The particular case of an RE-interleave mask is:
|
||||
/// I.e. <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...>
|
||||
/// E.g. For a Factor of 2 (LaneLen=4): <0, 4, 1, 5, 2, 6, 3, 7>
|
||||
static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
|
||||
unsigned MaxFactor) {
|
||||
unsigned NumElts = Mask.size();
|
||||
@ -179,21 +184,72 @@ static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
|
||||
if (NumElts % Factor)
|
||||
continue;
|
||||
|
||||
unsigned NumSubElts = NumElts / Factor;
|
||||
if (!isPowerOf2_32(NumSubElts))
|
||||
unsigned LaneLen = NumElts / Factor;
|
||||
if (!isPowerOf2_32(LaneLen))
|
||||
continue;
|
||||
|
||||
// Check whether each element matchs the RE-interleaved rule. Ignore undef
|
||||
// elements.
|
||||
unsigned i = 0;
|
||||
for (; i < NumElts; i++)
|
||||
if (Mask[i] >= 0 &&
|
||||
static_cast<unsigned>(Mask[i]) !=
|
||||
(i % Factor) * NumSubElts + i / Factor)
|
||||
// Check whether each element matches the general interleaved rule.
|
||||
// Ignore undef elements, as long as the defined elements match the rule.
|
||||
// Outer loop processes all factors (x, y, z in the above example)
|
||||
unsigned I = 0, J;
|
||||
for (; I < Factor; I++) {
|
||||
unsigned SavedLaneValue;
|
||||
unsigned SavedNoUndefs = 0;
|
||||
|
||||
// Inner loop processes consecutive accesses (x, x+1... in the example)
|
||||
for (J = 0; J < LaneLen - 1; J++) {
|
||||
// Lane computes x's position in the Mask
|
||||
unsigned Lane = J * Factor + I;
|
||||
unsigned NextLane = Lane + Factor;
|
||||
int LaneValue = Mask[Lane];
|
||||
int NextLaneValue = Mask[NextLane];
|
||||
|
||||
// If both are defined, values must be sequential
|
||||
if (LaneValue >= 0 && NextLaneValue >= 0 &&
|
||||
LaneValue + 1 != NextLaneValue)
|
||||
break;
|
||||
|
||||
// If the next value is undef, save the current one as reference
|
||||
if (LaneValue >= 0 && NextLaneValue < 0) {
|
||||
SavedLaneValue = LaneValue;
|
||||
SavedNoUndefs = 1;
|
||||
}
|
||||
|
||||
// Undefs are allowed, but defined elements must still be consecutive:
|
||||
// i.e.: x,..., undef,..., x + 2,..., undef,..., undef,..., x + 5, ....
|
||||
// Verify this by storing the last non-undef followed by an undef
|
||||
// Check that following non-undef masks are incremented with the
|
||||
// corresponding distance.
|
||||
if (SavedNoUndefs > 0 && LaneValue < 0) {
|
||||
SavedNoUndefs++;
|
||||
if (NextLaneValue >= 0 &&
|
||||
SavedLaneValue + SavedNoUndefs != (unsigned)NextLaneValue)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (J < LaneLen - 1)
|
||||
break;
|
||||
|
||||
// Find a RE-interleaved mask of current factor.
|
||||
if (i == NumElts)
|
||||
int StartMask = 0;
|
||||
if (Mask[I] >= 0) {
|
||||
// Check that the start of the I range (J=0) is greater than 0
|
||||
StartMask = Mask[I];
|
||||
} else if (Mask[(LaneLen - 1) * Factor + I] >= 0) {
|
||||
// StartMask defined by the last value in lane
|
||||
StartMask = Mask[(LaneLen - 1) * Factor + I] - J;
|
||||
} else if (SavedNoUndefs > 0) {
|
||||
// StartMask defined by some non-zero value in the j loop
|
||||
StartMask = SavedLaneValue - (LaneLen - 1 - SavedNoUndefs);
|
||||
}
|
||||
// else StartMask remains set to 0, i.e. all elements are undefs
|
||||
|
||||
if (StartMask < 0)
|
||||
break;
|
||||
}
|
||||
|
||||
// Found an interleaved mask of current factor.
|
||||
if (I == Factor)
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -7281,7 +7281,7 @@ static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start,
|
||||
///
|
||||
/// E.g. Lower an interleaved store (Factor = 3):
|
||||
/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
|
||||
/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
|
||||
/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
|
||||
/// store <12 x i32> %i.vec, <12 x i32>* %ptr
|
||||
///
|
||||
/// Into:
|
||||
@ -7292,6 +7292,17 @@ static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start,
|
||||
///
|
||||
/// Note that the new shufflevectors will be removed and we'll only generate one
|
||||
/// st3 instruction in CodeGen.
|
||||
///
|
||||
/// Example for a more general valid mask (Factor 3). Lower:
|
||||
/// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
|
||||
/// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
|
||||
/// store <12 x i32> %i.vec, <12 x i32>* %ptr
|
||||
///
|
||||
/// Into:
|
||||
/// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> %v1, <4, 5, 6, 7>
|
||||
/// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> %v1, <32, 33, 34, 35>
|
||||
/// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> %v1, <16, 17, 18, 19>
|
||||
/// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
|
||||
bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
|
||||
ShuffleVectorInst *SVI,
|
||||
unsigned Factor) const {
|
||||
@ -7302,9 +7313,9 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
|
||||
assert(VecTy->getVectorNumElements() % Factor == 0 &&
|
||||
"Invalid interleaved store");
|
||||
|
||||
unsigned NumSubElts = VecTy->getVectorNumElements() / Factor;
|
||||
unsigned LaneLen = VecTy->getVectorNumElements() / Factor;
|
||||
Type *EltTy = VecTy->getVectorElementType();
|
||||
VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts);
|
||||
VectorType *SubVecTy = VectorType::get(EltTy, LaneLen);
|
||||
|
||||
const DataLayout &DL = SI->getModule()->getDataLayout();
|
||||
unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
|
||||
@ -7329,7 +7340,7 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
|
||||
Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
|
||||
Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
|
||||
|
||||
SubVecTy = VectorType::get(IntTy, NumSubElts);
|
||||
SubVecTy = VectorType::get(IntTy, LaneLen);
|
||||
}
|
||||
|
||||
Type *PtrTy = SubVecTy->getPointerTo(SI->getPointerAddressSpace());
|
||||
@ -7343,9 +7354,28 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
|
||||
SmallVector<Value *, 5> Ops;
|
||||
|
||||
// Split the shufflevector operands into sub vectors for the new stN call.
|
||||
for (unsigned i = 0; i < Factor; i++)
|
||||
Ops.push_back(Builder.CreateShuffleVector(
|
||||
Op0, Op1, getSequentialMask(Builder, NumSubElts * i, NumSubElts)));
|
||||
auto Mask = SVI->getShuffleMask();
|
||||
for (unsigned i = 0; i < Factor; i++) {
|
||||
if (Mask[i] >= 0) {
|
||||
Ops.push_back(Builder.CreateShuffleVector(
|
||||
Op0, Op1, getSequentialMask(Builder, Mask[i], LaneLen)));
|
||||
} else {
|
||||
unsigned StartMask = 0;
|
||||
for (unsigned j = 1; j < LaneLen; j++) {
|
||||
if (Mask[j*Factor + i] >= 0) {
|
||||
StartMask = Mask[j*Factor + i] - j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Note: If all elements in a chunk are undefs, StartMask=0!
|
||||
// Note: Filling undef gaps with random elements is ok, since
|
||||
// those elements were being written anyway (with undefs).
|
||||
// In the case of all undefs we're defaulting to using elems from 0
|
||||
// Note: StartMask cannot be negative, it's checked in isReInterleaveMask
|
||||
Ops.push_back(Builder.CreateShuffleVector(
|
||||
Op0, Op1, getSequentialMask(Builder, StartMask, LaneLen)));
|
||||
}
|
||||
}
|
||||
|
||||
Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), PtrTy));
|
||||
Builder.CreateCall(StNFunc, Ops);
|
||||
|
@ -13191,6 +13191,17 @@ static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start,
|
||||
///
|
||||
/// Note that the new shufflevectors will be removed and we'll only generate one
|
||||
/// vst3 instruction in CodeGen.
|
||||
///
|
||||
/// Example for a more general valid mask (Factor 3). Lower:
|
||||
/// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
|
||||
/// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
|
||||
/// store <12 x i32> %i.vec, <12 x i32>* %ptr
|
||||
///
|
||||
/// Into:
|
||||
/// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> %v1, <4, 5, 6, 7>
|
||||
/// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> %v1, <32, 33, 34, 35>
|
||||
/// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> %v1, <16, 17, 18, 19>
|
||||
/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
|
||||
bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
|
||||
ShuffleVectorInst *SVI,
|
||||
unsigned Factor) const {
|
||||
@ -13201,9 +13212,9 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
|
||||
assert(VecTy->getVectorNumElements() % Factor == 0 &&
|
||||
"Invalid interleaved store");
|
||||
|
||||
unsigned NumSubElts = VecTy->getVectorNumElements() / Factor;
|
||||
unsigned LaneLen = VecTy->getVectorNumElements() / Factor;
|
||||
Type *EltTy = VecTy->getVectorElementType();
|
||||
VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts);
|
||||
VectorType *SubVecTy = VectorType::get(EltTy, LaneLen);
|
||||
|
||||
const DataLayout &DL = SI->getModule()->getDataLayout();
|
||||
unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
|
||||
@ -13230,7 +13241,7 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
|
||||
Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
|
||||
Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
|
||||
|
||||
SubVecTy = VectorType::get(IntTy, NumSubElts);
|
||||
SubVecTy = VectorType::get(IntTy, LaneLen);
|
||||
}
|
||||
|
||||
static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
|
||||
@ -13246,9 +13257,28 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
|
||||
SI->getModule(), StoreInts[Factor - 2], Tys);
|
||||
|
||||
// Split the shufflevector operands into sub vectors for the new vstN call.
|
||||
for (unsigned i = 0; i < Factor; i++)
|
||||
Ops.push_back(Builder.CreateShuffleVector(
|
||||
Op0, Op1, getSequentialMask(Builder, NumSubElts * i, NumSubElts)));
|
||||
auto Mask = SVI->getShuffleMask();
|
||||
for (unsigned i = 0; i < Factor; i++) {
|
||||
if (Mask[i] >= 0) {
|
||||
Ops.push_back(Builder.CreateShuffleVector(
|
||||
Op0, Op1, getSequentialMask(Builder, Mask[i], LaneLen)));
|
||||
} else {
|
||||
unsigned StartMask = 0;
|
||||
for (unsigned j = 1; j < LaneLen; j++) {
|
||||
if (Mask[j*Factor + i] >= 0) {
|
||||
StartMask = Mask[j*Factor + i] - j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Note: If all elements in a chunk are undefs, StartMask=0!
|
||||
// Note: Filling undef gaps with random elements is ok, since
|
||||
// those elements were being written anyway (with undefs).
|
||||
// In the case of all undefs we're defaulting to using elems from 0
|
||||
// Note: StartMask cannot be negative, it's checked in isReInterleaveMask
|
||||
Ops.push_back(Builder.CreateShuffleVector(
|
||||
Op0, Op1, getSequentialMask(Builder, StartMask, LaneLen)));
|
||||
}
|
||||
}
|
||||
|
||||
Ops.push_back(Builder.getInt32(SI->getAlignment()));
|
||||
Builder.CreateCall(VstNFunc, Ops);
|
||||
|
@ -280,3 +280,114 @@ define i32 @load_factor2_with_extract_user(<8 x i32>* %a) {
|
||||
%3 = extractelement <8 x i32> %1, i32 2
|
||||
ret i32 %3
|
||||
}
|
||||
|
||||
; NEON-LABEL: store_general_mask_factor4:
|
||||
; NEON: st4 { v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s }, [x0]
|
||||
; NONEON-LABEL: store_general_mask_factor4:
|
||||
; NONEON-NOT: st4
|
||||
define void @store_general_mask_factor4(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
|
||||
%base = bitcast i32* %ptr to <8 x i32>*
|
||||
%i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
|
||||
store <8 x i32> %i.vec, <8 x i32>* %base, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; NEON-LABEL: store_general_mask_factor4_undefbeg:
|
||||
; NEON: st4 { v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s }, [x0]
|
||||
; NONEON-LABEL: store_general_mask_factor4_undefbeg:
|
||||
; NONEON-NOT: st4
|
||||
define void @store_general_mask_factor4_undefbeg(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
|
||||
%base = bitcast i32* %ptr to <8 x i32>*
|
||||
%i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 undef, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
|
||||
store <8 x i32> %i.vec, <8 x i32>* %base, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; NEON-LABEL: store_general_mask_factor4_undefend:
|
||||
; NEON: st4 { v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s }, [x0]
|
||||
; NONEON-LABEL: store_general_mask_factor4_undefend:
|
||||
; NONEON-NOT: st4
|
||||
define void @store_general_mask_factor4_undefend(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
|
||||
%base = bitcast i32* %ptr to <8 x i32>*
|
||||
%i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 undef>
|
||||
store <8 x i32> %i.vec, <8 x i32>* %base, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; NEON-LABEL: store_general_mask_factor4_undefmid:
|
||||
; NEON: st4 { v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s }, [x0]
|
||||
; NONEON-LABEL: store_general_mask_factor4_undefmid:
|
||||
; NONEON-NOT: st4
|
||||
define void @store_general_mask_factor4_undefmid(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
|
||||
%base = bitcast i32* %ptr to <8 x i32>*
|
||||
%i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 undef, i32 32, i32 8, i32 5, i32 17, i32 undef, i32 9>
|
||||
store <8 x i32> %i.vec, <8 x i32>* %base, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; NEON-LABEL: store_general_mask_factor4_undefmulti:
|
||||
; NEON: st4 { v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s }, [x0]
|
||||
; NONEON-LABEL: store_general_mask_factor4_undefmulti:
|
||||
; NONEON-NOT: st4
|
||||
define void @store_general_mask_factor4_undefmulti(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
|
||||
%base = bitcast i32* %ptr to <8 x i32>*
|
||||
%i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 undef, i32 undef, i32 8, i32 undef, i32 undef, i32 undef, i32 9>
|
||||
store <8 x i32> %i.vec, <8 x i32>* %base, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; NEON-LABEL: store_general_mask_factor3:
|
||||
; NEON: st3 { v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s }, [x0]
|
||||
; NONEON-LABEL: store_general_mask_factor3:
|
||||
; NONEON-NOT: st3
|
||||
define void @store_general_mask_factor3(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
|
||||
%base = bitcast i32* %ptr to <12 x i32>*
|
||||
%i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 5, i32 33, i32 17, i32 6, i32 34, i32 18, i32 7, i32 35, i32 19>
|
||||
store <12 x i32> %i.vec, <12 x i32>* %base, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; NEON-LABEL: store_general_mask_factor3_undefmultimid:
|
||||
; NEON: st3 { v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s }, [x0]
|
||||
; NONEON-LABEL: store_general_mask_factor3_undefmultimid:
|
||||
; NONEON-NOT: st3
|
||||
define void @store_general_mask_factor3_undefmultimid(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
|
||||
%base = bitcast i32* %ptr to <12 x i32>*
|
||||
%i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 7, i32 35, i32 19>
|
||||
store <12 x i32> %i.vec, <12 x i32>* %base, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; NEON-LABEL: store_general_mask_factor3_undef_fail:
|
||||
; NEON-NOT: st3
|
||||
; NONEON-LABEL: store_general_mask_factor3_undef_fail:
|
||||
; NONEON-NOT: st3
|
||||
define void @store_general_mask_factor3_undef_fail(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
|
||||
%base = bitcast i32* %ptr to <12 x i32>*
|
||||
%i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 8, i32 35, i32 19>
|
||||
store <12 x i32> %i.vec, <12 x i32>* %base, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; NEON-LABEL: store_general_mask_factor3_undeflane:
|
||||
; NEON: st3 { v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s }, [x0]
|
||||
; NONEON-LABEL: store_general_mask_factor3_undeflane:
|
||||
; NONEON-NOT: st3
|
||||
define void @store_general_mask_factor3_undeflane(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
|
||||
%base = bitcast i32* %ptr to <12 x i32>*
|
||||
%i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
|
||||
store <12 x i32> %i.vec, <12 x i32>* %base, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; NEON-LABEL: store_general_mask_factor3_negativestart:
|
||||
; NEON-NOT: st3
|
||||
; NONEON-LABEL: store_general_mask_factor3_negativestart:
|
||||
; NONEON-NOT: st3
|
||||
define void @store_general_mask_factor3_negativestart(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
|
||||
%base = bitcast i32* %ptr to <12 x i32>*
|
||||
%i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 2, i32 35, i32 19>
|
||||
store <12 x i32> %i.vec, <12 x i32>* %base, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -316,3 +316,147 @@ define i32 @load_factor2_with_extract_user(<8 x i32>* %a) {
|
||||
%3 = extractelement <8 x i32> %1, i32 2
|
||||
ret i32 %3
|
||||
}
|
||||
|
||||
; NEON-LABEL: store_general_mask_factor4:
|
||||
; NEON: vst4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
|
||||
; NONEON-LABEL: store_general_mask_factor4:
|
||||
; NONEON-NOT: vst4.32
|
||||
define void @store_general_mask_factor4(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
|
||||
%base = bitcast i32* %ptr to <8 x i32>*
|
||||
%i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
|
||||
store <8 x i32> %i.vec, <8 x i32>* %base, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; NEON-LABEL: store_general_mask_factor4_undefbeg:
|
||||
; NEON: vst4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
|
||||
; NONEON-LABEL: store_general_mask_factor4_undefbeg:
|
||||
; NONEON-NOT: vst4.32
|
||||
define void @store_general_mask_factor4_undefbeg(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
|
||||
%base = bitcast i32* %ptr to <8 x i32>*
|
||||
%i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 undef, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
|
||||
store <8 x i32> %i.vec, <8 x i32>* %base, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; NEON-LABEL: store_general_mask_factor4_undefend:
|
||||
; NEON: vst4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
|
||||
; NONEON-LABEL: store_general_mask_factor4_undefend:
|
||||
; NONEON-NOT: vst4.32
|
||||
define void @store_general_mask_factor4_undefend(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
|
||||
%base = bitcast i32* %ptr to <8 x i32>*
|
||||
%i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 undef>
|
||||
store <8 x i32> %i.vec, <8 x i32>* %base, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; NEON-LABEL: store_general_mask_factor4_undefmid:
|
||||
; NEON: vst4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
|
||||
; NONEON-LABEL: store_general_mask_factor4_undefmid:
|
||||
; NONEON-NOT: vst4.32
|
||||
define void @store_general_mask_factor4_undefmid(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
|
||||
%base = bitcast i32* %ptr to <8 x i32>*
|
||||
%i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 undef, i32 32, i32 8, i32 5, i32 17, i32 undef, i32 9>
|
||||
store <8 x i32> %i.vec, <8 x i32>* %base, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; NEON-LABEL: store_general_mask_factor4_undefmulti:
|
||||
; NEON: vst4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
|
||||
; NONEON-LABEL: store_general_mask_factor4_undefmulti:
|
||||
; NONEON-NOT: vst4.32
|
||||
define void @store_general_mask_factor4_undefmulti(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
|
||||
%base = bitcast i32* %ptr to <8 x i32>*
|
||||
%i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 undef, i32 undef, i32 8, i32 undef, i32 undef, i32 undef, i32 9>
|
||||
store <8 x i32> %i.vec, <8 x i32>* %base, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; NEON-LABEL: store_general_mask_factor3:
|
||||
; NEON: vst3.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
|
||||
; NONEON-LABEL: store_general_mask_factor3:
|
||||
; NONEON-NOT: vst3.32
|
||||
define void @store_general_mask_factor3(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
|
||||
%base = bitcast i32* %ptr to <12 x i32>*
|
||||
%i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 5, i32 33, i32 17, i32 6, i32 34, i32 18, i32 7, i32 35, i32 19>
|
||||
store <12 x i32> %i.vec, <12 x i32>* %base, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; NEON-LABEL: store_general_mask_factor3_undefmultimid:
|
||||
; NEON: vst3.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
|
||||
; NONEON-LABEL: store_general_mask_factor3_undefmultimid:
|
||||
; NONEON-NOT: vst3.32
|
||||
define void @store_general_mask_factor3_undefmultimid(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
|
||||
%base = bitcast i32* %ptr to <12 x i32>*
|
||||
%i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 7, i32 35, i32 19>
|
||||
store <12 x i32> %i.vec, <12 x i32>* %base, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; NEON-LABEL: store_general_mask_factor3_undef_fail:
|
||||
; NEON-NOT: vst3.32
|
||||
; NONEON-LABEL: store_general_mask_factor3_undef_fail:
|
||||
; NONEON-NOT: vst3.32
|
||||
define void @store_general_mask_factor3_undef_fail(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
|
||||
%base = bitcast i32* %ptr to <12 x i32>*
|
||||
%i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 8, i32 35, i32 19>
|
||||
store <12 x i32> %i.vec, <12 x i32>* %base, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; NEON-LABEL: store_general_mask_factor3_undeflane:
|
||||
; NEON: vst3.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
|
||||
; NONEON-LABEL: store_general_mask_factor3_undeflane:
|
||||
; NONEON-NOT: vst3.32
|
||||
define void @store_general_mask_factor3_undeflane(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
|
||||
%base = bitcast i32* %ptr to <12 x i32>*
|
||||
%i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
|
||||
store <12 x i32> %i.vec, <12 x i32>* %base, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; NEON-LABEL: store_general_mask_factor3_endstart_fail:
|
||||
; NEON-NOT: vst3.32
|
||||
; NONEON-LABEL: store_general_mask_factor3_endstart_fail:
|
||||
; NONEON-NOT: vst3.32
|
||||
define void @store_general_mask_factor3_endstart_fail(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
|
||||
%base = bitcast i32* %ptr to <12 x i32>*
|
||||
%i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 2, i32 35, i32 19>
|
||||
store <12 x i32> %i.vec, <12 x i32>* %base, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; NEON-LABEL: store_general_mask_factor3_endstart_pass:
|
||||
; NEON: vst3.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
|
||||
; NONEON-LABEL: store_general_mask_factor3_endstart_pass:
|
||||
; NONEON-NOT: vst3.32
|
||||
define void @store_general_mask_factor3_endstart_pass(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
|
||||
%base = bitcast i32* %ptr to <12 x i32>*
|
||||
%i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 7, i32 35, i32 19>
|
||||
store <12 x i32> %i.vec, <12 x i32>* %base, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; NEON-LABEL: store_general_mask_factor3_midstart_fail:
|
||||
; NEON-NOT: vst3.32
|
||||
; NONEON-LABEL: store_general_mask_factor3_midstart_fail:
|
||||
; NONEON-NOT: vst3.32
|
||||
define void @store_general_mask_factor3_midstart_fail(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
|
||||
%base = bitcast i32* %ptr to <12 x i32>*
|
||||
%i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 0, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
|
||||
store <12 x i32> %i.vec, <12 x i32>* %base, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; NEON-LABEL: store_general_mask_factor3_midstart_pass:
|
||||
; NEON: vst3.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
|
||||
; NONEON-LABEL: store_general_mask_factor3_midstart_pass:
|
||||
; NONEON-NOT: vst3.32
|
||||
define void @store_general_mask_factor3_midstart_pass(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
|
||||
%base = bitcast i32* %ptr to <12 x i32>*
|
||||
%i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 1, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
|
||||
store <12 x i32> %i.vec, <12 x i32>* %base, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user