mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-13 23:18:58 +00:00
[SLP] Fix PR23510: Try to find best possible vectorizable stores.
Summary: The analysis of the store sequence goes in straight order - from the first store to the last. But the best opportunity for vectorization will happen if we're going to use reverse order - from the last store to the first. It may be best because usually users have some initialization part + further processing, and this first initialization may confuse the SLP vectorizer. Reviewers: RKSimon, hfinkel, mkuper, spatel Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D39606 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@317821 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
8b45407e72
commit
9daf9c5c89
@ -4316,7 +4316,8 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
|
||||
|
||||
bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
|
||||
BoUpSLP &R) {
|
||||
SetVector<StoreInst *> Heads, Tails;
|
||||
SetVector<StoreInst *> Heads;
|
||||
SmallDenseSet<StoreInst *> Tails;
|
||||
SmallDenseMap<StoreInst *, StoreInst *> ConsecutiveChain;
|
||||
|
||||
// We may run into multiple chains that merge into a single chain. We mark the
|
||||
@ -4324,45 +4325,51 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
|
||||
BoUpSLP::ValueSet VectorizedStores;
|
||||
bool Changed = false;
|
||||
|
||||
// Do a quadratic search on all of the given stores and find
|
||||
// Do a quadratic search on all of the given stores in reverse order and find
|
||||
// all of the pairs of stores that follow each other.
|
||||
SmallVector<unsigned, 16> IndexQueue;
|
||||
for (unsigned i = 0, e = Stores.size(); i < e; ++i) {
|
||||
IndexQueue.clear();
|
||||
unsigned E = Stores.size();
|
||||
IndexQueue.resize(E - 1);
|
||||
for (unsigned I = E; I > 0; --I) {
|
||||
unsigned Idx = I - 1;
|
||||
// If a store has multiple consecutive store candidates, search Stores
|
||||
// array according to the sequence: from i+1 to e, then from i-1 to 0.
|
||||
// array according to the sequence: Idx-1, Idx+1, Idx-2, Idx+2, ...
|
||||
// This is because usually pairing with immediate succeeding or preceding
|
||||
// candidate create the best chance to find slp vectorization opportunity.
|
||||
unsigned j = 0;
|
||||
for (j = i + 1; j < e; ++j)
|
||||
IndexQueue.push_back(j);
|
||||
for (j = i; j > 0; --j)
|
||||
IndexQueue.push_back(j - 1);
|
||||
unsigned Offset = 1;
|
||||
unsigned Cnt = 0;
|
||||
for (unsigned J = 0; J < E - 1; ++J, ++Offset) {
|
||||
if (Idx >= Offset) {
|
||||
IndexQueue[Cnt] = Idx - Offset;
|
||||
++Cnt;
|
||||
}
|
||||
if (Idx + Offset < E) {
|
||||
IndexQueue[Cnt] = Idx + Offset;
|
||||
++Cnt;
|
||||
}
|
||||
}
|
||||
|
||||
for (auto &k : IndexQueue) {
|
||||
if (isConsecutiveAccess(Stores[i], Stores[k], *DL, *SE)) {
|
||||
Tails.insert(Stores[k]);
|
||||
Heads.insert(Stores[i]);
|
||||
ConsecutiveChain[Stores[i]] = Stores[k];
|
||||
for (auto K : IndexQueue) {
|
||||
if (isConsecutiveAccess(Stores[K], Stores[Idx], *DL, *SE)) {
|
||||
Tails.insert(Stores[Idx]);
|
||||
Heads.insert(Stores[K]);
|
||||
ConsecutiveChain[Stores[K]] = Stores[Idx];
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// For stores that start but don't end a link in the chain:
|
||||
for (SetVector<StoreInst *>::iterator it = Heads.begin(), e = Heads.end();
|
||||
it != e; ++it) {
|
||||
if (Tails.count(*it))
|
||||
for (auto *SI : llvm::reverse(Heads)) {
|
||||
if (Tails.count(SI))
|
||||
continue;
|
||||
|
||||
// We found a store instr that starts a chain. Now follow the chain and try
|
||||
// to vectorize it.
|
||||
BoUpSLP::ValueList Operands;
|
||||
StoreInst *I = *it;
|
||||
StoreInst *I = SI;
|
||||
// Collect the chain into a list.
|
||||
while (Tails.count(I) || Heads.count(I)) {
|
||||
if (VectorizedStores.count(I))
|
||||
break;
|
||||
while ((Tails.count(I) || Heads.count(I)) && !VectorizedStores.count(I)) {
|
||||
Operands.push_back(I);
|
||||
// Move to the next value in the chain.
|
||||
I = ConsecutiveChain[I];
|
||||
|
@ -25,26 +25,21 @@ define void @_Z8DistanceIlLi5EEvPfiPmS0_(float* %p1, i32 %p2, i64* %p3, float* %
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX2]], align 4
|
||||
; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP0]], [[TMP1]]
|
||||
; CHECK-NEXT: store float [[ADD]], float* [[ARRAYIDX2]], align 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[P3]], align 8
|
||||
; CHECK-NEXT: [[SHR:%.*]] = lshr i64 [[TMP2]], 5
|
||||
; CHECK-NEXT: store i64 [[SHR]], i64* [[P3]], align 8
|
||||
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 1
|
||||
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 2
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[ARRAYIDX4]] to <2 x i64>*
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[TMP3]], align 8
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = lshr <2 x i64> [[TMP4]], <i64 5, i64 5>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[ARRAYIDX4]] to <2 x i64>*
|
||||
; CHECK-NEXT: store <2 x i64> [[TMP5]], <2 x i64>* [[TMP6]], align 8
|
||||
; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 3
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = load i64, i64* [[ARRAYIDX8]], align 8
|
||||
; CHECK-NEXT: [[SHR9:%.*]] = lshr i64 [[TMP7]], 5
|
||||
; CHECK-NEXT: store i64 [[SHR9]], i64* [[ARRAYIDX8]], align 8
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[P3]] to <4 x i64>*
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 8
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], <i64 5, i64 5, i64 5, i64 5>
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64* [[P3]] to <4 x i64>*
|
||||
; CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[TMP5]], align 8
|
||||
; CHECK-NEXT: [[ADD_PTR11:%.*]] = getelementptr inbounds float, float* [[ADD_PTR]], i64 [[IDX_EXT]]
|
||||
; CHECK-NEXT: [[AND:%.*]] = and i64 [[SHR]], 5
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP4]], i32 0
|
||||
; CHECK-NEXT: [[AND:%.*]] = and i64 [[TMP6]], 5
|
||||
; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[ADD_PTR11]], i64 [[AND]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = load float, float* [[ARRAYIDX13]], align 4
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = load float, float* [[P4]], align 4
|
||||
; CHECK-NEXT: [[ADD15:%.*]] = fadd float [[TMP8]], [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX13]], align 4
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = load float, float* [[P4]], align 4
|
||||
; CHECK-NEXT: [[ADD15:%.*]] = fadd float [[TMP7]], [[TMP8]]
|
||||
; CHECK-NEXT: store float [[ADD15]], float* [[P4]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
|
Loading…
Reference in New Issue
Block a user