mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-26 05:00:26 +00:00
SLP Vectorizer: Fix a bug in the code that does CSE on the generated gather sequences.
Make sure that we don't replace one gather sequence with another (RAUW) unless the replacement dominates the sequence being replaced. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184674 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
787ad64b98
commit
722b0a4d29
@ -127,8 +127,9 @@ public:
|
||||
static const int MAX_COST = INT_MIN;
|
||||
|
||||
FuncSLP(Function *Func, ScalarEvolution *Se, DataLayout *Dl,
|
||||
TargetTransformInfo *Tti, AliasAnalysis *Aa, LoopInfo *Li) :
|
||||
F(Func), SE(Se), DL(Dl), TTI(Tti), AA(Aa), LI(Li),
|
||||
TargetTransformInfo *Tti, AliasAnalysis *Aa, LoopInfo *Li,
|
||||
DominatorTree *Dt) :
|
||||
F(Func), SE(Se), DL(Dl), TTI(Tti), AA(Aa), LI(Li), DT(Dt),
|
||||
Builder(Se->getContext()) {
|
||||
for (Function::iterator it = F->begin(), e = F->end(); it != e; ++it) {
|
||||
BasicBlock *BB = it;
|
||||
@ -255,6 +256,7 @@ public:
|
||||
TargetTransformInfo *TTI;
|
||||
AliasAnalysis *AA;
|
||||
LoopInfo *LI;
|
||||
DominatorTree *DT;
|
||||
/// Instruction builder to construct the vectorized tree.
|
||||
IRBuilder<> Builder;
|
||||
};
|
||||
@ -1197,7 +1199,8 @@ void FuncSLP::optimizeGatherSequence() {
|
||||
// visited instructions.
|
||||
for (SmallPtrSet<Instruction*, 16>::iterator v = Visited.begin(),
|
||||
ve = Visited.end(); v != ve; ++v) {
|
||||
if (Insert->isIdenticalTo(*v)) {
|
||||
if (Insert->isIdenticalTo(*v) &&
|
||||
DT->dominates((*v)->getParent(), Insert->getParent())) {
|
||||
Insert->replaceAllUsesWith(*v);
|
||||
break;
|
||||
}
|
||||
@ -1224,6 +1227,7 @@ struct SLPVectorizer : public FunctionPass {
|
||||
TargetTransformInfo *TTI;
|
||||
AliasAnalysis *AA;
|
||||
LoopInfo *LI;
|
||||
DominatorTree *DT;
|
||||
|
||||
virtual bool runOnFunction(Function &F) {
|
||||
SE = &getAnalysis<ScalarEvolution>();
|
||||
@ -1231,6 +1235,7 @@ struct SLPVectorizer : public FunctionPass {
|
||||
TTI = &getAnalysis<TargetTransformInfo>();
|
||||
AA = &getAnalysis<AliasAnalysis>();
|
||||
LI = &getAnalysis<LoopInfo>();
|
||||
DT = &getAnalysis<DominatorTree>();
|
||||
|
||||
StoreRefs.clear();
|
||||
bool Changed = false;
|
||||
@ -1244,7 +1249,7 @@ struct SLPVectorizer : public FunctionPass {
|
||||
|
||||
// Use the bottom up slp vectorizer to construct chains that start with
|
||||
// the store instructions.
|
||||
FuncSLP R(&F, SE, DL, TTI, AA, LI);
|
||||
FuncSLP R(&F, SE, DL, TTI, AA, LI, DT);
|
||||
|
||||
for (Function::iterator it = F.begin(), e = F.end(); it != e; ++it) {
|
||||
BasicBlock *BB = it;
|
||||
@ -1274,6 +1279,7 @@ struct SLPVectorizer : public FunctionPass {
|
||||
AU.addRequired<AliasAnalysis>();
|
||||
AU.addRequired<TargetTransformInfo>();
|
||||
AU.addRequired<LoopInfo>();
|
||||
AU.addRequired<DominatorTree>();
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -83,3 +83,54 @@ entry:
|
||||
ret i32 undef
|
||||
}
|
||||
|
||||
; int test2(double *G, int k) {
|
||||
; if (k) {
|
||||
; G[0] = 1+G[5]*4;
|
||||
; G[1] = 6+G[6]*3;
|
||||
; } else {
|
||||
; G[2] = 7+G[5]*4;
|
||||
; G[3] = 8+G[6]*3;
|
||||
; }
|
||||
; }
|
||||
|
||||
; We can't merge the gather sequences because one does not dominate the other.
|
||||
; CHECK: test2
|
||||
; CHECK: insertelement
|
||||
; CHECK: insertelement
|
||||
; CHECK: insertelement
|
||||
; CHECK: insertelement
|
||||
; CHECK: ret
|
||||
define i32 @test2(double* nocapture %G, i32 %k) { ; IR for the C test2 above: two sibling branches, neither of which dominates the other
|
||||
%1 = icmp eq i32 %k, 0 ; %1 is true when k == 0
|
||||
%2 = getelementptr inbounds double* %G, i64 5 ; address of G[5]
|
||||
%3 = load double* %2, align 8
|
||||
%4 = fmul double %3, 4.000000e+00 ; G[5]*4, computed once in the entry block and used by both branches
|
||||
br i1 %1, label %12, label %5 ; k == 0 takes the else block (%12), otherwise the then block (%5)
|
||||
|
||||
; <label>:5 ; preds = %0
|
||||
%6 = fadd double %4, 1.000000e+00 ; 1 + G[5]*4
|
||||
store double %6, double* %G, align 8 ; G[0] = ...
|
||||
%7 = getelementptr inbounds double* %G, i64 6 ; address of G[6]
|
||||
%8 = load double* %7, align 8
|
||||
%9 = fmul double %8, 3.000000e+00 ; G[6]*3
|
||||
%10 = fadd double %9, 6.000000e+00 ; 6 + G[6]*3
|
||||
%11 = getelementptr inbounds double* %G, i64 1 ; address of G[1]
|
||||
store double %10, double* %11, align 8 ; G[1] = ...
|
||||
br label %20
|
||||
|
||||
; <label>:12 ; preds = %0
|
||||
%13 = fadd double %4, 7.000000e+00 ; 7 + G[5]*4
|
||||
%14 = getelementptr inbounds double* %G, i64 2 ; address of G[2]
|
||||
store double %13, double* %14, align 8 ; G[2] = ...
|
||||
%15 = getelementptr inbounds double* %G, i64 6 ; address of G[6], reloaded separately in this branch
|
||||
%16 = load double* %15, align 8
|
||||
%17 = fmul double %16, 3.000000e+00 ; G[6]*3
|
||||
%18 = fadd double %17, 8.000000e+00 ; 8 + G[6]*3
|
||||
%19 = getelementptr inbounds double* %G, i64 3 ; address of G[3]
|
||||
store double %18, double* %19, align 8 ; G[3] = ...
|
||||
br label %20
|
||||
|
||||
; <label>:20 ; preds = %12, %5
|
||||
ret i32 undef ; the C source has no return statement, so the result is undef
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user