diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 125f2cbc516..190fc5a2dc4 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -4315,7 +4315,33 @@ BasicBlock::iterator LSRInstance::HoistInsertPosition(BasicBlock::iterator IP, const SmallVectorImpl &Inputs) const { + Instruction *Tentative = &*IP; for (;;) { + bool AllDominate = true; + Instruction *BetterPos = nullptr; + // Don't bother attempting to insert before a catchswitch, their basic block + // cannot have other non-PHI instructions. + if (isa(Tentative)) + return IP; + + for (Instruction *Inst : Inputs) { + if (Inst == Tentative || !DT.dominates(Inst, Tentative)) { + AllDominate = false; + break; + } + // Attempt to find an insert position in the middle of the block, + // instead of at the end, so that it can be used for other expansions. + if (Tentative->getParent() == Inst->getParent() && + (!BetterPos || !DT.dominates(Inst, BetterPos))) + BetterPos = &*std::next(BasicBlock::iterator(Inst)); + } + if (!AllDominate) + break; + if (BetterPos) + IP = BetterPos->getIterator(); + else + IP = Tentative->getIterator(); + const Loop *IPLoop = LI.getLoopFor(IP->getParent()); unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0; @@ -4334,31 +4360,7 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP, break; } - bool AllDominate = true; - Instruction *BetterPos = nullptr; - Instruction *Tentative = IDom->getTerminator(); - // Don't bother attempting to insert before a catchswitch, their basic block - // cannot have other non-PHI instructions. - if (isa(Tentative)) - return IP; - - for (Instruction *Inst : Inputs) { - if (Inst == Tentative || !DT.dominates(Inst, Tentative)) { - AllDominate = false; - break; - } - // Attempt to find an insert position in the middle of the block, - // instead of at the end, so that it can be used for other expansions. 
- if (IDom == Inst->getParent() && - (!BetterPos || !DT.dominates(Inst, BetterPos))) - BetterPos = &*std::next(BasicBlock::iterator(Inst)); - } - if (!AllDominate) - break; - if (BetterPos) - IP = BetterPos->getIterator(); - else - IP = Tentative->getIterator(); + Tentative = IDom->getTerminator(); } return IP; diff --git a/test/Transforms/LoopStrengthReduce/AArch64/lsr-reuse.ll b/test/Transforms/LoopStrengthReduce/AArch64/lsr-reuse.ll new file mode 100644 index 00000000000..a2dfe81b108 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/AArch64/lsr-reuse.ll @@ -0,0 +1,34 @@ +; RUN: llc -mtriple=arm64-unknown-unknown -print-lsr-output < %s 2>&1 | FileCheck %s + +declare void @foo(i64) + +; Verify that redundant adds aren't inserted by LSR. +; CHECK-LABEL: @bar( +define void @bar(double* %A) { +entry: + br label %while.cond + +while.cond: +; CHECK-LABEL: while.cond: +; CHECK: add i64 %lsr.iv, 1 +; CHECK-NOT: add i64 %lsr.iv, 1 +; CHECK-LABEL: land.rhs: + %indvars.iv28 = phi i64 [ %indvars.iv.next29, %land.rhs ], [ 50, %entry ] + %cmp = icmp sgt i64 %indvars.iv28, 0 + br i1 %cmp, label %land.rhs, label %while.end + +land.rhs: + %indvars.iv.next29 = add nsw i64 %indvars.iv28, -1 + %arrayidx = getelementptr inbounds double, double* %A, i64 %indvars.iv.next29 + %Aload = load double, double* %arrayidx, align 8 + %cmp1 = fcmp oeq double %Aload, 0.000000e+00 + br i1 %cmp1, label %while.cond, label %if.end + +while.end: + %indvars.iv28.lcssa = phi i64 [ %indvars.iv28, %while.cond ] + tail call void @foo(i64 %indvars.iv28.lcssa) + br label %if.end + +if.end: + ret void +} diff --git a/test/Transforms/LoopStrengthReduce/scev-insertpt-bug.ll b/test/Transforms/LoopStrengthReduce/scev-insertpt-bug.ll new file mode 100644 index 00000000000..81a6b07fe95 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/scev-insertpt-bug.ll @@ -0,0 +1,47 @@ +; RUN: opt < %s -loop-reduce -S + +; Test that SCEV insertpoints don't get corrupted and cause an +; invalid instruction to be 
inserted in a block other than its parent. +; See http://reviews.llvm.org/D20703 for context. +define void @test() { +entry: + %bf.load = load i32, i32* null, align 4 + %bf.clear = lshr i32 %bf.load, 1 + %div = and i32 %bf.clear, 134217727 + %sub = add nsw i32 %div, -1 + %0 = zext i32 %sub to i64 + br label %while.cond + +while.cond: ; preds = %cond.end, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %cond.end ], [ 0, %entry ] + %cmp = icmp eq i64 %indvars.iv, %0 + br i1 %cmp, label %cleanup16, label %while.body + +while.body: ; preds = %while.cond + %1 = trunc i64 %indvars.iv to i32 + %mul = shl i32 %1, 1 + %add = add nuw i32 %mul, 2 + %cmp3 = icmp ult i32 %add, 0 + br i1 %cmp3, label %if.end, label %if.then + +if.then: ; preds = %while.body + unreachable + +if.end: ; preds = %while.body + br i1 false, label %cond.end, label %cond.true + +cond.true: ; preds = %if.end + br label %cond.end + +cond.end: ; preds = %cond.true, %if.end + %add7 = add i32 %1, 1 + %cmp12 = icmp ugt i32 %add7, %sub + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br i1 %cmp12, label %if.then13, label %while.cond + +if.then13: ; preds = %cond.end + unreachable + +cleanup16: ; preds = %while.cond + ret void +}