diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h index 1e88bd54458..4433be000d7 100644 --- a/include/llvm/Analysis/ScalarEvolutionExpander.h +++ b/include/llvm/Analysis/ScalarEvolutionExpander.h @@ -26,7 +26,7 @@ namespace llvm { /// Return true if the given expression is safe to expand in the sense that /// all materialized values are safe to speculate. - bool isSafeToExpand(const SCEV *S); + bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE); /// SCEVExpander - This class uses information about analyze scalars to /// rewrite expressions in canonical form. diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index f373a80681a..86a557b55f7 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -1233,6 +1233,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // Re-apply any non-loop-dominating scale. if (PostLoopScale) { + assert(S->isAffine() && "Can't linearly scale non-affine recurrences."); Result = InsertNoopCastOfTo(Result, IntTy); Result = Builder.CreateMul(Result, expandCodeFor(PostLoopScale, IntTy)); @@ -1704,28 +1705,43 @@ namespace { // Currently, we only allow division by a nonzero constant here. If this is // inadequate, we could easily allow division by SCEVUnknown by using // ValueTracking to check isKnownNonZero(). +// +// We cannot generally expand recurrences unless the step dominates the loop +// header. The expander handles the special case of affine recurrences by +// scaling the recurrence outside the loop, but this technique isn't generally +// applicable. Expanding a nested recurrence outside a loop requires computing +// binomial coefficients. This could be done, but the recurrence has to be in a +// perfectly reduced form, which can't be guaranteed. struct SCEVFindUnsafe { + ScalarEvolution &SE; bool IsUnsafe; - SCEVFindUnsafe(): IsUnsafe(false) {} + SCEVFindUnsafe(ScalarEvolution &se): SE(se), IsUnsafe(false) {} bool follow(const SCEV *S) { - const SCEVUDivExpr *D = dyn_cast(S); - if (!D) - return true; - const SCEVConstant *SC = dyn_cast(D->getRHS()); - if (SC && !SC->getValue()->isZero()) - return true; - IsUnsafe = true; - return false; + if (const SCEVUDivExpr *D = dyn_cast(S)) { + const SCEVConstant *SC = dyn_cast(D->getRHS()); + if (!SC || SC->getValue()->isZero()) { + IsUnsafe = true; + return false; + } + } + if (const SCEVAddRecExpr *AR = dyn_cast(S)) { + const SCEV *Step = AR->getStepRecurrence(SE); + if (!AR->isAffine() && !SE.dominates(Step, AR->getLoop()->getHeader())) { + IsUnsafe = true; + return false; + } + } + return true; } bool isDone() const { return IsUnsafe; } }; } namespace llvm { -bool isSafeToExpand(const SCEV *S) { - SCEVFindUnsafe Search; +bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE) { + SCEVFindUnsafe Search(SE); visitAll(S, Search); return !Search.IsUnsafe; } diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index cfd8db0f6cc..235aaaa6f80 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -532,7 +532,8 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { // and varies predictably *inside* the loop. Evaluate the value it // contains when the loop exits, if possible. const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); - if (!SE->isLoopInvariant(ExitValue, L) || !isSafeToExpand(ExitValue)) + if (!SE->isLoopInvariant(ExitValue, L) || + !isSafeToExpand(ExitValue, *SE)) continue; // Computing the value outside of the loop brings no benefit if : diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 14cb9793925..eff5268c448 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -1170,6 +1170,13 @@ public: /// may be used. bool AllFixupsOutsideLoop; + /// RigidFormula is set to true to guarantee that this use will be associated + /// with a single formula--the one that initially matched. Some SCEV + /// expressions cannot be expanded. This allows LSR to consider the registers + /// used by those expressions without the need to expand them later after + /// changing the formula. + bool RigidFormula; + /// WidestFixupType - This records the widest use type for any fixup using /// this LSRUse. FindUseWithSimilarFormula can't consider uses with different /// max fixup widths to be equivalent, because the narrower one may be relying @@ -1188,6 +1195,7 @@ public: MinOffset(INT64_MAX), MaxOffset(INT64_MIN), AllFixupsOutsideLoop(true), + RigidFormula(false), WidestFixupType(0) {} bool HasFormulaWithSameRegs(const Formula &F) const; @@ -1214,6 +1222,9 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const { /// InsertFormula - If the given formula has not yet been inserted, add it to /// the list, and return true. Return false otherwise. bool LSRUse::InsertFormula(const Formula &F) { + if (!Formulae.empty() && RigidFormula) + return false; + SmallVector Key = F.BaseRegs; if (F.ScaledReg) Key.push_back(F.ScaledReg); // Unstable sort by host order ok, because this is only used for uniquifying. @@ -1433,7 +1444,7 @@ static unsigned getScalingFactorCost(const TargetTransformInfo &TTI, } case LSRUse::ICmpZero: // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg. - // Therefore, return 0 in case F.Scale == -1. + // Therefore, return 0 in case F.Scale == -1. return F.Scale != -1; case LSRUse::Basic: @@ -2943,7 +2954,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { // x == y --> x - y == 0 const SCEV *N = SE.getSCEV(NV); - if (SE.isLoopInvariant(N, L) && isSafeToExpand(N)) { + if (SE.isLoopInvariant(N, L) && isSafeToExpand(N, SE)) { // S is normalized, so normalize N before folding it into S // to keep the result normalized. N = TransformForPostIncUse(Normalize, N, CI, 0, @@ -2986,6 +2997,10 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { /// and loop-computable portions. void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) { + // Mark uses whose expressions cannot be expanded. + if (!isSafeToExpand(S, SE)) + LU.RigidFormula = true; + Formula F; F.InitialMatch(S, L, SE); bool Inserted = InsertFormula(LU, LUIdx, F); @@ -4353,6 +4368,8 @@ Value *LSRInstance::Expand(const LSRFixup &LF, SCEVExpander &Rewriter, SmallVectorImpl &DeadInsts) const { const LSRUse &LU = Uses[LF.LUIdx]; + if (LU.RigidFormula) + return LF.OperandValToReplace; // Determine an input position which will be dominated by the operands and // which will dominate the result. diff --git a/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll b/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll new file mode 100644 index 00000000000..255cf41a817 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll @@ -0,0 +1,42 @@ +; RUN: opt -loop-reduce -S < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx" + +; PR15470: LSR miscompile. The test2 function should return '1'. +; +; SCEV expander cannot expand quadratic recurrences outside of the +; loop. This recurrence depends on %sub.us, so can't be expanded. +; +; CHECK-LABEL: @test2 +; CHECK-LABEL: test2.loop: +; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ -16777216, %entry ] +; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, 16777216 +; +; CHECK=LABEL: for.end: +; CHECK: %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us +; CHECK: %sext.us = mul i32 %lsr.iv.next, %sub.cond.us +; CHECK: %f = ashr i32 %sext.us, 24 +; CHECK: ret i32 %f +define i32 @test2() { +entry: + br label %test2.loop + +test2.loop: + %inc1115.us = phi i32 [ 0, %entry ], [ %inc11.us, %test2.loop ] + %inc11.us = add nsw i32 %inc1115.us, 1 + %cmp.us = icmp slt i32 %inc11.us, 2 + br i1 %cmp.us, label %test2.loop, label %for.end + +for.end: + %tobool.us = icmp eq i32 %inc1115.us, 0 + %sub.us = select i1 %tobool.us, i32 0, i32 0 + %mul.us = shl i32 %inc1115.us, 24 + %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us + %sext.us = mul i32 %mul.us, %sub.cond.us + %f = ashr i32 %sext.us, 24 + br label %exit + +exit: + ret i32 %f +}