mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-25 12:50:00 +00:00
Fix SCEV overly optimistic back edge taken count for multi-exit loops.
Fixes PR11375: Different results for 'clang++ huh.cpp'... git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144746 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
f56c60b571
commit
79f0bfcc20
@ -4153,13 +4153,19 @@ void ScalarEvolution::forgetValue(Value *V) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// getExact - Get the exact loop backedge taken count considering all loop
|
/// getExact - Get the exact loop backedge taken count considering all loop
|
||||||
/// exits. If all exits are computable, this is the minimum computed count.
|
/// exits. A computable result can only be returned for loops with a single exit.
|
||||||
|
/// Returning the minimum taken count among all exits is incorrect because one
|
||||||
|
/// of the loop's exit limits may have been skipped. HowFarToZero assumes that
|
||||||
|
/// the limit of each loop test is never skipped. This is a valid assumption as
|
||||||
|
/// long as the loop exits via that test. For precise results, it is the
|
||||||
|
/// caller's responsibility to specify the relevant loop exit using
|
||||||
|
/// getExact(ExitingBlock, SE).
|
||||||
const SCEV *
|
const SCEV *
|
||||||
ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const {
|
ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const {
|
||||||
// If any exits were not computable, the loop is not computable.
|
// If any exits were not computable, the loop is not computable.
|
||||||
if (!ExitNotTaken.isCompleteList()) return SE->getCouldNotCompute();
|
if (!ExitNotTaken.isCompleteList()) return SE->getCouldNotCompute();
|
||||||
|
|
||||||
// We need at least one computable exit.
|
// We need exactly one computable exit.
|
||||||
if (!ExitNotTaken.ExitingBlock) return SE->getCouldNotCompute();
|
if (!ExitNotTaken.ExitingBlock) return SE->getCouldNotCompute();
|
||||||
assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info");
|
assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info");
|
||||||
|
|
||||||
@ -4171,8 +4177,8 @@ ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const {
|
|||||||
|
|
||||||
if (!BECount)
|
if (!BECount)
|
||||||
BECount = ENT->ExactNotTaken;
|
BECount = ENT->ExactNotTaken;
|
||||||
else
|
else if (BECount != ENT->ExactNotTaken)
|
||||||
BECount = SE->getUMinFromMismatchedTypes(BECount, ENT->ExactNotTaken);
|
return SE->getCouldNotCompute();
|
||||||
}
|
}
|
||||||
assert(BECount && "Invalid not taken count for loop exit");
|
assert(BECount && "Invalid not taken count for loop exit");
|
||||||
return BECount;
|
return BECount;
|
||||||
@ -4253,8 +4259,15 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
|
|||||||
|
|
||||||
if (MaxBECount == getCouldNotCompute())
|
if (MaxBECount == getCouldNotCompute())
|
||||||
MaxBECount = EL.Max;
|
MaxBECount = EL.Max;
|
||||||
else if (EL.Max != getCouldNotCompute())
|
else if (EL.Max != getCouldNotCompute()) {
|
||||||
MaxBECount = getUMinFromMismatchedTypes(MaxBECount, EL.Max);
|
// We cannot take the "min" MaxBECount, because non-unit stride loops may
|
||||||
|
// skip some loop tests. Taking the max over the exits is sufficiently
|
||||||
|
// conservative. TODO: We could do better taking into consideration
|
||||||
|
// that (1) the loop has unit stride (2) the last loop test is
|
||||||
|
// less-than/greater-than (3) any loop test is less-than/greater-than AND
|
||||||
|
// falls-through some constant times less than the other tests.
|
||||||
|
MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, EL.Max);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount);
|
return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount);
|
||||||
@ -4920,7 +4933,7 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
|
|||||||
// the loop symbolically to determine when the condition gets a value of
|
// the loop symbolically to determine when the condition gets a value of
|
||||||
// "ExitWhen".
|
// "ExitWhen".
|
||||||
|
|
||||||
unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis.
|
unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis.
|
||||||
for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){
|
for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){
|
||||||
ConstantInt *CondVal =
|
ConstantInt *CondVal =
|
||||||
dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, L,
|
dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, L,
|
||||||
@ -5507,10 +5520,10 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
|
|||||||
// behavior. Loops must exhibit defined behavior until a wrapped value is
|
// behavior. Loops must exhibit defined behavior until a wrapped value is
|
||||||
// actually used. So the trip count computed by udiv could be smaller than the
|
// actually used. So the trip count computed by udiv could be smaller than the
|
||||||
// number of well-defined iterations.
|
// number of well-defined iterations.
|
||||||
if (AddRec->getNoWrapFlags(SCEV::FlagNW))
|
if (AddRec->getNoWrapFlags(SCEV::FlagNW)) {
|
||||||
// FIXME: We really want an "isexact" bit for udiv.
|
// FIXME: We really want an "isexact" bit for udiv.
|
||||||
return getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
|
return getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
|
||||||
|
}
|
||||||
// Then, try to solve the above equation provided that Start is constant.
|
// Then, try to solve the above equation provided that Start is constant.
|
||||||
if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
|
if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
|
||||||
return SolveLinEquationWithOverflow(StepC->getValue()->getValue(),
|
return SolveLinEquationWithOverflow(StepC->getValue()->getValue(),
|
||||||
|
40
test/Transforms/IndVarSimplify/2011-11-15-multiexit.ll
Normal file
40
test/Transforms/IndVarSimplify/2011-11-15-multiexit.ll
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
; RUN: opt < %s -indvars -S | FileCheck %s
|
||||||
|
;
|
||||||
|
; Prior to the fix for PR11375, indvars would replace %firstIV with a
|
||||||
|
; loop-invariant gep computed in the preheader. This was incorrect
|
||||||
|
; because it was based on the minimum "ExitNotTaken" count. If the
|
||||||
|
; final loop test is skipped (odd number of elements) then the early
|
||||||
|
; exit would be taken and the loop invariant value would be incorrect.
|
||||||
|
|
||||||
|
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||||
|
target triple = "x86_64-apple-darwin"
|
||||||
|
|
||||||
|
; CHECK: if.end:
|
||||||
|
; CHECK: phi i32* [ %first.lcssa, %early.exit ]
|
||||||
|
define i32 @test(i32* %first, i32* %last) uwtable ssp {
|
||||||
|
entry:
|
||||||
|
br i1 undef, label %if.end, label %if.then
|
||||||
|
|
||||||
|
if.then: ; preds = %entry
|
||||||
|
br i1 undef, label %if.end, label %do.body
|
||||||
|
|
||||||
|
do.body: ; preds = %if.else, %if.then
|
||||||
|
%firstIV = phi i32* [ %incdec.ptr2, %if.else ], [ %first, %if.then ]
|
||||||
|
%incdec.ptr1 = getelementptr inbounds i32* %firstIV, i64 1
|
||||||
|
%cmp1 = icmp eq i32* %incdec.ptr1, %last
|
||||||
|
br i1 %cmp1, label %early.exit, label %if.else
|
||||||
|
|
||||||
|
if.else: ; preds = %do.body
|
||||||
|
%incdec.ptr2 = getelementptr inbounds i32* %firstIV, i64 2
|
||||||
|
%cmp2 = icmp eq i32* %incdec.ptr2, %last
|
||||||
|
br i1 %cmp2, label %if.end, label %do.body
|
||||||
|
|
||||||
|
early.exit:
|
||||||
|
%first.lcssa = phi i32* [ %firstIV, %do.body ]
|
||||||
|
br label %if.end
|
||||||
|
|
||||||
|
if.end:
|
||||||
|
%tmp = phi i32* [ %first.lcssa, %early.exit ], [ %first, %if.then ], [ %first, %entry ], [ undef, %if.else ]
|
||||||
|
%val = load i32* %tmp
|
||||||
|
ret i32 %val
|
||||||
|
}
|
@ -1,8 +1,14 @@
|
|||||||
; RUN: opt < %s -indvars -S \
|
; RUN: opt < %s -indvars -S \
|
||||||
; RUN: | grep {%b.1 = phi i32 \\\[ 2, %bb \\\], \\\[ 1, %bb2 \\\]}
|
; RUN: | grep {%b.1 = phi i32 \\\[ 2, %bb \\\], \\\[ 1, %bb2 \\\]}
|
||||||
|
;
|
||||||
; This loop has multiple exits, and the value of %b1 depends on which
|
; This loop has multiple exits, and the value of %b1 depends on which
|
||||||
; exit is taken. Indvars should correctly compute the exit values.
|
; exit is taken. Indvars should correctly compute the exit values.
|
||||||
|
;
|
||||||
|
; XFAIL: *
|
||||||
|
; Indvars does not currently replace loop invariant values unless all
|
||||||
|
; loop exits have the same exit value. We could handle some cases,
|
||||||
|
; such as this, by making getSCEVAtScope() sensitive to a particular
|
||||||
|
; loop exit. See PR11388.
|
||||||
|
|
||||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
|
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
|
||||||
target triple = "x86_64-pc-linux-gnu"
|
target triple = "x86_64-pc-linux-gnu"
|
||||||
|
@ -2,8 +2,13 @@
|
|||||||
; RUN: grep {\[%\]tmp7 = icmp eq i8 -28, -28} %t
|
; RUN: grep {\[%\]tmp7 = icmp eq i8 -28, -28} %t
|
||||||
; RUN: grep {\[%\]tmp8 = icmp eq i8 63, 63} %t
|
; RUN: grep {\[%\]tmp8 = icmp eq i8 63, 63} %t
|
||||||
; PR4477
|
; PR4477
|
||||||
|
|
||||||
; Indvars should compute the exit values in loop.
|
; Indvars should compute the exit values in loop.
|
||||||
|
;
|
||||||
|
; XFAIL: *
|
||||||
|
; Indvars does not currently replace loop invariant values unless all
|
||||||
|
; loop exits have the same exit value. We could handle some cases,
|
||||||
|
; such as this, by making getSCEVAtScope() sensitive to a particular
|
||||||
|
; loop exit. See PR11388.
|
||||||
|
|
||||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
|
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
|
||||||
target triple = "i386-pc-linux-gnu"
|
target triple = "i386-pc-linux-gnu"
|
||||||
|
Loading…
Reference in New Issue
Block a user