Allow loop unrolling to get known trip counts from ScalarEvolution.

SCEV unrolling can unroll loops with arbitrary induction variables. It
is a prerequisite for -disable-iv-rewrite performance. It is also
easily handles loops of arbitrary structure including multiple exits
and is generally more robust.

This is under a temporary option to avoid affecting default
behavior for the next couple of weeks. It is needed so that I can
checkin unit tests for updateUnloop.

llvm-svn: 137384
This commit is contained in:
Andrew Trick 2011-08-11 23:36:16 +00:00
parent 911b7966fe
commit 5a5a5ebe68
4 changed files with 266 additions and 8 deletions

View File

@ -507,7 +507,8 @@ namespace llvm {
/// FoundLHS, and FoundRHS is true.
bool isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
const SCEV *FoundLHS, const SCEV *FoundRHS);
const SCEV *FoundLHS,
const SCEV *FoundRHS);
/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
/// in the header of its containing loop, we know the loop executes a
@ -710,6 +711,18 @@ namespace llvm {
bool isLoopBackedgeGuardedByCond(const Loop *L, ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS);
/// getSmallConstantTripCount - Returns the maximum trip count of this loop
/// as a normal unsigned value, if possible. Returns 0 if the trip count is
/// unknown or not constant.
unsigned getSmallConstantTripCount(Loop *L, BasicBlock *ExitBlock);
/// getSmallConstantTripMultiple - Returns the largest constant divisor of
/// the trip count of this loop as a normal unsigned value, if
/// possible. This means that the actual trip count is always a multiple of
/// the returned value (don't forget the trip count could very well be zero
/// as well!).
unsigned getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitBlock);
// getExitCount - Get the expression for the number of loop iterations for
// which this loop is guaranteed not to exit via ExitingBlock. Otherwise
// return SCEVCouldNotCompute.

View File

@ -3830,6 +3830,63 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// Iteration Count Computation Code
//
/// getSmallConstantTripCount - Returns the maximum trip count of this loop as a
/// normal unsigned value, if possible. Returns 0 if the trip count is unknown
/// or not constant. Will also return 0 if the maximum trip count is very large
/// (>= 2^32)
unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L,
BasicBlock *ExitBlock) {
const SCEVConstant *ExitCount =
dyn_cast<SCEVConstant>(getExitCount(L, ExitBlock));
if (!ExitCount)
return 0;
ConstantInt *ExitConst = ExitCount->getValue();
// Guard against huge trip counts.
if (ExitConst->getValue().getActiveBits() > 32)
return 0;
// In case of integer overflow, this returns 0, which is correct.
return ((unsigned)ExitConst->getZExtValue()) + 1;
}
/// getSmallConstantTripMultiple - Returns the largest constant divisor of the
/// trip count of this loop as a normal unsigned value, if possible. This
/// means that the actual trip count is always a multiple of the returned
/// value (don't forget the trip count could very well be zero as well!).
///
/// Returns 1 if the trip count is unknown or not guaranteed to be the
/// multiple of a constant (which is also the case if the trip count is simply
/// constant, use getSmallConstantTripCount for that case), Will also return 1
/// if the trip count is very large (>= 2^32).
unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L,
BasicBlock *ExitBlock) {
const SCEV *ExitCount = getExitCount(L, ExitBlock);
if (ExitCount == getCouldNotCompute())
return 1;
// Get the trip count from the BE count by adding 1.
const SCEV *TCMul = getAddExpr(ExitCount,
getConstant(ExitCount->getType(), 1));
// FIXME: SCEV distributes multiplication as V1*C1 + V2*C1. We could attempt
// to factor simple cases.
if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(TCMul))
TCMul = Mul->getOperand(0);
const SCEVConstant *MulC = dyn_cast<SCEVConstant>(TCMul);
if (!MulC)
return 1;
ConstantInt *Result = MulC->getValue();
// Guard against huge trip counts.
if (!Result || Result->getValue().getActiveBits() > 32)
return 1;
return (unsigned)Result->getZExtValue();
}
// getExitCount - Get the expression for the number of loop iterations for which
// this loop is guaranteed not to exit via ExitintBlock. Otherwise return
// SCEVCouldNotCompute.

View File

@ -39,6 +39,11 @@ UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
cl::desc("Allows loops to be partially unrolled until "
"-unroll-threshold loop size is reached."));
// Temporary flag to be made default shortly.
static cl::opt<bool>
UnrollWithSCEV("unroll-scev", cl::init(false), cl::Hidden,
cl::desc("Use ScalarEvolution to analyze loop trip counts for unrolling"));
namespace {
class LoopUnroll : public LoopPass {
public:
@ -121,6 +126,7 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls) {
bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
LoopInfo *LI = &getAnalysis<LoopInfo>();
ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();
BasicBlock *Header = L->getHeader();
DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
@ -136,14 +142,24 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
Header->getParent()->hasFnAttr(Attribute::OptimizeForSize))
Threshold = OptSizeUnrollThreshold;
// Find trip count
unsigned TripCount = L->getSmallConstantTripCount();
// Find trip multiple if count is not available
// Find trip count and trip multiple if count is not available
unsigned TripCount = 0;
unsigned TripMultiple = 1;
if (TripCount == 0)
TripMultiple = L->getSmallConstantTripMultiple();
if (UnrollWithSCEV) {
// Find "latch trip count". UnrollLoop assumes that control cannot exit
// via the loop latch on any iteration prior to TripCount. The loop may exit
// early via an earlier branch.
BasicBlock *LatchBlock = L->getLoopLatch();
if (LatchBlock) {
TripCount = SE->getSmallConstantTripCount(L, LatchBlock);
TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock);
}
}
else {
TripCount = L->getSmallConstantTripCount();
if (TripCount == 0)
TripMultiple = L->getSmallConstantTripMultiple();
}
// Automatically select an unroll count.
unsigned Count = CurrentCount;
if (Count == 0) {

View File

@ -0,0 +1,172 @@
; RUN: opt < %s -S -indvars -loop-unroll -verify-loop-info -unroll-scev | FileCheck %s
;
; Unit tests for loop unrolling using ScalarEvolution to compute trip counts.
;
; Indvars is run first to generate an "old" SCEV result. Some unit
; tests may check that SCEV is properly invalidated between passes.
; Completely unroll loops without a canonical IV.
;
; CHECK: @sansCanonical
; CHECK-NOT: phi
; CHECK-NOT: icmp
; CHECK: ret
define i32 @sansCanonical(i32* %base) nounwind {
entry:
br label %while.body
while.body:
%iv = phi i64 [ 10, %entry ], [ %iv.next, %while.body ]
%sum = phi i32 [ 0, %entry ], [ %sum.next, %while.body ]
%iv.next = add i64 %iv, -1
%adr = getelementptr inbounds i32* %base, i64 %iv.next
%tmp = load i32* %adr, align 8
%sum.next = add i32 %sum, %tmp
%iv.narrow = trunc i64 %iv.next to i32
%cmp.i65 = icmp sgt i32 %iv.narrow, 0
br i1 %cmp.i65, label %while.body, label %exit
exit:
ret i32 %sum
}
; SCEV unrolling properly handles loops with multiple exits. In this
; case, the computed trip count based on a canonical IV is *not* for a
; latch block. Canonical unrolling incorrectly unrolls it, but SCEV
; unrolling does not.
;
; CHECK: @earlyLoopTest
; CHECK: tail:
; CHECK-NOT: br
; CHECK: br i1 %cmp2, label %loop, label %exit2
define i64 @earlyLoopTest(i64* %base) nounwind {
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %inc, %tail ]
%s = phi i64 [ 0, %entry ], [ %s.next, %tail ]
%adr = getelementptr i64* %base, i64 %iv
%val = load i64* %adr
%s.next = add i64 %s, %val
%inc = add i64 %iv, 1
%cmp = icmp ne i64 %inc, 4
br i1 %cmp, label %tail, label %exit1
tail:
%cmp2 = icmp ne i64 %val, 0
br i1 %cmp2, label %loop, label %exit2
exit1:
ret i64 %s
exit2:
ret i64 %s.next
}
; SCEV properly unrolls multi-exit loops.
;
; CHECK: @multiExit
; CHECK: getelementptr i32* %base, i64 10
; CHECK-NEXT: load i32*
; CHECK: br i1 false, label %l2.10, label %exit1
; CHECK: l2.10:
; CHECK-NOT: br
; CHECK: ret i32
define i32 @multiExit(i32* %base) nounwind {
entry:
br label %l1
l1:
%iv1 = phi i32 [ 0, %entry ], [ %inc1, %l2 ]
%iv2 = phi i32 [ 0, %entry ], [ %inc2, %l2 ]
%inc1 = add i32 %iv1, 1
%inc2 = add i32 %iv2, 1
%adr = getelementptr i32* %base, i32 %iv1
%val = load i32* %adr
%cmp1 = icmp slt i32 %iv1, 5
br i1 %cmp1, label %l2, label %exit1
l2:
%cmp2 = icmp slt i32 %iv2, 10
br i1 %cmp2, label %l1, label %exit2
exit1:
ret i32 1
exit2:
ret i32 %val
}
; SCEV should not unroll a multi-exit loops unless the latch block has
; a known trip count, regardless of the early exit trip counts. The
; LoopUnroll utility uses this assumption to optimize the latch
; block's branch.
;
; CHECK: @multiExit
; CHECK: l3:
; CHECK-NOT: br
; CHECK: br i1 %cmp3, label %l1, label %exit3
define i32 @multiExitIncomplete(i32* %base) nounwind {
entry:
br label %l1
l1:
%iv1 = phi i32 [ 0, %entry ], [ %inc1, %l3 ]
%iv2 = phi i32 [ 0, %entry ], [ %inc2, %l3 ]
%inc1 = add i32 %iv1, 1
%inc2 = add i32 %iv2, 1
%adr = getelementptr i32* %base, i32 %iv1
%val = load i32* %adr
%cmp1 = icmp slt i32 %iv1, 5
br i1 %cmp1, label %l2, label %exit1
l2:
%cmp2 = icmp slt i32 %iv2, 10
br i1 %cmp2, label %l3, label %exit2
l3:
%cmp3 = icmp ne i32 %val, 0
br i1 %cmp3, label %l1, label %exit3
exit1:
ret i32 1
exit2:
ret i32 2
exit3:
ret i32 3
}
; When loop unroll merges a loop exit with one of its parent loop's
; exits, SCEV must forget its ExitNotTaken info.
;
; CHECK: @nestedUnroll
; CHECK-NOT: br i1
; CHECK: for.body87:
define void @nestedUnroll() nounwind {
entry:
br label %for.inc
for.inc:
br i1 false, label %for.inc, label %for.body38.preheader
for.body38.preheader:
br label %for.body38
for.body38:
%i.113 = phi i32 [ %inc76, %for.inc74 ], [ 0, %for.body38.preheader ]
%mul48 = mul nsw i32 %i.113, 6
br label %for.body43
for.body43:
%j.011 = phi i32 [ 0, %for.body38 ], [ %inc72, %for.body43 ]
%add49 = add nsw i32 %j.011, %mul48
%sh_prom50 = zext i32 %add49 to i64
%inc72 = add nsw i32 %j.011, 1
br i1 false, label %for.body43, label %for.inc74
for.inc74:
%inc76 = add nsw i32 %i.113, 1
br i1 false, label %for.body38, label %for.body87.preheader
for.body87.preheader:
br label %for.body87
for.body87:
br label %for.body87
}