mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-23 12:24:34 +00:00
Allow loop unrolling to get known trip counts from ScalarEvolution.
SCEV unrolling can unroll loops with arbitrary induction variables. It is a prerequisite for -disable-iv-rewrite performance. It is also easily handles loops of arbitrary structure including multiple exits and is generally more robust. This is under a temporary option to avoid affecting default behavior for the next couple of weeks. It is needed so that I can checkin unit tests for updateUnloop. llvm-svn: 137384
This commit is contained in:
parent
911b7966fe
commit
5a5a5ebe68
@ -507,7 +507,8 @@ namespace llvm {
|
||||
/// FoundLHS, and FoundRHS is true.
|
||||
bool isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
|
||||
const SCEV *LHS, const SCEV *RHS,
|
||||
const SCEV *FoundLHS, const SCEV *FoundRHS);
|
||||
const SCEV *FoundLHS,
|
||||
const SCEV *FoundRHS);
|
||||
|
||||
/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
|
||||
/// in the header of its containing loop, we know the loop executes a
|
||||
@ -710,6 +711,18 @@ namespace llvm {
|
||||
bool isLoopBackedgeGuardedByCond(const Loop *L, ICmpInst::Predicate Pred,
|
||||
const SCEV *LHS, const SCEV *RHS);
|
||||
|
||||
/// getSmallConstantTripCount - Returns the maximum trip count of this loop
|
||||
/// as a normal unsigned value, if possible. Returns 0 if the trip count is
|
||||
/// unknown or not constant.
|
||||
unsigned getSmallConstantTripCount(Loop *L, BasicBlock *ExitBlock);
|
||||
|
||||
/// getSmallConstantTripMultiple - Returns the largest constant divisor of
|
||||
/// the trip count of this loop as a normal unsigned value, if
|
||||
/// possible. This means that the actual trip count is always a multiple of
|
||||
/// the returned value (don't forget the trip count could very well be zero
|
||||
/// as well!).
|
||||
unsigned getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitBlock);
|
||||
|
||||
// getExitCount - Get the expression for the number of loop iterations for
|
||||
// which this loop is guaranteed not to exit via ExitingBlock. Otherwise
|
||||
// return SCEVCouldNotCompute.
|
||||
|
@ -3830,6 +3830,63 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
|
||||
// Iteration Count Computation Code
|
||||
//
|
||||
|
||||
/// getSmallConstantTripCount - Returns the maximum trip count of this loop as a
|
||||
/// normal unsigned value, if possible. Returns 0 if the trip count is unknown
|
||||
/// or not constant. Will also return 0 if the maximum trip count is very large
|
||||
/// (>= 2^32)
|
||||
unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L,
|
||||
BasicBlock *ExitBlock) {
|
||||
const SCEVConstant *ExitCount =
|
||||
dyn_cast<SCEVConstant>(getExitCount(L, ExitBlock));
|
||||
if (!ExitCount)
|
||||
return 0;
|
||||
|
||||
ConstantInt *ExitConst = ExitCount->getValue();
|
||||
|
||||
// Guard against huge trip counts.
|
||||
if (ExitConst->getValue().getActiveBits() > 32)
|
||||
return 0;
|
||||
|
||||
// In case of integer overflow, this returns 0, which is correct.
|
||||
return ((unsigned)ExitConst->getZExtValue()) + 1;
|
||||
}
|
||||
|
||||
/// getSmallConstantTripMultiple - Returns the largest constant divisor of the
|
||||
/// trip count of this loop as a normal unsigned value, if possible. This
|
||||
/// means that the actual trip count is always a multiple of the returned
|
||||
/// value (don't forget the trip count could very well be zero as well!).
|
||||
///
|
||||
/// Returns 1 if the trip count is unknown or not guaranteed to be the
|
||||
/// multiple of a constant (which is also the case if the trip count is simply
|
||||
/// constant, use getSmallConstantTripCount for that case), Will also return 1
|
||||
/// if the trip count is very large (>= 2^32).
|
||||
unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L,
|
||||
BasicBlock *ExitBlock) {
|
||||
const SCEV *ExitCount = getExitCount(L, ExitBlock);
|
||||
if (ExitCount == getCouldNotCompute())
|
||||
return 1;
|
||||
|
||||
// Get the trip count from the BE count by adding 1.
|
||||
const SCEV *TCMul = getAddExpr(ExitCount,
|
||||
getConstant(ExitCount->getType(), 1));
|
||||
// FIXME: SCEV distributes multiplication as V1*C1 + V2*C1. We could attempt
|
||||
// to factor simple cases.
|
||||
if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(TCMul))
|
||||
TCMul = Mul->getOperand(0);
|
||||
|
||||
const SCEVConstant *MulC = dyn_cast<SCEVConstant>(TCMul);
|
||||
if (!MulC)
|
||||
return 1;
|
||||
|
||||
ConstantInt *Result = MulC->getValue();
|
||||
|
||||
// Guard against huge trip counts.
|
||||
if (!Result || Result->getValue().getActiveBits() > 32)
|
||||
return 1;
|
||||
|
||||
return (unsigned)Result->getZExtValue();
|
||||
}
|
||||
|
||||
// getExitCount - Get the expression for the number of loop iterations for which
|
||||
// this loop is guaranteed not to exit via ExitintBlock. Otherwise return
|
||||
// SCEVCouldNotCompute.
|
||||
|
@ -39,6 +39,11 @@ UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
|
||||
cl::desc("Allows loops to be partially unrolled until "
|
||||
"-unroll-threshold loop size is reached."));
|
||||
|
||||
// Temporary flag to be made default shortly.
|
||||
static cl::opt<bool>
|
||||
UnrollWithSCEV("unroll-scev", cl::init(false), cl::Hidden,
|
||||
cl::desc("Use ScalarEvolution to analyze loop trip counts for unrolling"));
|
||||
|
||||
namespace {
|
||||
class LoopUnroll : public LoopPass {
|
||||
public:
|
||||
@ -121,6 +126,7 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls) {
|
||||
|
||||
bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
|
||||
LoopInfo *LI = &getAnalysis<LoopInfo>();
|
||||
ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();
|
||||
|
||||
BasicBlock *Header = L->getHeader();
|
||||
DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
|
||||
@ -136,14 +142,24 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
|
||||
Header->getParent()->hasFnAttr(Attribute::OptimizeForSize))
|
||||
Threshold = OptSizeUnrollThreshold;
|
||||
|
||||
// Find trip count
|
||||
unsigned TripCount = L->getSmallConstantTripCount();
|
||||
|
||||
// Find trip multiple if count is not available
|
||||
// Find trip count and trip multiple if count is not available
|
||||
unsigned TripCount = 0;
|
||||
unsigned TripMultiple = 1;
|
||||
if (TripCount == 0)
|
||||
TripMultiple = L->getSmallConstantTripMultiple();
|
||||
|
||||
if (UnrollWithSCEV) {
|
||||
// Find "latch trip count". UnrollLoop assumes that control cannot exit
|
||||
// via the loop latch on any iteration prior to TripCount. The loop may exit
|
||||
// early via an earlier branch.
|
||||
BasicBlock *LatchBlock = L->getLoopLatch();
|
||||
if (LatchBlock) {
|
||||
TripCount = SE->getSmallConstantTripCount(L, LatchBlock);
|
||||
TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock);
|
||||
}
|
||||
}
|
||||
else {
|
||||
TripCount = L->getSmallConstantTripCount();
|
||||
if (TripCount == 0)
|
||||
TripMultiple = L->getSmallConstantTripMultiple();
|
||||
}
|
||||
// Automatically select an unroll count.
|
||||
unsigned Count = CurrentCount;
|
||||
if (Count == 0) {
|
||||
|
172
test/Transforms/LoopUnroll/scevunroll.ll
Normal file
172
test/Transforms/LoopUnroll/scevunroll.ll
Normal file
@ -0,0 +1,172 @@
|
||||
; RUN: opt < %s -S -indvars -loop-unroll -verify-loop-info -unroll-scev | FileCheck %s
|
||||
;
|
||||
; Unit tests for loop unrolling using ScalarEvolution to compute trip counts.
|
||||
;
|
||||
; Indvars is run first to generate an "old" SCEV result. Some unit
|
||||
; tests may check that SCEV is properly invalidated between passes.
|
||||
|
||||
; Completely unroll loops without a canonical IV.
|
||||
;
|
||||
; CHECK: @sansCanonical
|
||||
; CHECK-NOT: phi
|
||||
; CHECK-NOT: icmp
|
||||
; CHECK: ret
|
||||
define i32 @sansCanonical(i32* %base) nounwind {
|
||||
entry:
|
||||
br label %while.body
|
||||
|
||||
while.body:
|
||||
%iv = phi i64 [ 10, %entry ], [ %iv.next, %while.body ]
|
||||
%sum = phi i32 [ 0, %entry ], [ %sum.next, %while.body ]
|
||||
%iv.next = add i64 %iv, -1
|
||||
%adr = getelementptr inbounds i32* %base, i64 %iv.next
|
||||
%tmp = load i32* %adr, align 8
|
||||
%sum.next = add i32 %sum, %tmp
|
||||
%iv.narrow = trunc i64 %iv.next to i32
|
||||
%cmp.i65 = icmp sgt i32 %iv.narrow, 0
|
||||
br i1 %cmp.i65, label %while.body, label %exit
|
||||
|
||||
exit:
|
||||
ret i32 %sum
|
||||
}
|
||||
|
||||
; SCEV unrolling properly handles loops with multiple exits. In this
|
||||
; case, the computed trip count based on a canonical IV is *not* for a
|
||||
; latch block. Canonical unrolling incorrectly unrolls it, but SCEV
|
||||
; unrolling does not.
|
||||
;
|
||||
; CHECK: @earlyLoopTest
|
||||
; CHECK: tail:
|
||||
; CHECK-NOT: br
|
||||
; CHECK: br i1 %cmp2, label %loop, label %exit2
|
||||
define i64 @earlyLoopTest(i64* %base) nounwind {
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%iv = phi i64 [ 0, %entry ], [ %inc, %tail ]
|
||||
%s = phi i64 [ 0, %entry ], [ %s.next, %tail ]
|
||||
%adr = getelementptr i64* %base, i64 %iv
|
||||
%val = load i64* %adr
|
||||
%s.next = add i64 %s, %val
|
||||
%inc = add i64 %iv, 1
|
||||
%cmp = icmp ne i64 %inc, 4
|
||||
br i1 %cmp, label %tail, label %exit1
|
||||
|
||||
tail:
|
||||
%cmp2 = icmp ne i64 %val, 0
|
||||
br i1 %cmp2, label %loop, label %exit2
|
||||
|
||||
exit1:
|
||||
ret i64 %s
|
||||
|
||||
exit2:
|
||||
ret i64 %s.next
|
||||
}
|
||||
|
||||
; SCEV properly unrolls multi-exit loops.
|
||||
;
|
||||
; CHECK: @multiExit
|
||||
; CHECK: getelementptr i32* %base, i64 10
|
||||
; CHECK-NEXT: load i32*
|
||||
; CHECK: br i1 false, label %l2.10, label %exit1
|
||||
; CHECK: l2.10:
|
||||
; CHECK-NOT: br
|
||||
; CHECK: ret i32
|
||||
define i32 @multiExit(i32* %base) nounwind {
|
||||
entry:
|
||||
br label %l1
|
||||
l1:
|
||||
%iv1 = phi i32 [ 0, %entry ], [ %inc1, %l2 ]
|
||||
%iv2 = phi i32 [ 0, %entry ], [ %inc2, %l2 ]
|
||||
%inc1 = add i32 %iv1, 1
|
||||
%inc2 = add i32 %iv2, 1
|
||||
%adr = getelementptr i32* %base, i32 %iv1
|
||||
%val = load i32* %adr
|
||||
%cmp1 = icmp slt i32 %iv1, 5
|
||||
br i1 %cmp1, label %l2, label %exit1
|
||||
l2:
|
||||
%cmp2 = icmp slt i32 %iv2, 10
|
||||
br i1 %cmp2, label %l1, label %exit2
|
||||
exit1:
|
||||
ret i32 1
|
||||
exit2:
|
||||
ret i32 %val
|
||||
}
|
||||
|
||||
|
||||
; SCEV should not unroll a multi-exit loops unless the latch block has
|
||||
; a known trip count, regardless of the early exit trip counts. The
|
||||
; LoopUnroll utility uses this assumption to optimize the latch
|
||||
; block's branch.
|
||||
;
|
||||
; CHECK: @multiExit
|
||||
; CHECK: l3:
|
||||
; CHECK-NOT: br
|
||||
; CHECK: br i1 %cmp3, label %l1, label %exit3
|
||||
define i32 @multiExitIncomplete(i32* %base) nounwind {
|
||||
entry:
|
||||
br label %l1
|
||||
l1:
|
||||
%iv1 = phi i32 [ 0, %entry ], [ %inc1, %l3 ]
|
||||
%iv2 = phi i32 [ 0, %entry ], [ %inc2, %l3 ]
|
||||
%inc1 = add i32 %iv1, 1
|
||||
%inc2 = add i32 %iv2, 1
|
||||
%adr = getelementptr i32* %base, i32 %iv1
|
||||
%val = load i32* %adr
|
||||
%cmp1 = icmp slt i32 %iv1, 5
|
||||
br i1 %cmp1, label %l2, label %exit1
|
||||
l2:
|
||||
%cmp2 = icmp slt i32 %iv2, 10
|
||||
br i1 %cmp2, label %l3, label %exit2
|
||||
l3:
|
||||
%cmp3 = icmp ne i32 %val, 0
|
||||
br i1 %cmp3, label %l1, label %exit3
|
||||
|
||||
exit1:
|
||||
ret i32 1
|
||||
exit2:
|
||||
ret i32 2
|
||||
exit3:
|
||||
ret i32 3
|
||||
}
|
||||
|
||||
; When loop unroll merges a loop exit with one of its parent loop's
|
||||
; exits, SCEV must forget its ExitNotTaken info.
|
||||
;
|
||||
; CHECK: @nestedUnroll
|
||||
; CHECK-NOT: br i1
|
||||
; CHECK: for.body87:
|
||||
define void @nestedUnroll() nounwind {
|
||||
entry:
|
||||
br label %for.inc
|
||||
|
||||
for.inc:
|
||||
br i1 false, label %for.inc, label %for.body38.preheader
|
||||
|
||||
for.body38.preheader:
|
||||
br label %for.body38
|
||||
|
||||
for.body38:
|
||||
%i.113 = phi i32 [ %inc76, %for.inc74 ], [ 0, %for.body38.preheader ]
|
||||
%mul48 = mul nsw i32 %i.113, 6
|
||||
br label %for.body43
|
||||
|
||||
for.body43:
|
||||
%j.011 = phi i32 [ 0, %for.body38 ], [ %inc72, %for.body43 ]
|
||||
%add49 = add nsw i32 %j.011, %mul48
|
||||
%sh_prom50 = zext i32 %add49 to i64
|
||||
%inc72 = add nsw i32 %j.011, 1
|
||||
br i1 false, label %for.body43, label %for.inc74
|
||||
|
||||
for.inc74:
|
||||
%inc76 = add nsw i32 %i.113, 1
|
||||
br i1 false, label %for.body38, label %for.body87.preheader
|
||||
|
||||
for.body87.preheader:
|
||||
br label %for.body87
|
||||
|
||||
for.body87:
|
||||
br label %for.body87
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user