mirror of
https://github.com/RPCSX/llvm.git
synced 2025-04-18 08:00:02 +00:00
The patch turns on epilogue unroll for loops with constant recurrence start.
Summary: Set unroll remainder to epilog if a loop contains a phi with constant parameter: loop: pn = phi [Const, PreHeader], [pn.next, Latch] ... Reviewer: hfinkel Differential Revision: http://reviews.llvm.org/D27004 From: Evgeny Stupachenko <evstupac@gmail.com> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@296770 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
d541a8113c
commit
c40a2f9b2a
@ -216,6 +216,45 @@ const Loop* llvm::addClonedBlockToLoopInfo(BasicBlock *OriginalBB,
|
||||
}
|
||||
}
|
||||
|
||||
/// The function chooses which type of unroll (epilog or prolog) is more
|
||||
/// profitable.
|
||||
/// Epilog unroll is more profitable when there is a PHI that starts from a
|
||||
/// constant. In this case epilog will leave PHI start from constant,
|
||||
/// but prolog will convert it to non-constant.
|
||||
///
|
||||
/// loop:
|
||||
/// PN = PHI [I, Latch], [CI, PreHeader]
|
||||
/// I = foo(PN)
|
||||
/// ...
|
||||
///
|
||||
/// Epilog unroll case.
|
||||
/// loop:
|
||||
/// PN = PHI [I2, Latch], [CI, PreHeader]
|
||||
/// I1 = foo(PN)
|
||||
/// I2 = foo(I1)
|
||||
/// ...
|
||||
/// Prolog unroll case.
|
||||
/// NewPN = PHI [PrologI, Prolog], [CI, PreHeader]
|
||||
/// loop:
|
||||
/// PN = PHI [I2, Latch], [NewPN, PreHeader]
|
||||
/// I1 = foo(PN)
|
||||
/// I2 = foo(I1)
|
||||
/// ...
|
||||
///
|
||||
static bool isEpilogProfitable(Loop *L) {
|
||||
BasicBlock *PreHeader = L->getLoopPreheader();
|
||||
BasicBlock *Header = L->getHeader();
|
||||
// Both the preheader and the header must exist; a null one trips this assert.
assert(PreHeader && Header);
|
||||
// Walk the header's instructions looking at its PHI nodes.
for (Instruction &BBI : *Header) {
|
||||
PHINode *PN = dyn_cast<PHINode>(&BBI);
|
||||
// Stop scanning at the first instruction that is not a PHI node.
if (!PN)
|
||||
break;
|
||||
// A PHI whose incoming value from the preheader is a ConstantInt is enough
// to report epilog as profitable.
if (isa<ConstantInt>(PN->getIncomingValueForBlock(PreHeader)))
|
||||
return true;
|
||||
}
|
||||
// No header PHI enters the loop with a ConstantInt value.
return false;
|
||||
}
|
||||
|
||||
/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
|
||||
/// if unrolling was successful, or false if the loop was unmodified. Unrolling
|
||||
/// can only fail when the loop's latch block is not terminated by a conditional
|
||||
@ -359,9 +398,13 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
|
||||
"convergent operation.");
|
||||
});
|
||||
|
||||
bool EpilogProfitability =
|
||||
UnrollRuntimeEpilog.getNumOccurrences() ? UnrollRuntimeEpilog
|
||||
: isEpilogProfitable(L);
|
||||
|
||||
if (RuntimeTripCount && TripMultiple % Count != 0 &&
|
||||
!UnrollRuntimeLoopRemainder(L, Count, AllowExpensiveTripCount,
|
||||
UnrollRuntimeEpilog, LI, SE, DT,
|
||||
EpilogProfitability, LI, SE, DT,
|
||||
PreserveLCSSA)) {
|
||||
if (Force)
|
||||
RuntimeTripCount = false;
|
||||
|
@ -138,11 +138,11 @@ l0.0.latch:
|
||||
; CHECK-CHILDREN: LoopUnrollPass on Loop at depth 2 containing: %l0.0<header>
|
||||
; CHECK-CHILDREN-NOT: LoopUnrollPass
|
||||
;
|
||||
; Revisit the children of the outer loop that are part of the prologue.
|
||||
; Revisit the children of the outer loop that are part of the epilogue.
|
||||
;
|
||||
; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.0.prol<header>
|
||||
; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.0.epil<header>
|
||||
; CHECK-NOT: LoopUnrollPass
|
||||
; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.1.prol<header>
|
||||
; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.1.epil<header>
|
||||
; CHECK-NOT: LoopUnrollPass
|
||||
l0.latch:
|
||||
br label %l0
|
||||
|
@ -14,9 +14,6 @@ entry:
|
||||
%cmp1 = icmp eq i3 %n, 0
|
||||
br i1 %cmp1, label %for.end, label %for.body
|
||||
|
||||
; UNROLL-16-NOT: for.body.prol:
|
||||
; UNROLL-4: for.body.prol:
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
; UNROLL-16-LABEL: for.body:
|
||||
; UNROLL-4-LABEL: for.body:
|
||||
@ -42,6 +39,10 @@ for.body: ; preds = %for.body, %entry
|
||||
|
||||
; UNROLL-16-LABEL: for.end
|
||||
; UNROLL-4-LABEL: for.end
|
||||
|
||||
; UNROLL-16-NOT: for.body.epil:
|
||||
; UNROLL-4: for.body.epil:
|
||||
|
||||
for.end: ; preds = %for.body, %entry
|
||||
%sum.0.lcssa = phi i3 [ 0, %entry ], [ %add, %for.body ]
|
||||
ret i3 %sum.0.lcssa
|
||||
|
@ -3,12 +3,12 @@
|
||||
@known_constant = internal unnamed_addr constant [9 x i32] [i32 0, i32 -1, i32 0, i32 -1, i32 5, i32 -1, i32 0, i32 -1, i32 0], align 16
|
||||
|
||||
; CHECK-LABEL: @bar_prof
|
||||
; CHECK: loop.prol:
|
||||
; CHECK: loop:
|
||||
; CHECK: %mul = mul
|
||||
; CHECK: %mul.1 = mul
|
||||
; CHECK: %mul.2 = mul
|
||||
; CHECK: %mul.3 = mul
|
||||
; CHECK: loop.epil:
|
||||
define i32 @bar_prof(i32* noalias nocapture readonly %src, i64 %c) !prof !1 {
|
||||
entry:
|
||||
br label %loop
|
||||
@ -32,7 +32,7 @@ loop.end:
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @bar_prof_flat
|
||||
; CHECK-NOT: loop.prol
|
||||
; CHECK-NOT: loop.epil
|
||||
define i32 @bar_prof_flat(i32* noalias nocapture readonly %src, i64 %c) !prof !1 {
|
||||
entry:
|
||||
br label %loop
|
||||
|
@ -171,10 +171,6 @@ for.end: ; preds = %for.body, %entry
|
||||
; should be duplicated (original and 4x unrolled).
|
||||
;
|
||||
; CHECK-LABEL: @runtime_loop_with_count4(
|
||||
; CHECK: for.body.prol:
|
||||
; CHECK: store
|
||||
; CHECK-NOT: store
|
||||
; CHECK: br i1
|
||||
; CHECK: for.body
|
||||
; CHECK: store
|
||||
; CHECK: store
|
||||
@ -182,6 +178,10 @@ for.end: ; preds = %for.body, %entry
|
||||
; CHECK: store
|
||||
; CHECK-NOT: store
|
||||
; CHECK: br i1
|
||||
; CHECK: for.body.epil:
|
||||
; CHECK: store
|
||||
; CHECK-NOT: store
|
||||
; CHECK: br i1
|
||||
define void @runtime_loop_with_count4(i32* nocapture %a, i32 %b) {
|
||||
entry:
|
||||
%cmp3 = icmp sgt i32 %b, 0
|
||||
@ -287,10 +287,6 @@ for.end: ; preds = %for.body
|
||||
; (original and 8x).
|
||||
;
|
||||
; CHECK-LABEL: @runtime_loop_with_enable(
|
||||
; CHECK: for.body.prol:
|
||||
; CHECK: store
|
||||
; CHECK-NOT: store
|
||||
; CHECK: br i1
|
||||
; CHECK: for.body:
|
||||
; CHECK: store i32
|
||||
; CHECK: store i32
|
||||
@ -302,6 +298,10 @@ for.end: ; preds = %for.body
|
||||
; CHECK: store i32
|
||||
; CHECK-NOT: store i32
|
||||
; CHECK: br i1
|
||||
; CHECK: for.body.epil:
|
||||
; CHECK: store
|
||||
; CHECK-NOT: store
|
||||
; CHECK: br i1
|
||||
define void @runtime_loop_with_enable(i32* nocapture %a, i32 %b) {
|
||||
entry:
|
||||
%cmp3 = icmp sgt i32 %b, 0
|
||||
@ -328,16 +328,16 @@ for.end: ; preds = %for.body, %entry
|
||||
; should be duplicated (original and 3x unrolled).
|
||||
;
|
||||
; CHECK-LABEL: @runtime_loop_with_count3(
|
||||
; CHECK: for.body.prol:
|
||||
; CHECK: store
|
||||
; CHECK-NOT: store
|
||||
; CHECK: br i1
|
||||
; CHECK: for.body
|
||||
; CHECK: store
|
||||
; CHECK: store
|
||||
; CHECK: store
|
||||
; CHECK-NOT: store
|
||||
; CHECK: br i1
|
||||
; CHECK: for.body.epil:
|
||||
; CHECK: store
|
||||
; CHECK-NOT: store
|
||||
; CHECK: br i1
|
||||
define void @runtime_loop_with_count3(i32* nocapture %a, i32 %b) {
|
||||
entry:
|
||||
%cmp3 = icmp sgt i32 %b, 0
|
||||
|
Loading…
x
Reference in New Issue
Block a user