mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-08 21:10:35 +00:00
[RuntimeUnroll] Add heuristic for unrolling multi-exit loop
Add a profitability heuristic to enable runtime unrolling of multi-exit loop: There can be atmost two unique exit blocks for the loop and the second exit block should be a deoptimizing block. Also, there can be one other exiting block other than the latch exiting block. The reason for the latter is so that we limit the number of branches in the unrolled code to being at most the unroll factor. Deoptimizing blocks are rarely taken so these additional number of branches created due to the unrolling are predictable, since one of their target is the deopt block. Reviewers: apilipenko, reames, evstupac, mkuper Subscribers: llvm-commits Reviewed by: reames Differential Revision: https://reviews.llvm.org/D35380 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@313363 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
a50c5eba3e
commit
3c71a6c431
@ -487,9 +487,41 @@ static bool canProfitablyUnrollMultiExitLoop(
|
||||
PreserveLCSSA, UseEpilogRemainder) &&
|
||||
"Should be safe to unroll before checking profitability!");
|
||||
#endif
|
||||
|
||||
// Priority goes to UnrollRuntimeMultiExit if it's supplied.
|
||||
return UnrollRuntimeMultiExit.getNumOccurrences() ? UnrollRuntimeMultiExit
|
||||
: false;
|
||||
if (UnrollRuntimeMultiExit.getNumOccurrences())
|
||||
return UnrollRuntimeMultiExit;
|
||||
|
||||
// The main pain point with multi-exit loop unrolling is that once unrolled,
|
||||
// we will not be able to merge all blocks into a straight line code.
|
||||
// There are branches within the unrolled loop that go to the OtherExits.
|
||||
// The second point is the increase in code size, but this is true
|
||||
// irrespective of multiple exits.
|
||||
|
||||
// Note: Both the heuristics below are coarse grained. We are essentially
|
||||
// enabling unrolling of loops that have a single side exit other than the
|
||||
// normal LatchExit (i.e. exiting into a deoptimize block).
|
||||
// The heuristics considered are:
|
||||
// 1. low number of branches in the unrolled version.
|
||||
// 2. high predictability of these extra branches.
|
||||
// We avoid unrolling loops that have more than two exiting blocks. This
|
||||
// limits the total number of branches in the unrolled loop to be atmost
|
||||
// the unroll factor (since one of the exiting blocks is the latch block).
|
||||
SmallVector<BasicBlock*, 4> ExitingBlocks;
|
||||
L->getExitingBlocks(ExitingBlocks);
|
||||
if (ExitingBlocks.size() > 2)
|
||||
return false;
|
||||
|
||||
// The second heuristic is that L has one exit other than the latchexit and
|
||||
// that exit is a deoptimize block. We know that deoptimize blocks are rarely
|
||||
// taken, which also implies the branch leading to the deoptimize block is
|
||||
// highly predictable.
|
||||
return (OtherExits.size() == 1 &&
|
||||
OtherExits[0]->getTerminatingDeoptimizeCall());
|
||||
// TODO: These can be fine-tuned further to consider code size or deopt states
|
||||
// that are captured by the deoptimize exit block.
|
||||
// Also, we can extend this to support more cases, if we actually
|
||||
// know of kinds of multiexit loops that would benefit from unrolling.
|
||||
}
|
||||
|
||||
/// Insert code in the prolog/epilog code when unrolling a loop with a
|
||||
|
94
test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
Normal file
94
test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
Normal file
@ -0,0 +1,94 @@
|
||||
; RUN: opt < %s -loop-unroll -unroll-runtime=true -verify-dom-info -verify-loop-info -instcombine -S | FileCheck %s
|
||||
; RUN: opt < %s -loop-unroll -unroll-runtime=true -verify-dom-info -unroll-runtime-multi-exit=false -verify-loop-info -S | FileCheck %s -check-prefix=NOUNROLL
|
||||
|
||||
; this tests when unrolling multiple exit loop occurs by default (i.e. without specifying -unroll-runtime-multi-exit)
|
||||
|
||||
; the second exit block is a deopt block. The loop has one exiting block other than the latch.
|
||||
define i32 @test1(i32* nocapture %a, i64 %n) {
|
||||
; CHECK-LABEL: test1(
|
||||
; CHECK-LABEL: header.epil:
|
||||
; CHECK-NEXT: %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %latch.epil ], [ %indvars.iv.unr, %header.epil.preheader ]
|
||||
; CHECK-LABEL: otherexit.loopexit:
|
||||
; CHECK-NEXT: %sum.02.lcssa.ph = phi i32 [ %sum.02, %for.exiting_block ], [ %add, %for.exiting_block.1 ], [ %add.1, %for.exiting_block.2 ], [ %add.2, %for.exiting_block.3 ], [ %add.3, %for.exiting_block.4 ], [ %add.4, %for.exiting_block.5 ], [ %add.5, %for.exiting_block.6 ],
|
||||
; CHECK-NEXT: br label %otherexit
|
||||
; CHECK-LABEL: otherexit.loopexit3:
|
||||
; CHECK-NEXT: br label %otherexit
|
||||
; CHECK-LABEL: otherexit:
|
||||
; CHECK-NEXT: %sum.02.lcssa = phi i32 [ %sum.02.lcssa.ph, %otherexit.loopexit ], [ %sum.02.epil, %otherexit.loopexit3 ]
|
||||
; CHECK-NEXT: %rval = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 %sum.02.lcssa) ]
|
||||
; CHECK-NEXT: ret i32 %rval
|
||||
; CHECK-LABEL: latch.7:
|
||||
; CHECK: add i64 %indvars.iv, 8
|
||||
|
||||
; NOUNROLL: test1(
|
||||
; NOUNROLL-NOT: .epil
|
||||
; NOUNROLL-NOT: .prol
|
||||
; NOUNROLL: otherexit:
|
||||
; NOUNROLL-NEXT: %sum.02.lcssa = phi i32 [ %sum.02, %for.exiting_block ]
|
||||
; NOUNROLL-NEXT: %rval = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 %sum.02.lcssa) ]
|
||||
entry:
|
||||
br label %header
|
||||
|
||||
header:
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %latch ], [ 0, %entry ]
|
||||
%sum.02 = phi i32 [ %add, %latch ], [ 0, %entry ]
|
||||
br label %for.exiting_block
|
||||
|
||||
for.exiting_block:
|
||||
%cmp = icmp eq i64 %n, 42
|
||||
br i1 %cmp, label %otherexit, label %latch
|
||||
|
||||
latch:
|
||||
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
|
||||
%0 = load i32, i32* %arrayidx, align 4
|
||||
%add = add nsw i32 %0, %sum.02
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
%exitcond = icmp eq i64 %indvars.iv.next, %n
|
||||
br i1 %exitcond, label %latchexit, label %header
|
||||
|
||||
latchexit: ; preds = %latch
|
||||
%sum.0.lcssa = phi i32 [ %add, %latch ]
|
||||
ret i32 %sum.0.lcssa
|
||||
|
||||
otherexit:
|
||||
%rval = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 %sum.02) ]
|
||||
ret i32 %rval
|
||||
}
|
||||
|
||||
; the exit block is not a deopt block.
|
||||
define i32 @test2(i32* nocapture %a, i64 %n) {
|
||||
; CHECK-LABEL: test2(
|
||||
; CHECK-NOT: .epil
|
||||
; CHECK-NOT: .prol
|
||||
; CHECK-LABEL: otherexit:
|
||||
; CHECK-NEXT: ret i32 %sum.02
|
||||
|
||||
entry:
|
||||
br label %header
|
||||
|
||||
header:
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %latch ], [ 0, %entry ]
|
||||
%sum.02 = phi i32 [ %add, %latch ], [ 0, %entry ]
|
||||
br label %for.exiting_block
|
||||
|
||||
for.exiting_block:
|
||||
%cmp = icmp eq i64 %n, 42
|
||||
br i1 %cmp, label %otherexit, label %latch
|
||||
|
||||
latch:
|
||||
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
|
||||
%0 = load i32, i32* %arrayidx, align 4
|
||||
%add = add nsw i32 %0, %sum.02
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
%exitcond = icmp eq i64 %indvars.iv.next, %n
|
||||
br i1 %exitcond, label %latchexit, label %header
|
||||
|
||||
latchexit: ; preds = %latch
|
||||
%sum.0.lcssa = phi i32 [ %add, %latch ]
|
||||
ret i32 %sum.0.lcssa
|
||||
|
||||
otherexit:
|
||||
%rval = phi i32 [%sum.02, %for.exiting_block ]
|
||||
ret i32 %rval
|
||||
}
|
||||
declare i32 @llvm.experimental.deoptimize.i32(...)
|
Loading…
Reference in New Issue
Block a user