Don't recompute LCSSA after loop-unrolling when possible.

Summary:
Currently we always recompute LCSSA for outer loops after unrolling an
inner loop. That leads to compile time problem when we have big loop
nests, and we can solve it by avoiding unnecessary work. For instance,
if w eonly do partial unrolling, we don't break LCSSA, so we don't need
to rebuild it. Also, if all exits from the inner loop are inside the
enclosing loop, then complete unrolling won't break LCSSA either.

I replaced unconditional LCSSA recomputation with conditional recomputation +
unconditional assert and added several tests, which were failing when I
experimented with it.

Soon I plan to follow up with a similar patch for recalculation of dominators
tree.

Reviewers: hfinkel, dexonsmith, bogner, joker.eph, chandlerc

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D14526

llvm-svn: 253126
This commit is contained in:
Michael Zolotukhin 2015-11-14 05:51:41 +00:00
parent 3093820c02
commit 71a368d115
2 changed files with 130 additions and 1 deletions

View File

@ -221,6 +221,12 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
// Are we eliminating the loop control altogether?
bool CompletelyUnroll = Count == TripCount;
SmallVector<BasicBlock *, 4> ExitBlocks;
L->getExitBlocks(ExitBlocks);
Loop *ParentL = L->getParentLoop();
bool AllExitsAreInsideParentLoop = !ParentL ||
std::all_of(ExitBlocks.begin(), ExitBlocks.end(),
[&](BasicBlock *BB) { return ParentL->contains(BB); });
// We assume a run-time trip count if the compiler cannot
// figure out the loop trip count and the unroll-runtime
@ -554,7 +560,11 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
while (OuterL->getParentLoop() != LatchLoop)
OuterL = OuterL->getParentLoop();
if (CompletelyUnroll && !AllExitsAreInsideParentLoop)
formLCSSARecursively(*OuterL, *DT, LI, SE);
else
assert(OuterL->isLCSSAForm(*DT) &&
"Loops should be in LCSSA form after loop-unroll.");
}
}

View File

@ -0,0 +1,119 @@
; RUN: opt < %s -loop-unroll -S | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; This test shows how unrolling an inner loop could break LCSSA for an outer
; loop, and there is no cheap way to recover it.
;
; In this case the inner loop, L3, is being unrolled. It only runs one
; iteration, so unrolling basically means replacing
; br i1 true, label %exit, label %L3_header
; with
; br label %exit
;
; However, this change messes up the loops structure: for instance, block
; L3_body no longer belongs to L2. It becomes an exit block for L2, so LCSSA
; phis for definitions in L2 should now be placed there. In particular, we need
; to insert such a definition for %y1.
; CHECK-LABEL: @foo1
define void @foo1() {
entry:
br label %L1_header
L1_header:
br label %L2_header
L2_header:
%y1 = phi i64 [ undef, %L1_header ], [ %x.lcssa, %L2_latch ]
br label %L3_header
L3_header:
%y2 = phi i64 [ 0, %L3_latch ], [ %y1, %L2_header ]
%x = add i64 undef, -1
br i1 true, label %L2_latch, label %L3_body
L2_latch:
%x.lcssa = phi i64 [ %x, %L3_header ]
br label %L2_header
; CHECK: L3_body:
; CHECK-NEXT: %y1.lcssa = phi i64 [ %y1, %L3_header ]
L3_body:
store i64 %y1, i64* undef
br i1 false, label %L3_latch, label %L1_latch
L3_latch:
br i1 true, label %exit, label %L3_header
L1_latch:
%y.lcssa = phi i64 [ %y2, %L3_body ]
br label %L1_header
exit:
ret void
}
; Additional tests for some corner cases.
;
; CHECK-LABEL: @foo2
define void @foo2() {
entry:
br label %L1_header
L1_header:
br label %L2_header
L2_header:
%a = phi i64 [ undef, %L1_header ], [ %dec_us, %L3_header ]
br label %L3_header
L3_header:
%b = phi i64 [ 0, %L3_latch ], [ %a, %L2_header ]
%dec_us = add i64 undef, -1
br i1 true, label %L2_header, label %L3_break_to_L1
; CHECK: L3_break_to_L1:
; CHECK-NEXT: %a.lcssa = phi i64 [ %a, %L3_header ]
L3_break_to_L1:
br i1 false, label %L3_latch, label %L1_latch
L1_latch:
%b_lcssa = phi i64 [ %b, %L3_break_to_L1 ]
br label %L1_header
L3_latch:
br i1 true, label %Exit, label %L3_header
Exit:
ret void
}
; CHECK-LABEL: @foo3
define void @foo3() {
entry:
br label %L1_header
L1_header:
%a = phi i8* [ %b, %L1_latch ], [ null, %entry ]
br i1 undef, label %L2_header, label %L1_latch
L2_header:
br i1 undef, label %L2_latch, label %L1_latch
; CHECK: L2_latch:
; CHECK-NEXT: %a.lcssa = phi i8* [ %a, %L2_header ]
L2_latch:
br i1 true, label %L2_exit, label %L2_header
L1_latch:
%b = phi i8* [ undef, %L1_header ], [ null, %L2_header ]
br label %L1_header
L2_exit:
%a_lcssa1 = phi i8* [ %a, %L2_latch ]
br label %Exit
Exit:
%a_lcssa2 = phi i8* [ %a_lcssa1, %L2_exit ]
ret void
}