From 71a368d115d332148ff6e3eea4e4567d879d0234 Mon Sep 17 00:00:00 2001 From: Michael Zolotukhin Date: Sat, 14 Nov 2015 05:51:41 +0000 Subject: [PATCH] Don't recompute LCSSA after loop-unrolling when possible. Summary: Currently we always recompute LCSSA for outer loops after unrolling an inner loop. That leads to compile time problem when we have big loop nests, and we can solve it by avoiding unnecessary work. For instance, if w eonly do partial unrolling, we don't break LCSSA, so we don't need to rebuild it. Also, if all exits from the inner loop are inside the enclosing loop, then complete unrolling won't break LCSSA either. I replaced unconditional LCSSA recomputation with conditional recomputation + unconditional assert and added several tests, which were failing when I experimented with it. Soon I plan to follow up with a similar patch for recalculation of dominators tree. Reviewers: hfinkel, dexonsmith, bogner, joker.eph, chandlerc Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D14526 llvm-svn: 253126 --- lib/Transforms/Utils/LoopUnroll.cpp | 12 +- test/Transforms/LoopUnroll/rebuild_lcssa.ll | 119 ++++++++++++++++++++ 2 files changed, 130 insertions(+), 1 deletion(-) create mode 100644 test/Transforms/LoopUnroll/rebuild_lcssa.ll diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index ad4c388e406..3999989ec89 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -221,6 +221,12 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // Are we eliminating the loop control altogether? bool CompletelyUnroll = Count == TripCount; + SmallVector ExitBlocks; + L->getExitBlocks(ExitBlocks); + Loop *ParentL = L->getParentLoop(); + bool AllExitsAreInsideParentLoop = !ParentL || + std::all_of(ExitBlocks.begin(), ExitBlocks.end(), + [&](BasicBlock *BB) { return ParentL->contains(BB); }); // We assume a run-time trip count if the compiler cannot // figure out the loop trip count and the unroll-runtime @@ -554,7 +560,11 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, while (OuterL->getParentLoop() != LatchLoop) OuterL = OuterL->getParentLoop(); - formLCSSARecursively(*OuterL, *DT, LI, SE); + if (CompletelyUnroll && !AllExitsAreInsideParentLoop) + formLCSSARecursively(*OuterL, *DT, LI, SE); + else + assert(OuterL->isLCSSAForm(*DT) && + "Loops should be in LCSSA form after loop-unroll."); } } diff --git a/test/Transforms/LoopUnroll/rebuild_lcssa.ll b/test/Transforms/LoopUnroll/rebuild_lcssa.ll new file mode 100644 index 00000000000..49498492344 --- /dev/null +++ b/test/Transforms/LoopUnroll/rebuild_lcssa.ll @@ -0,0 +1,119 @@ +; RUN: opt < %s -loop-unroll -S | FileCheck %s +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +; This test shows how unrolling an inner loop could break LCSSA for an outer +; loop, and there is no cheap way to recover it. +; +; In this case the inner loop, L3, is being unrolled. It only runs one +; iteration, so unrolling basically means replacing +; br i1 true, label %exit, label %L3_header +; with +; br label %exit +; +; However, this change messes up the loops structure: for instance, block +; L3_body no longer belongs to L2. It becomes an exit block for L2, so LCSSA +; phis for definitions in L2 should now be placed there. In particular, we need +; to insert such a definition for %y1. + +; CHECK-LABEL: @foo1 +define void @foo1() { +entry: + br label %L1_header + +L1_header: + br label %L2_header + +L2_header: + %y1 = phi i64 [ undef, %L1_header ], [ %x.lcssa, %L2_latch ] + br label %L3_header + +L3_header: + %y2 = phi i64 [ 0, %L3_latch ], [ %y1, %L2_header ] + %x = add i64 undef, -1 + br i1 true, label %L2_latch, label %L3_body + +L2_latch: + %x.lcssa = phi i64 [ %x, %L3_header ] + br label %L2_header + +; CHECK: L3_body: +; CHECK-NEXT: %y1.lcssa = phi i64 [ %y1, %L3_header ] +L3_body: + store i64 %y1, i64* undef + br i1 false, label %L3_latch, label %L1_latch + +L3_latch: + br i1 true, label %exit, label %L3_header + +L1_latch: + %y.lcssa = phi i64 [ %y2, %L3_body ] + br label %L1_header + +exit: + ret void +} + +; Additional tests for some corner cases. +; +; CHECK-LABEL: @foo2 +define void @foo2() { +entry: + br label %L1_header + +L1_header: + br label %L2_header + +L2_header: + %a = phi i64 [ undef, %L1_header ], [ %dec_us, %L3_header ] + br label %L3_header + +L3_header: + %b = phi i64 [ 0, %L3_latch ], [ %a, %L2_header ] + %dec_us = add i64 undef, -1 + br i1 true, label %L2_header, label %L3_break_to_L1 + +; CHECK: L3_break_to_L1: +; CHECK-NEXT: %a.lcssa = phi i64 [ %a, %L3_header ] +L3_break_to_L1: + br i1 false, label %L3_latch, label %L1_latch + +L1_latch: + %b_lcssa = phi i64 [ %b, %L3_break_to_L1 ] + br label %L1_header + +L3_latch: + br i1 true, label %Exit, label %L3_header + +Exit: + ret void +} + +; CHECK-LABEL: @foo3 +define void @foo3() { +entry: + br label %L1_header + +L1_header: + %a = phi i8* [ %b, %L1_latch ], [ null, %entry ] + br i1 undef, label %L2_header, label %L1_latch + +L2_header: + br i1 undef, label %L2_latch, label %L1_latch + +; CHECK: L2_latch: +; CHECK-NEXT: %a.lcssa = phi i8* [ %a, %L2_header ] +L2_latch: + br i1 true, label %L2_exit, label %L2_header + +L1_latch: + %b = phi i8* [ undef, %L1_header ], [ null, %L2_header ] + br label %L1_header + +L2_exit: + %a_lcssa1 = phi i8* [ %a, %L2_latch ] + br label %Exit + +Exit: + %a_lcssa2 = phi i8* [ %a_lcssa1, %L2_exit ] + ret void +}