diff --git a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index 1f3ac8af86c..20b7e7d0a7d 100644 --- a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -543,6 +543,18 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, // the exits. Loop *OuterL = &L; + if (DefaultExitBB) { + // Clear out the default destination temporarily to allow accurate + // predecessor lists to be examined below. + SI.setDefaultDest(nullptr); + // Check the loop containing this exit. + Loop *ExitL = LI.getLoopFor(DefaultExitBB); + if (!ExitL || ExitL->contains(OuterL)) + OuterL = ExitL; + } + + // Store the exit cases into a separate data structure and remove them from + // the switch. SmallVector, 4> ExitCases; ExitCases.reserve(ExitCaseIndices.size()); // We walk the case indices backwards so that we remove the last case first @@ -576,23 +588,7 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, SI.case_begin()->getCaseSuccessor(); })) CommonSuccBB = SI.case_begin()->getCaseSuccessor(); - - if (DefaultExitBB) { - // We can't remove the default edge so replace it with an edge to either - // the single common remaining successor (if we have one) or an unreachable - // block. - if (CommonSuccBB) { - SI.setDefaultDest(CommonSuccBB); - } else { - BasicBlock *UnreachableBB = BasicBlock::Create( - ParentBB->getContext(), - Twine(ParentBB->getName()) + ".unreachable_default", - ParentBB->getParent()); - new UnreachableInst(ParentBB->getContext(), UnreachableBB); - SI.setDefaultDest(UnreachableBB); - DT.addNewBlock(UnreachableBB, ParentBB); - } - } else { + if (!DefaultExitBB) { // If we're not unswitching the default, we need it to match any cases to // have a common successor or if we have no cases it is the common // successor. @@ -688,8 +684,33 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, // pointing at unreachable and other complexity. if (CommonSuccBB) { BasicBlock *BB = SI.getParent(); + // We may have had multiple edges to this common successor block, so remove + // them as predecessors. We skip the first one, either the default or the + // actual first case. + bool SkippedFirst = DefaultExitBB == nullptr; + for (auto Case : SI.cases()) { + assert(Case.getCaseSuccessor() == CommonSuccBB && + "Non-common successor!"); + if (!SkippedFirst) { + SkippedFirst = true; + continue; + } + CommonSuccBB->removePredecessor(BB, + /*DontDeleteUselessPHIs*/ true); + } + // Now nuke the switch and replace it with a direct branch. SI.eraseFromParent(); BranchInst::Create(CommonSuccBB, BB); + } else if (DefaultExitBB) { + assert(SI.getNumCases() > 0 && + "If we had no cases we'd have a common successor!"); + // Move the last case to the default successor. This is valid as if the + // default got unswitched it cannot be reached. This has the advantage of + // being simple and keeping the number of edges from this switch to + // successors the same, and avoiding any PHI update complexity. + auto LastCaseI = std::prev(SI.case_end()); + SI.setDefaultDest(LastCaseI->getCaseSuccessor()); + SI.removeCase(LastCaseI); } // Walk the unswitched exit blocks and the unswitched split blocks and update diff --git a/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll b/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll index d9c143edc30..fc8cd5be25c 100644 --- a/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll +++ b/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll @@ -3934,3 +3934,195 @@ exit: ; CHECK: exit: ; CHECK-NEXT: ret void } + +; A devilish pattern. This is a crafty, crafty test case designed to risk +; creating indirect cycles with trivial and non-trivial unswitching. The inner +; loop has a switch with a trivial exit edge that can be unswitched, but the +; rest of the switch cannot be unswitched because its cost is too high. +; However, the unswitching of the trivial edge creates a new switch in the +; outer loop. *This* switch isn't trivial, but has a low cost to unswitch. When +; we unswitch this switch from the outer loop, we will remove it completely and +; create a clone of the inner loop on one side. This clone will then again be +; viable for unswitching the inner-most loop. This lets us check that the +; unswitching doesn't end up cycling infinitely even when the cycle is +; indirect and due to revisiting a loop after cloning. +define void @test30(i32 %arg) { +; CHECK-LABEL: define void @test30( +entry: + br label %outer.header +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i32 %arg, label %[[ENTRY_SPLIT:.*]] [ +; CHECK-NEXT: i32 1, label %[[ENTRY_SPLIT_US:.*]] +; CHECK-NEXT: i32 2, label %[[ENTRY_SPLIT_US]] +; CHECK-NEXT: ] +; +; CHECK: [[ENTRY_SPLIT_US]]: +; CHECK-NEXT: switch i32 %arg, label %[[ENTRY_SPLIT_US_SPLIT:.*]] [ +; CHECK-NEXT: i32 1, label %[[ENTRY_SPLIT_US_SPLIT_US:.*]] +; CHECK-NEXT: ] + +outer.header: + br label %inner.header + +inner.header: + switch i32 %arg, label %inner.loopexit1 [ + i32 1, label %inner.body1 + i32 2, label %inner.body2 + ] + +inner.body1: + %a = call i32 @a() + br label %inner.latch +; The (super convoluted) fully unswitched loop around `@a`. +; +; CHECK: [[ENTRY_SPLIT_US_SPLIT_US]]: +; CHECK-NEXT: br label %[[OUTER_HEADER_US_US:.*]] +; +; CHECK: [[OUTER_HEADER_US_US]]: +; CHECK-NEXT: br label %[[OUTER_HEADER_SPLIT_US_US:.*]] +; +; CHECK: [[OUTER_LATCH_US_US:.*]]: +; CHECK-NEXT: %[[OUTER_COND_US_US:.*]] = call i1 @cond() +; CHECK-NEXT: br i1 %[[OUTER_COND_US_US]], label %[[OUTER_HEADER_US_US]], label %[[EXIT_SPLIT_US_SPLIT_US:.*]] +; +; CHECK: [[OUTER_HEADER_SPLIT_US_US]]: +; CHECK-NEXT: br label %[[OUTER_HEADER_SPLIT_SPLIT_US_US_US:.*]] +; +; CHECK: [[INNER_LOOPEXIT2_US_US:.*]]: +; CHECK-NEXT: br label %[[OUTER_LATCH_US_US]] +; +; CHECK: [[OUTER_HEADER_SPLIT_SPLIT_US_US_US]]: +; CHECK-NEXT: br label %[[INNER_HEADER_US_US_US:.*]] +; +; CHECK: [[INNER_HEADER_US_US_US]]: +; CHECK-NEXT: br label %[[INNER_BODY1_US_US_US:.*]] +; +; CHECK: [[INNER_BODY1_US_US_US]]: +; CHECK-NEXT: %[[A:.*]] = call i32 @a() +; CHECK-NEXT: br label %[[INNER_LATCH_US_US_US:.*]] +; +; CHECK: [[INNER_LATCH_US_US_US]]: +; CHECK-NEXT: %[[PHI_A:.*]] = phi i32 [ %[[A]], %[[INNER_BODY1_US_US_US]] ] +; CHECK-NEXT: call void @sink1(i32 0) +; CHECK-NEXT: call void @sink1(i32 0) +; CHECK-NEXT: call void @sink1(i32 0) +; CHECK-NEXT: call void @sink1(i32 0) +; CHECK-NEXT: call void @sink1(i32 0) +; CHECK-NEXT: call void @sink1(i32 0) +; CHECK-NEXT: call void @sink1(i32 0) +; CHECK-NEXT: call void @sink1(i32 0) +; CHECK-NEXT: call void @sink1(i32 0) +; CHECK-NEXT: call void @sink1(i32 0) +; CHECK-NEXT: call void @sink1(i32 %[[PHI_A]]) +; CHECK-NEXT: %[[INNER_COND_US_US_US:.*]] = call i1 @cond() +; CHECK-NEXT: br i1 %[[INNER_COND_US_US_US]], label %[[INNER_HEADER_US_US_US]], label %[[INNER_LOOPEXIT2_SPLIT_US_US_US:.*]] +; +; CHECK: [[INNER_LOOPEXIT2_SPLIT_US_US_US]]: +; CHECK-NEXT: br label %[[INNER_LOOPEXIT2_US_US]] +; +; CHECK: [[EXIT_SPLIT_US_SPLIT_US]]: +; CHECK-NEXT: br label %[[EXIT_SPLIT_US:.*]] + + +inner.body2: + %b = call i32 @b() + br label %inner.latch +; The fully unswitched loop around `@b`. +; +; CHECK: [[ENTRY_SPLIT_US_SPLIT]]: +; CHECK-NEXT: br label %[[OUTER_HEADER_US:.*]] +; +; CHECK: [[OUTER_HEADER_US]]: +; CHECK-NEXT: br label %[[OUTER_HEADER_SPLIT_US:.*]] +; +; CHECK: [[INNER_HEADER_US:.*]]: +; CHECK-NEXT: br label %[[INNER_BODY2_US:.*]] +; +; CHECK: [[INNER_BODY2_US]]: +; CHECK-NEXT: %[[B:.*]] = call i32 @b() +; CHECK-NEXT: br label %[[INNER_LATCH_US:.*]] +; +; CHECK: [[INNER_LATCH_US]]: +; CHECK-NEXT: call void @sink1(i32 0) +; CHECK-NEXT: call void @sink1(i32 0) +; CHECK-NEXT: call void @sink1(i32 0) +; CHECK-NEXT: call void @sink1(i32 0) +; CHECK-NEXT: call void @sink1(i32 0) +; CHECK-NEXT: call void @sink1(i32 0) +; CHECK-NEXT: call void @sink1(i32 0) +; CHECK-NEXT: call void @sink1(i32 0) +; CHECK-NEXT: call void @sink1(i32 0) +; CHECK-NEXT: call void @sink1(i32 0) +; CHECK-NEXT: call void @sink1(i32 %[[B]]) +; CHECK-NEXT: %[[INNER_COND_US:.*]] = call i1 @cond() +; CHECK-NEXT: br i1 %[[INNER_COND_US]], label %[[INNER_HEADER_US]], label %[[INNER_LOOPEXIT2_SPLIT_US:.*]] +; +; CHECK: [[INNER_LOOPEXIT2_SPLIT_US]]: +; CHECK-NEXT: br label %[[INNER_LOOPEXIT2_US:.*]] +; +; CHECK: [[OUTER_LATCH_US:.*]]: +; CHECK-NEXT: %[[OUTER_COND_US:.*]] = call i1 @cond() +; CHECK-NEXT: br i1 %[[OUTER_COND_US]], label %[[OUTER_HEADER_US]], label %[[EXIT_SPLIT_US_SPLIT:.*]] +; +; CHECK: [[OUTER_HEADER_SPLIT_US]]: +; CHECK-NEXT: br label %[[OUTER_HEADER_SPLIT_SPLIT_US:.*]] +; +; CHECK: [[OUTER_HEADER_SPLIT_SPLIT_US]]: +; CHECK-NEXT: br label %[[INNER_HEADER_US]] +; +; CHECK: [[INNER_LOOPEXIT2_US]]: +; CHECK-NEXT: br label %[[OUTER_LATCH_US]] +; +; CHECK: [[EXIT_SPLIT_US]]: +; CHECK-NEXT: br label %exit + +inner.latch: + %phi = phi i32 [ %a, %inner.body1 ], [ %b, %inner.body2 ] + ; Make 10 junk calls here to ensure we're over the "50" cost threshold of + ; non-trivial unswitching for this inner switch. + call void @sink1(i32 0) + call void @sink1(i32 0) + call void @sink1(i32 0) + call void @sink1(i32 0) + call void @sink1(i32 0) + call void @sink1(i32 0) + call void @sink1(i32 0) + call void @sink1(i32 0) + call void @sink1(i32 0) + call void @sink1(i32 0) + call void @sink1(i32 %phi) + %inner.cond = call i1 @cond() + br i1 %inner.cond, label %inner.header, label %inner.loopexit2 + +inner.loopexit1: + br label %outer.latch +; The unswitched `loopexit1` path. +; +; CHECK: [[ENTRY_SPLIT]]: +; CHECK-NEXT: br label %[[OUTER_HEADER:.*]] +; +; CHECK: outer.header: +; CHECK-NEXT: br label %inner.loopexit1 +; +; CHECK: inner.loopexit1: +; CHECK-NEXT: br label %outer.latch +; +; CHECK: outer.latch: +; CHECK-NEXT: %outer.cond = call i1 @cond() +; CHECK-NEXT: br i1 %outer.cond, label %outer.header, label %[[EXIT_SPLIT:.*]] +; +; CHECK: [[EXIT_SPLIT]]: +; CHECK-NEXT: br label %exit + +inner.loopexit2: + br label %outer.latch + +outer.latch: + %outer.cond = call i1 @cond() + br i1 %outer.cond, label %outer.header, label %exit + +exit: + ret void +; CHECK: exit: +; CHECK-NEXT: ret void +} diff --git a/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll b/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll index 6b36a8a5cde..6a025a4136b 100644 --- a/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll +++ b/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll @@ -1,6 +1,7 @@ ; RUN: opt -passes='loop(unswitch),verify' -S < %s | FileCheck %s declare void @some_func() noreturn +declare void @sink(i32) declare i1 @cond() declare i32 @cond.i32() @@ -136,10 +137,9 @@ loop_begin: ] ; CHECK: loop_begin: ; CHECK-NEXT: load -; CHECK-NEXT: switch i32 %cond2, label %[[UNREACHABLE:.*]] [ +; CHECK-NEXT: switch i32 %cond2, label %loop2 [ ; CHECK-NEXT: i32 0, label %loop0 ; CHECK-NEXT: i32 1, label %loop1 -; CHECK-NEXT: i32 2, label %loop2 ; CHECK-NEXT: ] loop0: @@ -182,9 +182,6 @@ loop_exit3: ret i32 0 ; CHECK: loop_exit3: ; CHECK-NEXT: ret -; -; CHECK: [[UNREACHABLE]]: -; CHECK-NEXT: unreachable } ; This test contains a trivially unswitchable branch with an LCSSA phi node in @@ -1160,3 +1157,88 @@ exit: ; CHECK: exit: ; CHECK-NEXT: ret void } + +define void @test_unswitch_to_common_succ_with_phis(i32* %var, i32 %cond) { +; CHECK-LABEL: @test_unswitch_to_common_succ_with_phis( +entry: + br label %header +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i32 %cond, label %loopexit1 [ +; CHECK-NEXT: i32 13, label %loopexit2 +; CHECK-NEXT: i32 0, label %entry.split +; CHECK-NEXT: i32 1, label %entry.split +; CHECK-NEXT: ] +; +; CHECK: entry.split: +; CHECK-NEXT: br label %header + +header: + %var_val = load i32, i32* %var + switch i32 %cond, label %loopexit1 [ + i32 0, label %latch + i32 1, label %latch + i32 13, label %loopexit2 + ] +; CHECK: header: +; CHECK-NEXT: load +; CHECK-NEXT: br label %latch + +latch: + ; No-op PHI node to exercise weird PHI update scenarios. + %phi = phi i32 [ %var_val, %header ], [ %var_val, %header ] + call void @sink(i32 %phi) + br label %header +; CHECK: latch: +; CHECK-NEXT: %[[PHI:.*]] = phi i32 [ %var_val, %header ] +; CHECK-NEXT: call void @sink(i32 %[[PHI]]) +; CHECK-NEXT: br label %header + +loopexit1: + ret void +; CHECK: loopexit1: +; CHECK-NEXT: ret + +loopexit2: + ret void +; CHECK: loopexit2: +; CHECK-NEXT: ret +} + +define void @test_unswitch_to_default_common_succ_with_phis(i32* %var, i32 %cond) { +; CHECK-LABEL: @test_unswitch_to_default_common_succ_with_phis( +entry: + br label %header +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i32 %cond, label %entry.split [ +; CHECK-NEXT: i32 13, label %loopexit +; CHECK-NEXT: ] +; +; CHECK: entry.split: +; CHECK-NEXT: br label %header + +header: + %var_val = load i32, i32* %var + switch i32 %cond, label %latch [ + i32 0, label %latch + i32 1, label %latch + i32 13, label %loopexit + ] +; CHECK: header: +; CHECK-NEXT: load +; CHECK-NEXT: br label %latch + +latch: + ; No-op PHI node to exercise weird PHI update scenarios. + %phi = phi i32 [ %var_val, %header ], [ %var_val, %header ], [ %var_val, %header ] + call void @sink(i32 %phi) + br label %header +; CHECK: latch: +; CHECK-NEXT: %[[PHI:.*]] = phi i32 [ %var_val, %header ] +; CHECK-NEXT: call void @sink(i32 %[[PHI]]) +; CHECK-NEXT: br label %header + +loopexit: + ret void +; CHECK: loopexit: +; CHECK-NEXT: ret +} diff --git a/test/Transforms/SimpleLoopUnswitch/update-scev.ll b/test/Transforms/SimpleLoopUnswitch/update-scev.ll index 3a2d7618817..96bdc6a91b8 100644 --- a/test/Transforms/SimpleLoopUnswitch/update-scev.ll +++ b/test/Transforms/SimpleLoopUnswitch/update-scev.ll @@ -76,9 +76,7 @@ define void @test2(i32 %n, i32 %m, i32 %cond) { ; backedge-taken counts. ; SCEV-LABEL: Determining loop execution counts for: @test2 ; SCEV: Loop %inner_loop_begin: backedge-taken count is (-1 + (1 smax %m)) -; FIXME: The following backedge taken count should be known but isn't apparently -; just because of a switch in the outer loop. -; SCEV: Loop %outer_loop_begin: Unpredictable backedge-taken count. +; SCEV: Loop %outer_loop_begin: backedge-taken count is (-1 + (1 smax %n)) ; ; CHECK-LABEL: define void @test2( entry: