[SimplifyCFG] Handle tail-sinking of more than 2 incoming branches

This was a real restriction in the original version of SinkIfThenCodeToEnd. Now it's been rewritten, the restriction can be lifted. As part of this, we handle a very common and useful case where one of the incoming branches is actually conditional. Consider: if (a) x(1); else if (b) x(2); This produces the following CFG: [if] / \ [x(1)] [if] | | \ | | \ | [x(2)] | \ | / [ end ] [end] has two unconditional predecessor arcs and one conditional. The conditional refers to the implicit empty 'else' arc. This same pattern can also be caused by an empty default block in a switch. We can't sink the call to x() down to end because no call to x() happens on the third incoming arc (assume that x() has sideeffects for the sake of argument; if something is safe to speculate we could indeed sink nevertheless but this cannot happen in the general case and causes many extra selects). We are now able to detect this case and split off the unconditional arcs to a common successor: [if] / \ [x(1)] [if] | | \ | | \ | [x(2)] | \ / | [sink.split] | \ / [ end ] Now we can sink the call to x() into %sink.split. This can cause significant code simplification in many testcases. llvm-svn: 280364
2024-12-02 16:36:40 +00:00 · 2016-09-01 12:58:13 +00:00 · 2016-09-01 12:58:13 +00:00 · 9b64cc7de2
commit 9b64cc7de2
parent d1066659ab
7 changed files with 224 additions and 50 deletions
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@ -1577,20 +1577,62 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
  assert(BI1->isUnconditional());
  BasicBlock *BBEnd = BI1->getSuccessor(0);

-  // We currently only support branch targets with two predecessors.
-  // FIXME: this is an arbitrary restriction and should be lifted.
-  SmallVector<BasicBlock*,4> Blocks;
-  for (auto *BB : predecessors(BBEnd))
-    Blocks.push_back(BB);
-  if (Blocks.size() != 2 ||
-      !all_of(Blocks, [](const BasicBlock *BB) {
-        auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
-        return BI && BI->isUnconditional();
-      }))
+  // We support two situations:
+  //   (1) all incoming arcs are unconditional
+  //   (2) one incoming arc is conditional
+  //
+  // (2) is very common in switch defaults and
+  // else-if patterns;
+  //
+  //   if (a) f(1);
+  //   else if (b) f(2);
+  //
+  // produces:
+  //
+  //       [if]
+  //      /    \
+  //    [f(1)] [if]
+  //      |     | \
+  //      |     |  \
+  //      |  [f(2)]|
+  //       \    | /
+  //        [ end ]
+  //
+  // [end] has two unconditional predecessor arcs and one conditional. The
+  // conditional refers to the implicit empty 'else' arc. This conditional
+  // arc can also be caused by an empty default block in a switch.
+  //
+  // In this case, we attempt to sink code from all *unconditional* arcs.
+  // If we can sink instructions from these arcs (determined during the scan
+  // phase below) we insert a common successor for all unconditional arcs and
+  // connect that to [end], to enable sinking:
+  //
+  //       [if]
+  //      /    \
+  //    [x(1)] [if]
+  //      |     | \
+  //      |     |  \
+  //      |  [x(2)] |
+  //       \   /    |
+  //   [sink.split] |
+  //         \     /
+  //         [ end ]
+  //
+  SmallVector<BasicBlock*,4> UnconditionalPreds;
+  Instruction *Cond = nullptr;
+  for (auto *B : predecessors(BBEnd)) {
+    auto *T = B->getTerminator();
+    if (isa<BranchInst>(T) && cast<BranchInst>(T)->isUnconditional())
+      UnconditionalPreds.push_back(B);
+    else if ((isa<BranchInst>(T) || isa<SwitchInst>(T)) && !Cond)
+      Cond = T;
+    else
+      return false;
+  }
+  if (UnconditionalPreds.size() < 2)
    return false;
  
  bool Changed = false;
-
  // We take a two-step approach to tail sinking. First we scan from the end of
  // each block upwards in lockstep. If the n'th instruction from the end of each
  // block can be sunk, those instructions are added to ValuesToSink and we
@ -1600,7 +1642,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
  unsigned ScanIdx = 0;
  SmallPtrSet<Value*,4> InstructionsToSink;
  DenseMap<Instruction*, SmallVector<Value*,4>> PHIOperands;
-  LockstepReverseIterator LRI(Blocks);
+  LockstepReverseIterator LRI(UnconditionalPreds);
  while (LRI.isValid() &&
         canSinkInstructions(*LRI, PHIOperands)) {
    DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0] << "\n");
@ -1609,6 +1651,36 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
    --LRI;
  }

+  auto ProfitableToSinkLastInstruction = [&]() {
+    LRI.reset();
+    unsigned NumPHIdValues = 0;
+    for (auto *I : *LRI)
+      for (auto *V : PHIOperands[I])
+        if (InstructionsToSink.count(V) == 0)
+          ++NumPHIdValues;
+    DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n");
+    unsigned NumPHIInsts = NumPHIdValues / UnconditionalPreds.size();
+    if ((NumPHIdValues % UnconditionalPreds.size()) != 0)
+        NumPHIInsts++;
+    
+    return NumPHIInsts <= 1;
+  };
+
+  if (ScanIdx > 0 && Cond) {
+    // Check if we would actually sink anything first!
+    if (!ProfitableToSinkLastInstruction())
+      return false;
+    
+    DEBUG(dbgs() << "SINK: Splitting edge\n");
+    // We have a conditional edge and we're going to sink some instructions.
+    // Insert a new block postdominating all blocks we're going to sink from.
+    if (!SplitBlockPredecessors(BI1->getSuccessor(0), UnconditionalPreds,
+                                ".sink.split"))
+      // Edges couldn't be split.
+      return false;
+    Changed = true;
+  }
+  
  // Now that we've analyzed all potential sinking candidates, perform the
  // actual sink. We iteratively sink the last non-terminator of the source
  // blocks into their common successor unless doing so would require too
@ -1623,32 +1695,22 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
  // This is unlikely in practice though.
  for (unsigned SinkIdx = 0; SinkIdx != ScanIdx; ++SinkIdx) {
    DEBUG(dbgs() << "SINK: Sink: "
-                 << *Blocks[0]->getTerminator()->getPrevNode()
+                 << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
                 << "\n");

    // Because we've sunk every instruction in turn, the current instruction to
    // sink is always at index 0.
-    LRI.reset();
-    unsigned NumPHIdValues = 0;
-    for (auto *I : *LRI)
-      for (auto *V : PHIOperands[I])
-        if (InstructionsToSink.count(V) == 0)
-          ++NumPHIdValues;
-    DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n");
-    unsigned NumPHIInsts = NumPHIdValues / Blocks.size();
-    if (NumPHIdValues % Blocks.size() != 0)
-      NumPHIInsts++;
-    
-    if (NumPHIInsts > 1)
+    if (!ProfitableToSinkLastInstruction()) {
      // Too many PHIs would be created.
+      DEBUG(dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
      break;
+    }
    
-    if (!sinkLastInstruction(Blocks))
+    if (!sinkLastInstruction(UnconditionalPreds))
      return Changed;
    NumSinkCommons++;
    Changed = true;
  }
-  
  return Changed;
 }

--- a/test/CodeGen/AArch64/arm64-jumptable.ll
+++ b/test/CodeGen/AArch64/arm64-jumptable.ll
@ -2,25 +2,26 @@
 ; RUN: llc -mtriple=arm64-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-LINUX
 ; <rdar://11417675>

-define void @sum(i32* %to) {
+define void @sum(i32 %a, i32* %to, i32 %c) {
 entry:
-  switch i32 undef, label %exit [
+  switch i32 %a, label %exit [
    i32 1, label %bb1
    i32 2, label %bb2
    i32 3, label %bb3
    i32 4, label %bb4
  ]
 bb1:
-  store i32 undef, i32* %to
+  %b = add i32 %c, 1
+  store i32 %b, i32* %to
  br label %exit
 bb2:
-  store i32 undef, i32* %to
+  store i32 2, i32* %to
  br label %exit
 bb3:
-  store i32 undef, i32* %to
+  store i32 3, i32* %to
  br label %exit
 bb4:
-  store i32 undef, i32* %to
+  store i32 4, i32* %to
  br label %exit
 exit:
  ret void
--- a/test/CodeGen/AArch64/branch-folder-merge-mmos.ll
+++ b/test/CodeGen/AArch64/branch-folder-merge-mmos.ll
@ -3,7 +3,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"

 ; Function Attrs: norecurse nounwind
 define void @foo(i32 %a, i32 %b, float* nocapture %foo_arr) #0 {
-; CHECK: (load 4 from %ir.arrayidx1.{{i[1-2]}}), (load 4 from %ir.arrayidx1.{{i[1-2]}})
+; CHECK: (load 4 from %ir.arrayidx1.{{i[1-2]}})
 entry:
  %cmp = icmp sgt i32 %a, 0
  br i1 %cmp, label %if.then, label %if.end
--- a/test/CodeGen/AArch64/ifcvt-select.ll
+++ b/test/CodeGen/AArch64/ifcvt-select.ll
@ -4,7 +4,7 @@
 define i32 @foo(i32 %a, i32 %b)  {
 entry:
 ;CHECK-LABEL: foo:
-;CHECK: csinc
+;CHECK: cneg
 ;CHECK-NOT: csel
  %sub = sub nsw i32 %b, %a
  %cmp10 = icmp sgt i32 %a, 0
--- a/test/CodeGen/AArch64/rm_redundant_cmp.ll
+++ b/test/CodeGen/AArch64/rm_redundant_cmp.ll
@ -11,9 +11,9 @@
 define void @test_i16_2cmp_signed_1() {
 ; CHECK-LABEL: test_i16_2cmp_signed_1
 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK-NEXT: b.gt
+; CHECK-NEXT: b.lt
 ; CHECK-NOT: cmp
-; CHECK: b.ne
+; CHECK: ret
 entry:
  %0 = load i16, i16* getelementptr inbounds (%struct.s_signed_i16, %struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 1), align 2
  %1 = load i16, i16* getelementptr inbounds (%struct.s_signed_i16, %struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 2), align 2
@ -39,7 +39,7 @@ if.end8:                                          ; preds = %if.else, %if.then7,
 define void @test_i16_2cmp_signed_2() {
 ; CHECK-LABEL: test_i16_2cmp_signed_2
 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK-NEXT: b.le
+; CHECK-NEXT: b.gt
 ; CHECK-NOT: cmp
 ; CHECK: b.ge
 entry:
@ -67,9 +67,9 @@ if.end8:                                          ; preds = %if.else, %if.then7,
 define void @test_i16_2cmp_unsigned_1() {
 ; CHECK-LABEL: test_i16_2cmp_unsigned_1
 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK-NEXT: b.hi
+; CHECK-NEXT: b.lo
 ; CHECK-NOT: cmp
-; CHECK: b.ne
+; CHECK: ret
 entry:
  %0 = load i16, i16* getelementptr inbounds (%struct.s_unsigned_i16, %struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 1), align 2
  %1 = load i16, i16* getelementptr inbounds (%struct.s_unsigned_i16, %struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 2), align 2
@ -95,7 +95,7 @@ if.end8:                                          ; preds = %if.else, %if.then7,
 define void @test_i16_2cmp_unsigned_2() {
 ; CHECK-LABEL: test_i16_2cmp_unsigned_2
 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK-NEXT: b.ls
+; CHECK-NEXT: b.hi
 ; CHECK-NOT: cmp
 ; CHECK: b.hs
 entry:
@ -132,9 +132,9 @@ if.end8:                                          ; preds = %if.else, %if.then7,
 define void @test_i8_2cmp_signed_1() {
 ; CHECK-LABEL: test_i8_2cmp_signed_1
 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK-NEXT: b.gt
+; CHECK-NEXT: b.lt
 ; CHECK-NOT: cmp
-; CHECK: b.ne
+; CHECK: ret
 entry:
  %0 = load i8, i8* getelementptr inbounds (%struct.s_signed_i8, %struct.s_signed_i8* @cost_s, i64 0, i32 1), align 2
  %1 = load i8, i8* getelementptr inbounds (%struct.s_signed_i8, %struct.s_signed_i8* @cost_s, i64 0, i32 2), align 2
@ -160,7 +160,7 @@ if.end8:                                          ; preds = %if.else, %if.then7,
 define void @test_i8_2cmp_signed_2() {
 ; CHECK-LABEL: test_i8_2cmp_signed_2
 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK-NEXT: b.le
+; CHECK-NEXT: b.gt
 ; CHECK-NOT: cmp
 ; CHECK: b.ge
 entry:
@ -188,9 +188,9 @@ if.end8:                                          ; preds = %if.else, %if.then7,
 define void @test_i8_2cmp_unsigned_1() {
 ; CHECK-LABEL: test_i8_2cmp_unsigned_1
 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK-NEXT: b.hi
+; CHECK-NEXT: b.lo
 ; CHECK-NOT: cmp
-; CHECK: b.ne
+; CHECK: ret
 entry:
  %0 = load i8, i8* getelementptr inbounds (%struct.s_unsigned_i8, %struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 1), align 2
  %1 = load i8, i8* getelementptr inbounds (%struct.s_unsigned_i8, %struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 2), align 2
@ -216,7 +216,7 @@ if.end8:                                          ; preds = %if.else, %if.then7,
 define void @test_i8_2cmp_unsigned_2() {
 ; CHECK-LABEL: test_i8_2cmp_unsigned_2
 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK-NEXT: b.ls
+; CHECK-NEXT: b.hi
 ; CHECK-NOT: cmp
 ; CHECK: b.hs
 entry:
--- a/test/MC/ARM/data-in-code.ll
+++ b/test/MC/ARM/data-in-code.ll
@ -9,7 +9,7 @@
 ;; Ensure that if a jump table is generated that it has Mapping Symbols
 ;; marking the data-in-code region.

-define void @foo(i32* %ptr) nounwind ssp {
+define void @foo(i32* %ptr, i32 %b) nounwind ssp {
  %tmp = load i32, i32* %ptr, align 4
  switch i32 %tmp, label %exit [
    i32 0, label %bb0
@ -18,7 +18,7 @@ define void @foo(i32* %ptr) nounwind ssp {
    i32 3, label %bb3
  ]
 bb0:
-  store i32 0, i32* %ptr, align 4
+  store i32 %b, i32* %ptr, align 4
  br label %exit
 bb1:
  store i32 1, i32* %ptr, align 4
--- a/test/Transforms/SimplifyCFG/sink-common-code.ll
+++ b/test/Transforms/SimplifyCFG/sink-common-code.ll
@ -451,6 +451,117 @@ if.end:
 ; CHECK-LABEL: test_crash
 ; No checks for test_crash - just ensure it doesn't crash!

+define zeroext i1 @test16(i1 zeroext %flag, i1 zeroext %flag2, i32 %blksA, i32 %blksB, i32 %nblks) {
+
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %cmp = icmp uge i32 %blksA, %nblks
+  %frombool1 = zext i1 %cmp to i8
+  br label %if.end
+
+if.else:
+  br i1 %flag2, label %if.then2, label %if.end
+
+if.then2:
+  %add = add i32 %nblks, %blksB
+  %cmp2 = icmp ule i32 %add, %blksA
+  %frombool3 = zext i1 %cmp2 to i8
+  br label %if.end
+
+if.end:
+  %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.then2 ], [ 0, %if.else ]
+  %tobool4 = icmp ne i8 %obeys.0, 0
+  ret i1 %tobool4
+}
+
+; CHECK-LABEL: test16
+; CHECK: zext
+; CHECK-NOT: zext
+
+define zeroext i1 @test17(i32 %flag, i32 %blksA, i32 %blksB, i32 %nblks) {
+entry:
+  switch i32 %flag, label %if.end [
+    i32 0, label %if.then
+    i32 1, label %if.then2
+  ]
+
+if.then:
+  %cmp = icmp uge i32 %blksA, %nblks
+  %frombool1 = zext i1 %cmp to i8
+  br label %if.end
+
+if.then2:
+  %add = add i32 %nblks, %blksB
+  %cmp2 = icmp ule i32 %add, %blksA
+  %frombool3 = zext i1 %cmp2 to i8
+  br label %if.end
+
+if.end:
+  %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.then2 ], [ 0, %entry ]
+  %tobool4 = icmp ne i8 %obeys.0, 0
+  ret i1 %tobool4
+}
+
+; CHECK-LABEL: test17
+; CHECK: if.then:
+; CHECK-NEXT: icmp uge
+; CHECK-NEXT: br label %[[x:.*]]
+
+; CHECK: if.then2:
+; CHECK-NEXT: add
+; CHECK-NEXT: icmp ule
+; CHECK-NEXT: br label %[[x]]
+
+; CHECK: [[x]]:
+; CHECK-NEXT: %[[y:.*]] = phi i1 [ %cmp
+; CHECK-NEXT: %[[z:.*]] = zext i1 %[[y]]
+; CHECK-NEXT: br label %if.end
+
+; CHECK: if.end:
+; CHECK-NEXT: phi i8
+; CHECK-DAG: [ %[[z]], %[[x]] ]
+; CHECK-DAG: [ 0, %entry ]
+
+define zeroext i1 @test18(i32 %flag, i32 %blksA, i32 %blksB, i32 %nblks) {
+entry:
+  switch i32 %flag, label %if.then3 [
+    i32 0, label %if.then
+    i32 1, label %if.then2
+  ]
+
+if.then:
+  %cmp = icmp uge i32 %blksA, %nblks
+  %frombool1 = zext i1 %cmp to i8
+  br label %if.end
+
+if.then2:
+  %add = add i32 %nblks, %blksB
+  %cmp2 = icmp ule i32 %add, %blksA
+  %frombool3 = zext i1 %cmp2 to i8
+  br label %if.end
+
+if.then3:
+  %add2 = add i32 %nblks, %blksA
+  %cmp3 = icmp ule i32 %add2, %blksA
+  %frombool4 = zext i1 %cmp3 to i8
+  br label %if.end
+
+if.end:
+  %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.then2 ], [ %frombool4, %if.then3 ]
+  %tobool4 = icmp ne i8 %obeys.0, 0
+  ret i1 %tobool4
+}
+
+; CHECK-LABEL: test18
+; CHECK: if.end:
+; CHECK-NEXT: %[[x:.*]] = phi i1
+; CHECK-DAG: [ %cmp, %if.then ]
+; CHECK-DAG: [ %cmp2, %if.then2 ]
+; CHECK-DAG: [ %cmp3, %if.then3 ]
+; CHECK-NEXT: zext i1 %[[x]] to i8
+
 ; CHECK: !0 = !{!1, !1, i64 0}
 ; CHECK: !1 = !{!"float", !2}
 ; CHECK: !2 = !{!"an example type tree"}