[LoopPeeling] Get rid of Phis that become invariant after N steps

This patch is a generalization of the improvement introduced in rL296898. Previously, we were able to peel one iteration of a loop to get rid of a Phi that becomes an invariant on the 2nd iteration. In the more general case, if a Phi becomes invariant after N iterations, we can peel N times and turn it into an invariant. In order to do this, for every Phi in the loop's header we define the Invariant Depth value, which is calculated as follows: Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge]. If %y is a loop invariant, then Depth(%x) = 1. If %y is a Phi from the loop header, Depth(%x) = Depth(%y) + 1. Otherwise, Depth(%x) is infinite. Notice that if we peel a loop, all Phis with Depth = 1 become invariants, and all other Phis with finite depth have their depth decreased by 1. Thus, peeling the first N iterations allows us to turn all Phis with Depth <= N into invariants. Reviewers: reames, apilipenko, mkuper, skatkov, anna, sanjoy Reviewed By: sanjoy Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D31613 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@300446 91177308-0d34-0410-b5e6-96231b3b80d8
2025-01-10 06:03:52 +00:00 · 2017-04-17 09:52:02 +00:00 · 2017-04-17 09:52:02 +00:00 · 63c0d1d702
commit 63c0d1d702
parent 38eae0ca5e
2 changed files with 229 additions and 23 deletions
--- a/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp
@ -46,6 +46,11 @@ static cl::opt<unsigned> UnrollForcePeelCount(
    "unroll-force-peel-count", cl::init(0), cl::Hidden,
    cl::desc("Force a peel count regardless of profiling information."));

+// Designates that a Phi is estimated to become invariant after an "infinite"
+// number of loop iterations (i.e. it may only become an invariant if the loop
+// is fully unrolled).
+static const unsigned InfiniteIterationsToInvariance = UINT_MAX;
+
 // Check whether we are capable of peeling this loop.
 static bool canPeel(Loop *L) {
  // Make sure the loop is in simplified form
@ -66,10 +71,62 @@ static bool canPeel(Loop *L) {
  return true;
 }

+// This function calculates the number of iterations after which the given Phi
+// becomes an invariant. The pre-calculated values are memoized in the map. The
+// function (shorthand: I) is calculated according to the following definition:
+// Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge].
+//   If %y is a loop invariant, then I(%x) = 1.
+//   If %y is a Phi from the loop header, I(%x) = I(%y) + 1.
+//   Otherwise, I(%x) is infinite.
+// TODO: Actually if %y is an expression that depends only on Phi %z and some
+//       loop invariants, we can estimate I(%x) = I(%z) + 1. The example
+//       looks like:
+//         %x = phi(0, %a),  <-- becomes invariant starting from 3rd iteration.
+//         %y = phi(0, 5),
+//         %a = %y + 1.
+static unsigned calculateIterationsToInvariance(
+    PHINode *Phi, Loop *L, BasicBlock *BackEdge,
+    SmallDenseMap<PHINode *, unsigned> &IterationsToInvariance) {
+  assert(Phi->getParent() == L->getHeader() &&
+         "Non-loop Phi should not be checked for turning into invariant.");
+  assert(BackEdge == L->getLoopLatch() && "Wrong latch?");
+  // If we already know the answer, take it from the map.
+  auto I = IterationsToInvariance.find(Phi);
+  if (I != IterationsToInvariance.end())
+    return I->second;
+
+  // Otherwise we need to analyze the input from the back edge.
+  Value *Input = Phi->getIncomingValueForBlock(BackEdge);
+  // Place infinity to map to avoid infinite recursion for cycled Phis. Such
+  // cycles can never stop on an invariant.
+  IterationsToInvariance[Phi] = InfiniteIterationsToInvariance;
+  unsigned ToInvariance = InfiniteIterationsToInvariance;
+
+  if (L->isLoopInvariant(Input))
+    ToInvariance = 1u;
+  else if (PHINode *IncPhi = dyn_cast<PHINode>(Input)) {
+    // Only consider Phis in header block.
+    if (IncPhi->getParent() != L->getHeader())
+      return InfiniteIterationsToInvariance;
+    // If the input becomes an invariant after X iterations, then our Phi
+    // becomes an invariant after X + 1 iterations.
+    unsigned InputToInvariance = calculateIterationsToInvariance(
+        IncPhi, L, BackEdge, IterationsToInvariance);
+    if (InputToInvariance != InfiniteIterationsToInvariance)
+      ToInvariance = InputToInvariance + 1u;
+  }
+
+  // If we found that this Phi lies in an invariant chain, update the map.
+  if (ToInvariance != InfiniteIterationsToInvariance)
+    IterationsToInvariance[Phi] = ToInvariance;
+  return ToInvariance;
+}
+
 // Return the number of iterations we want to peel off.
 void llvm::computePeelCount(Loop *L, unsigned LoopSize,
                            TargetTransformInfo::UnrollingPreferences &UP,
                            unsigned &TripCount) {
+  assert(LoopSize > 0 && "Zero loop size is not allowed!");
  UP.PeelCount = 0;
  if (!canPeel(L))
    return;
@ -78,31 +135,37 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
  if (!L->empty())
    return;

-  // Try to find a Phi node that has the same loop invariant as an input from
-  // its only back edge. If there is such Phi, peeling 1 iteration from the
-  // loop is profitable, because starting from 2nd iteration we will have an
-  // invariant instead of this Phi.
+  // Here we try to get rid of Phis which become invariants after 1, 2, ..., N
+  // iterations of the loop. For this we compute the number of iterations after
+  // which every Phi is guaranteed to become an invariant, and try to peel the
+  // maximum number of iterations among these values, thus turning all those
+  // Phis into invariants.
  // First, check that we can peel at least one iteration.
  if (2 * LoopSize <= UP.Threshold && UnrollPeelMaxCount > 0) {
+    // Store the pre-calculated values here.
+    SmallDenseMap<PHINode *, unsigned> IterationsToInvariance;
+    // Now go through all Phis to calculate the number of iterations they
+    // need to become invariants.
+    unsigned DesiredPeelCount = 0;
    BasicBlock *BackEdge = L->getLoopLatch();
    assert(BackEdge && "Loop is not in simplified form?");
-    BasicBlock *Header = L->getHeader();
-    // Iterate over Phis to find one with invariant input on back edge.
-    bool FoundCandidate = false;
-    PHINode *Phi;
-    for (auto BI = Header->begin(); isa<PHINode>(&*BI); ++BI) {
-      Phi = cast<PHINode>(&*BI);
-      Value *Input = Phi->getIncomingValueForBlock(BackEdge);
-      if (L->isLoopInvariant(Input)) {
-        FoundCandidate = true;
-        break;
-      }
+    for (auto BI = L->getHeader()->begin(); isa<PHINode>(&*BI); ++BI) {
+      PHINode *Phi = cast<PHINode>(&*BI);
+      unsigned ToInvariance = calculateIterationsToInvariance(
+          Phi, L, BackEdge, IterationsToInvariance);
+      if (ToInvariance != InfiniteIterationsToInvariance)
+        DesiredPeelCount = std::max(DesiredPeelCount, ToInvariance);
    }
-    if (FoundCandidate) {
-      DEBUG(dbgs() << "Peel one iteration to get rid of " << *Phi
-                   << " because starting from 2nd iteration it is always"
-                   << " an invariant\n");
-      UP.PeelCount = 1;
+    if (DesiredPeelCount > 0) {
+      // Pay respect to limitations implied by loop size and the max peel count.
+      unsigned MaxPeelCount = UnrollPeelMaxCount;
+      MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1);
+      DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
+      // The enclosing size check guarantees MaxPeelCount >= 1, so this holds.
+      assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
+      DEBUG(dbgs() << "Peel " << DesiredPeelCount << " iteration(s) to turn"
+                   << " some Phis into invariants.\n");
+      UP.PeelCount = DesiredPeelCount;
      return;
    }
  }
--- a/test/Transforms/LoopUnroll/peel-loop-not-forced.ll
+++ b/test/Transforms/LoopUnroll/peel-loop-not-forced.ll
@ -1,4 +1,4 @@
-; RUN: opt < %s -S -loop-unroll -unroll-threshold=8 | FileCheck %s
+; RUN: opt < %s -S -loop-unroll -unroll-threshold=30 | FileCheck %s

 define i32 @invariant_backedge_1(i32 %a, i32 %b) {
 ; CHECK-LABEL: @invariant_backedge_1
@ -25,10 +25,112 @@ exit:
  ret i32 %sum
 }

-; Peeling should fail due to method size.
 define i32 @invariant_backedge_2(i32 %a, i32 %b) {
+; This loop should be peeled twice because it has a Phi which becomes invariant
+; starting from 3rd iteration.
 ; CHECK-LABEL: @invariant_backedge_2
-; CHECK-NOT:   loop.peel:
+; CHECK:       loop.peel{{.*}}:
+; CHECK:       loop.peel{{.*}}:
+; CHECK:         %i = phi
+; CHECK:         %sum = phi
+; CHECK-NOT:     %half.inv = phi
+; CHECK-NOT:     %plus = phi
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %inc, %loop ]
+  %sum = phi i32 [ 0, %entry ], [ %incsum, %loop ]
+  %half.inv = phi i32 [ %a, %entry ], [ %b, %loop ]
+  %plus = phi i32 [ %a, %entry ], [ %half.inv, %loop ]
+
+  %incsum = add i32 %sum, %plus
+  %inc = add i32 %i, 1
+  %cmp = icmp slt i32 %i, 1000
+
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i32 %sum
+}
+
+define i32 @invariant_backedge_3(i32 %a, i32 %b) {
+; This loop should be peeled thrice because it has a Phi which becomes invariant
+; starting from 4th iteration.
+; CHECK-LABEL: @invariant_backedge_3
+; CHECK:       loop.peel{{.*}}:
+; CHECK:       loop.peel{{.*}}:
+; CHECK:       loop.peel{{.*}}:
+; CHECK:         %i = phi
+; CHECK:         %sum = phi
+; CHECK-NOT:     %half.inv = phi
+; CHECK-NOT:     %half.inv.2 = phi
+; CHECK-NOT:     %plus = phi
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %inc, %loop ]
+  %sum = phi i32 [ 0, %entry ], [ %incsum, %loop ]
+  %half.inv = phi i32 [ %a, %entry ], [ %b, %loop ]
+  %half.inv.2 = phi i32 [ %a, %entry ], [ %half.inv, %loop ]
+  %plus = phi i32 [ %a, %entry ], [ %half.inv.2, %loop ]
+
+  %incsum = add i32 %sum, %plus
+  %inc = add i32 %i, 1
+  %cmp = icmp slt i32 %i, 1000
+
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i32 %sum
+}
+
+define i32 @invariant_backedge_limited_by_size(i32 %a, i32 %b) {
+; This loop should normally be peeled thrice because it has a Phi which becomes
+; invariant starting from 4th iteration, but the size of the loop only allows
+; us to peel twice because we are restricted to 30 instructions in resulting
+; code. Thus, the %plus Phi node should stay in the loop even though its
+; backedge input is an invariant.
+; CHECK-LABEL: @invariant_backedge_limited_by_size
+; CHECK:       loop.peel{{.*}}:
+; CHECK:       loop.peel{{.*}}:
+; CHECK:         %i = phi
+; CHECK:         %sum = phi
+; CHECK:         %plus = phi i32 [ %a, {{.*}} ], [ %b, %loop ]
+; CHECK-NOT:     %half.inv = phi
+; CHECK-NOT:     %half.inv.2 = phi
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %inc, %loop ]
+  %sum = phi i32 [ 0, %entry ], [ %incsum, %loop ]
+  %half.inv = phi i32 [ %a, %entry ], [ %b, %loop ]
+  %half.inv.2 = phi i32 [ %a, %entry ], [ %half.inv, %loop ]
+  %plus = phi i32 [ %a, %entry ], [ %half.inv.2, %loop ]
+
+  %incsum = add i32 %sum, %plus
+  %inc = add i32 %i, 1
+  %cmp = icmp slt i32 %i, 1000
+
+  %incsum2 = add i32 %incsum, %plus
+  %incsum3 = add i32 %incsum, %plus
+  %incsum4 = add i32 %incsum, %plus
+  %incsum5 = add i32 %incsum, %plus
+  %incsum6 = add i32 %incsum, %plus
+  %incsum7 = add i32 %incsum, %plus
+
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i32 %sum
+}
+
+; Peeling should fail due to loop size.
+define i32 @invariant_backedge_negative(i32 %a, i32 %b) {
+; CHECK-LABEL: @invariant_backedge_negative
+; CHECK-NOT:   loop.peel{{.*}}:
 ; CHECK:       loop:
 ; CHECK:         %i = phi
 ; CHECK:         %sum = phi
@ -43,6 +145,47 @@ loop:

  %incsum = add i32 %sum, %plus
  %incsum2 = add i32 %incsum, %plus
+  %incsum3 = add i32 %incsum, %plus
+  %incsum4 = add i32 %incsum, %plus
+  %incsum5 = add i32 %incsum, %plus
+  %incsum6 = add i32 %incsum, %plus
+  %incsum7 = add i32 %incsum, %plus
+  %incsum8 = add i32 %incsum, %plus
+  %incsum9 = add i32 %incsum, %plus
+  %incsum10 = add i32 %incsum, %plus
+  %incsum11 = add i32 %incsum, %plus
+  %incsum12 = add i32 %incsum, %plus
+  %incsum13 = add i32 %incsum, %plus
+  %incsum14 = add i32 %incsum, %plus
+  %incsum15 = add i32 %incsum, %plus
+  %inc = add i32 %i, 1
+  %cmp = icmp slt i32 %i, 1000
+
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i32 %sum
+}
+
+define i32 @cycled_phis(i32 %a, i32 %b) {
+; Make sure that we do not crash on cycled Phis and do not peel the loop.
+; TODO: Actually this loop should be partially unrolled with factor 2.
+; CHECK-LABEL: @cycled_phis
+; CHECK-NOT:   loop.peel{{.*}}:
+; CHECK:       loop:
+; CHECK:         %i = phi
+; CHECK:         %phi.a = phi
+; CHECK:         %phi.b = phi
+; CHECK:         %sum = phi
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %inc, %loop ]
+  %phi.a = phi i32 [ %a, %entry ], [ %phi.b, %loop ]
+  %phi.b = phi i32 [ %b, %entry ], [ %phi.a, %loop ]
+  %sum = phi i32 [ 0, %entry], [ %incsum, %loop ]
+  %incsum = add i32 %sum, %phi.a
  %inc = add i32 %i, 1
  %cmp = icmp slt i32 %i, 1000