[SCEV] Fix sorting order for AddRecExprs

The existing sorting order in defined CompareSCEVComplexity sorts AddRecExprs by loop depth, but does not pay attention to dominance of loops. This can lead us to the following buggy situation: for (...) { // loop1 op1 = {A,+,B} } for (...) { // loop2 op2 = {A,+,B} S = add op1, op2 } In this case there is no guarantee that in operand list of S the op2 comes before op1 (loop depth is the same, so they will be sorted just lexicographically), so we can incorrectly treat S as a recurrence of loop1, which is wrong. This patch changes the sorting logic so that it places the dominated recs before the dominating recs. This ensures that when we pick the first recurrency in the operands order, it will be the bottom-most in terms of domination tree. The attached test set includes some tests that produce incorrect SCEV estimations and crashes with oldlogic. Reviewers: sanjoy, reames, apilipenko, anna Reviewed By: sanjoy Subscribers: llvm-commits, mzolotukhin Differential Revision: https://reviews.llvm.org/D33121 llvm-svn: 303148
2025-01-20 18:56:04 +00:00 · 2017-05-16 07:27:06 +00:00 · 2017-05-16 07:27:06 +00:00 · 66886e6d12
commit 66886e6d12
parent 60604f3f0f
2 changed files with 485 additions and 16 deletions
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@ -584,7 +584,7 @@ CompareValueComplexity(SmallSet<std::pair<Value *, Value *>, 8> &EqCache,
 static int CompareSCEVComplexity(
    SmallSet<std::pair<const SCEV *, const SCEV *>, 8> &EqCacheSCEV,
    const LoopInfo *const LI, const SCEV *LHS, const SCEV *RHS,
-    unsigned Depth = 0) {
+    DominatorTree &DT, unsigned Depth = 0) {
  // Fast-path: SCEVs are uniqued so we can do a quick equality check.
  if (LHS == RHS)
    return 0;
@ -629,9 +629,16 @@ static int CompareSCEVComplexity(
    const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
    const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);

-    // Compare addrec loop depths.
+    // If there is a dominance relationship between the loops, sort by the
+    // dominance. Otherwise, sort by depth. We require such order in getAddExpr.
    const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop();
    if (LLoop != RLoop) {
+      const BasicBlock *LHead = LLoop->getHeader(), *RHead = RLoop->getHeader();
+      assert(LHead != RHead && "Two loops share the same header?");
+      if (DT.dominates(LHead, RHead))
+        return 1;
+      else if (DT.dominates(RHead, LHead))
+        return -1;
      unsigned LDepth = LLoop->getLoopDepth(), RDepth = RLoop->getLoopDepth();
      if (LDepth != RDepth)
        return (int)LDepth - (int)RDepth;
@ -645,7 +652,7 @@ static int CompareSCEVComplexity(
    // Lexicographically compare.
    for (unsigned i = 0; i != LNumOps; ++i) {
      int X = CompareSCEVComplexity(EqCacheSCEV, LI, LA->getOperand(i),
-                                    RA->getOperand(i), Depth + 1);
+                                    RA->getOperand(i), DT,  Depth + 1);
      if (X != 0)
        return X;
    }
@ -669,7 +676,7 @@ static int CompareSCEVComplexity(
      if (i >= RNumOps)
        return 1;
      int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getOperand(i),
-                                    RC->getOperand(i), Depth + 1);
+                                    RC->getOperand(i), DT, Depth + 1);
      if (X != 0)
        return X;
    }
@ -683,10 +690,10 @@ static int CompareSCEVComplexity(

    // Lexicographically compare udiv expressions.
    int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getLHS(), RC->getLHS(),
-                                  Depth + 1);
+                                  DT, Depth + 1);
    if (X != 0)
      return X;
-    X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getRHS(), RC->getRHS(),
+    X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getRHS(), RC->getRHS(), DT,
                              Depth + 1);
    if (X == 0)
      EqCacheSCEV.insert({LHS, RHS});
@ -701,7 +708,7 @@ static int CompareSCEVComplexity(

    // Compare cast expressions by operand.
    int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getOperand(),
-                                  RC->getOperand(), Depth + 1);
+                                  RC->getOperand(), DT, Depth + 1);
    if (X == 0)
      EqCacheSCEV.insert({LHS, RHS});
    return X;
@ -724,7 +731,7 @@ static int CompareSCEVComplexity(
 /// land in memory.
 ///
 static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
-                              LoopInfo *LI) {
+                              LoopInfo *LI, DominatorTree &DT) {
  if (Ops.size() < 2) return;  // Noop

  SmallSet<std::pair<const SCEV *, const SCEV *>, 8> EqCache;
@ -732,15 +739,16 @@ static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
    // This is the common case, which also happens to be trivially simple.
    // Special case it.
    const SCEV *&LHS = Ops[0], *&RHS = Ops[1];
-    if (CompareSCEVComplexity(EqCache, LI, RHS, LHS) < 0)
+    if (CompareSCEVComplexity(EqCache, LI, RHS, LHS, DT) < 0)
      std::swap(LHS, RHS);
    return;
  }

  // Do the rough sort by complexity.
  std::stable_sort(Ops.begin(), Ops.end(),
-                   [&EqCache, LI](const SCEV *LHS, const SCEV *RHS) {
-                     return CompareSCEVComplexity(EqCache, LI, LHS, RHS) < 0;
+                   [&EqCache, LI, &DT](const SCEV *LHS, const SCEV *RHS) {
+                     return
+                         CompareSCEVComplexity(EqCache, LI, LHS, RHS, DT) < 0;
                   });

  // Now that we are sorted by complexity, group elements of the same
@ -2186,7 +2194,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
 #endif

  // Sort by complexity, this groups all similar expression types together.
-  GroupByComplexity(Ops, &LI);
+  GroupByComplexity(Ops, &LI, DT);

  Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags);

@ -2492,7 +2500,13 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
    // added together.  If so, we can fold them.
    for (unsigned OtherIdx = Idx+1;
         OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
-         ++OtherIdx)
+         ++OtherIdx) {
+      // We expect the AddRecExpr's to be sorted in reverse dominance order,
+      // so that the 1st found AddRecExpr is dominated by all others.
+      assert(DT.dominates(
+           cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()->getHeader(),
+           AddRec->getLoop()->getHeader()) &&
+        "AddRecExprs are not sorted in reverse dominance order?");
      if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
        // Other + {A,+,B}<L> + {C,+,D}<L>  -->  Other + {A+C,+,B+D}<L>
        SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
@ -2518,6 +2532,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
        Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap);
        return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
      }
+    }

    // Otherwise couldn't fold anything into this recurrence.  Move onto the
    // next one.
@ -2614,7 +2629,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
 #endif

  // Sort by complexity, this groups all similar expression types together.
-  GroupByComplexity(Ops, &LI);
+  GroupByComplexity(Ops, &LI, DT);

  Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags);

@ -3211,7 +3226,7 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
 #endif

  // Sort by complexity, this groups all similar expression types together.
-  GroupByComplexity(Ops, &LI);
+  GroupByComplexity(Ops, &LI, DT);

  // If there are any constants, fold them together.
  unsigned Idx = 0;
@ -3312,7 +3327,7 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
 #endif

  // Sort by complexity, this groups all similar expression types together.
-  GroupByComplexity(Ops, &LI);
+  GroupByComplexity(Ops, &LI, DT);

  // If there are any constants, fold them together.
  unsigned Idx = 0;
--- a/test/Analysis/ScalarEvolution/different-loops-recs.ll
+++ b/test/Analysis/ScalarEvolution/different-loops-recs.ll
@ -0,0 +1,454 @@
+; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s
+
+; This test set ensures that we can correctly operate with recurrencies from
+; different loops.
+
+; Check that we can evaluate a sum of phis from two different loops in any
+; order.
+
+define void @test_00() {
+
+; CHECK-LABEL: Classifying expressions for: @test_00
+; CHECK:       %sum1 = add i32 %phi1, %phi2
+; CHECK-NEXT:  -->  {14,+,3}<%loop1>
+; CHECK:       %sum2 = add i32 %sum1, %phi3
+; CHECK-NEXT:  -->  {20,+,6}<%loop1>
+; CHECK:       %sum3 = add i32 %phi4, %phi5
+; CHECK-NEXT:  -->  {116,+,3}<%loop2>
+; CHECK:       %sum4 = add i32 %sum3, %phi6
+; CHECK-NEXT:  -->  {159,+,6}<%loop2>
+; CHECK:       %s1 = add i32 %phi1, %phi4
+; CHECK-NEXT:  -->  {{{{}}73,+,1}<%loop1>,+,1}<%loop2>
+; CHECK:       %s2 = add i32 %phi5, %phi2
+; CHECK-NEXT:  -->  {{{{}}57,+,2}<%loop1>,+,2}<%loop2>
+; CHECK:       %s3 = add i32 %sum1, %sum3
+; CHECK-NEXT:  -->  {{{{}}130,+,3}<%loop1>,+,3}<%loop2>
+; CHECK:       %s4 = add i32 %sum4, %sum2
+; CHECK-NEXT:  -->  {{{{}}179,+,6}<%loop1>,+,6}<%loop2>
+; CHECK:       %s5 = add i32 %phi3, %sum3
+; CHECK-NEXT:  -->  {{{{}}122,+,3}<%loop1>,+,3}<%loop2>
+; CHECK:       %s6 = add i32 %sum2, %phi6
+; CHECK-NEXT:  -->  {{{{}}63,+,6}<%loop1>,+,3}<%loop2>
+
+entry:
+  br label %loop1
+
+loop1:
+  %phi1 = phi i32 [ 10, %entry ], [ %phi1.inc, %loop1 ]
+  %phi2 = phi i32 [ 4, %entry ], [ %phi2.inc, %loop1 ]
+  %phi3 = phi i32 [ 6, %entry ], [ %phi3.inc, %loop1 ]
+  %phi1.inc = add i32 %phi1, 1
+  %phi2.inc = add i32 %phi2, 2
+  %phi3.inc = add i32 %phi3, 3
+  %sum1 = add i32 %phi1, %phi2
+  %sum2 = add i32 %sum1, %phi3
+  %cond1 = icmp ult i32 %sum2, 1000
+  br i1 %cond1, label %loop1, label %loop2
+
+loop2:
+  %phi4 = phi i32 [ 63, %loop1 ], [ %phi4.inc, %loop2 ]
+  %phi5 = phi i32 [ 53, %loop1 ], [ %phi5.inc, %loop2 ]
+  %phi6 = phi i32 [ 43, %loop1 ], [ %phi6.inc, %loop2 ]
+  %phi4.inc = add i32 %phi4, 1
+  %phi5.inc = add i32 %phi5, 2
+  %phi6.inc = add i32 %phi6, 3
+  %sum3 = add i32 %phi4, %phi5
+  %sum4 = add i32 %sum3, %phi6
+  %cond2 = icmp ult i32 %sum4, 1000
+  br i1 %cond2, label %loop2, label %exit
+
+exit:
+  %s1 = add i32 %phi1, %phi4
+  %s2 = add i32 %phi5, %phi2
+  %s3 = add i32 %sum1, %sum3
+  %s4 = add i32 %sum4, %sum2
+  %s5 = add i32 %phi3, %sum3
+  %s6 = add i32 %sum2, %phi6
+  ret void
+}
+
+; Check that we can evaluate a sum of phis+invariants from two different loops
+; in any order.
+
+define void @test_01(i32 %a, i32 %b) {
+
+; CHECK-LABEL: Classifying expressions for: @test_01
+; CHECK:       %sum1 = add i32 %phi1, %phi2
+; CHECK-NEXT:  -->  {(%a + %b),+,3}<%loop1>
+; CHECK:       %sum2 = add i32 %sum1, %phi3
+; CHECK-NEXT:  -->  {(6 + %a + %b),+,6}<%loop1>
+; CHECK:       %is1 = add i32 %sum2, %a
+; CHECK-NEXT:  -->  {(6 + (2 * %a) + %b),+,6}<%loop1>
+; CHECK:       %sum3 = add i32 %phi4, %phi5
+; CHECK-NEXT:  -->  {116,+,3}<%loop2>
+; CHECK:       %sum4 = add i32 %sum3, %phi6
+; CHECK-NEXT:  -->  {159,+,6}<%loop2>
+; CHECK:       %is2 = add i32 %sum4, %b
+; CHECK-NEXT:  -->  {(159 + %b),+,6}<%loop2>
+; CHECK:       %ec2 = add i32 %is1, %is2
+; CHECK-NEXT:  -->  {{{{}}(165 + (2 * %a) + (2 * %b)),+,6}<%loop1>,+,6}<%loop2>
+; CHECK:       %s1 = add i32 %phi1, %is1
+; CHECK-NEXT:  -->  {(6 + (3 * %a) + %b),+,7}<%loop1>
+; CHECK:       %s2 = add i32 %is2, %phi4
+; CHECK-NEXT:  -->  {(222 + %b),+,7}<%loop2>
+; CHECK:       %s3 = add i32 %is1, %phi5
+; CHECK-NEXT:  -->  {{{{}}(59 + (2 * %a) + %b),+,6}<%loop1>,+,2}<%loop2>
+; CHECK:       %s4 = add i32 %phi2, %is2
+; CHECK-NEXT:  -->  {{{{}}(159 + (2 * %b)),+,2}<%loop1>,+,6}<%loop2>
+; CHECK:       %s5 = add i32 %is1, %is2
+; CHECK-NEXT:  -->  {{{{}}(165 + (2 * %a) + (2 * %b)),+,6}<%loop1>,+,6}<%loop2>
+; CHECK:       %s6 = add i32 %is2, %is1
+; CHECK-NEXT:  -->  {{{{}}(165 + (2 * %a) + (2 * %b)),+,6}<%loop1>,+,6}<%loop2>
+
+entry:
+  br label %loop1
+
+loop1:
+  %phi1 = phi i32 [ %a, %entry ], [ %phi1.inc, %loop1 ]
+  %phi2 = phi i32 [ %b, %entry ], [ %phi2.inc, %loop1 ]
+  %phi3 = phi i32 [ 6, %entry ], [ %phi3.inc, %loop1 ]
+  %phi1.inc = add i32 %phi1, 1
+  %phi2.inc = add i32 %phi2, 2
+  %phi3.inc = add i32 %phi3, 3
+  %sum1 = add i32 %phi1, %phi2
+  %sum2 = add i32 %sum1, %phi3
+  %is1 = add i32 %sum2, %a
+  %cond1 = icmp ult i32 %is1, 1000
+  br i1 %cond1, label %loop1, label %loop2
+
+loop2:
+  %phi4 = phi i32 [ 63, %loop1 ], [ %phi4.inc, %loop2 ]
+  %phi5 = phi i32 [ 53, %loop1 ], [ %phi5.inc, %loop2 ]
+  %phi6 = phi i32 [ 43, %loop1 ], [ %phi6.inc, %loop2 ]
+  %phi4.inc = add i32 %phi4, 1
+  %phi5.inc = add i32 %phi5, 2
+  %phi6.inc = add i32 %phi6, 3
+  %sum3 = add i32 %phi4, %phi5
+  %sum4 = add i32 %sum3, %phi6
+  %is2 = add i32 %sum4, %b
+  %ec2 = add i32 %is1, %is2
+  %cond2 = icmp ult i32 %ec2, 1000
+  br i1 %cond2, label %loop2, label %exit
+
+exit:
+  %s1 = add i32 %phi1, %is1
+  %s2 = add i32 %is2, %phi4
+  %s3 = add i32 %is1, %phi5
+  %s4 = add i32 %phi2, %is2
+  %s5 = add i32 %is1, %is2
+  %s6 = add i32 %is2, %is1
+  ret void
+}
+
+; Check that we can correctly evaluate a sum of phis+variants from two different
+; loops in any order.
+
+define void @test_02(i32 %a, i32 %b, i32* %p) {
+
+; CHECK-LABEL: Classifying expressions for: @test_02
+; CHECK:       %sum1 = add i32 %phi1, %phi2
+; CHECK-NEXT:  -->  {(%a + %b),+,3}<%loop1>
+; CHECK:       %sum2 = add i32 %sum1, %phi3
+; CHECK-NEXT:  -->  {(6 + %a + %b),+,6}<%loop1>
+; CHECK:       %is1 = add i32 %sum2, %v1
+; CHECK-NEXT:  -->  ({(6 + %a + %b),+,6}<%loop1> + %v1)
+; CHECK:       %sum3 = add i32 %phi4, %phi5
+; CHECK-NEXT:  -->  {(%a + %b),+,3}<%loop2>
+; CHECK:       %sum4 = add i32 %sum3, %phi6
+; CHECK-NEXT:  -->  {(43 + %a + %b),+,6}<%loop2>
+; CHECK:       %is2 = add i32 %sum4, %v2
+; CHECK-NEXT:  -->  ({(43 + %a + %b),+,6}<%loop2> + %v2)
+; CHECK:       %is3 = add i32 %v1, %sum2
+; CHECK-NEXT:  -->  ({(6 + %a + %b),+,6}<%loop1> + %v1)
+; CHECK:       %ec2 = add i32 %is1, %is3
+; CHECK-NEXT:  -->  (2 * ({(6 + %a + %b),+,6}<%loop1> + %v1))
+; CHECK:       %s1 = add i32 %phi1, %is1
+; CHECK-NEXT:  -->  ({(6 + (2 * %a) + %b),+,7}<%loop1> + %v1)
+; CHECK:       %s2 = add i32 %is2, %phi4
+; CHECK-NEXT:  -->  ({(43 + (2 * %a) + %b),+,7}<%loop2> + %v2)
+; CHECK:       %s3 = add i32 %is1, %phi5
+; CHECK-NEXT:  -->  {({(6 + (2 * %b) + %a),+,6}<%loop1> + %v1),+,2}<%loop2>
+; CHECK:       %s4 = add i32 %phi2, %is2
+; CHECK-NEXT:  -->  ({{{{}}(43 + (2 * %b) + %a),+,2}<%loop1>,+,6}<%loop2> + %v2)
+; CHECK:       %s5 = add i32 %is1, %is2
+; CHECK-NEXT:  -->  ({({(49 + (2 * %a) + (2 * %b)),+,6}<%loop1> + %v1),+,6}<%loop2> + %v2)
+; CHECK:       %s6 = add i32 %is2, %is1
+; CHECK-NEXT:  -->  ({({(49 + (2 * %a) + (2 * %b)),+,6}<%loop1> + %v1),+,6}<%loop2> + %v2)
+
+entry:
+  br label %loop1
+
+loop1:
+  %phi1 = phi i32 [ %a, %entry ], [ %phi1.inc, %loop1 ]
+  %phi2 = phi i32 [ %b, %entry ], [ %phi2.inc, %loop1 ]
+  %phi3 = phi i32 [ 6, %entry ], [ %phi3.inc, %loop1 ]
+  %phi1.inc = add i32 %phi1, 1
+  %phi2.inc = add i32 %phi2, 2
+  %phi3.inc = add i32 %phi3, 3
+  %v1 = load i32, i32* %p
+  %sum1 = add i32 %phi1, %phi2
+  %sum2 = add i32 %sum1, %phi3
+  %is1 = add i32 %sum2, %v1
+  %cond1 = icmp ult i32 %is1, 1000
+  br i1 %cond1, label %loop1, label %loop2
+
+loop2:
+  %phi4 = phi i32 [ %a, %loop1 ], [ %phi4.inc, %loop2 ]
+  %phi5 = phi i32 [ %b, %loop1 ], [ %phi5.inc, %loop2 ]
+  %phi6 = phi i32 [ 43, %loop1 ], [ %phi6.inc, %loop2 ]
+  %phi4.inc = add i32 %phi4, 1
+  %phi5.inc = add i32 %phi5, 2
+  %phi6.inc = add i32 %phi6, 3
+  %v2 = load i32, i32* %p
+  %sum3 = add i32 %phi4, %phi5
+  %sum4 = add i32 %sum3, %phi6
+  %is2 = add i32 %sum4, %v2
+  %is3 = add i32 %v1, %sum2
+  %ec2 = add i32 %is1, %is3
+  %cond2 = icmp ult i32 %ec2, 1000
+  br i1 %cond2, label %loop2, label %exit
+
+exit:
+  %s1 = add i32 %phi1, %is1
+  %s2 = add i32 %is2, %phi4
+  %s3 = add i32 %is1, %phi5
+  %s4 = add i32 %phi2, %is2
+  %s5 = add i32 %is1, %is2
+  %s6 = add i32 %is2, %is1
+  ret void
+}
+
+; Mix of previous use cases that demonstrates %s3 can be incorrectly treated as
+; a recurrence of loop1 because of operands order if we pick recurrencies in an
+; incorrect order.
+
+define void @test_03(i32 %a, i32 %b, i32 %c, i32* %p) {
+
+; CHECK-LABEL: Classifying expressions for: @test_03
+; CHECK:       %v1 = load i32, i32* %p
+; CHECK-NEXT:  -->  %v1
+; CHECK:       %s1 = add i32 %phi1, %v1
+; CHECK-NEXT:  -->  {(%a + %v1),+,1}<%loop1>
+; CHECK:       %s2 = add i32 %s1, %b
+; CHECK-NEXT:  -->  {(%a + %b + %v1),+,1}<%loop1>
+; CHECK:       %s3 = add i32 %s2, %phi2
+; CHECK-NEXT:  -->  ({{{{}}((2 * %a) + %b),+,1}<%loop1>,+,2}<%loop2> + %v1)
+
+entry:
+  br label %loop1
+
+loop1:
+  %phi1 = phi i32 [ %a, %entry ], [ %phi1.inc, %loop1 ]
+  %phi1.inc = add i32 %phi1, 1
+  %cond1 = icmp ult i32 %phi1, %c
+  br i1 %cond1, label %loop1, label %loop2
+
+loop2:
+  %phi2 = phi i32 [ %a, %loop1 ], [ %phi2.inc, %loop2 ]
+  %phi2.inc = add i32 %phi2, 2
+  %v1 = load i32, i32* %p
+  %s1 = add i32 %phi1, %v1
+  %s2 = add i32 %s1, %b
+  %s3 = add i32 %s2, %phi2
+  %cond2 = icmp ult i32 %s3, %c
+  br i1 %cond2, label %loop2, label %exit
+
+exit:
+
+  ret void
+}
+
+; Another mix of previous use cases that demonstrates that incorrect picking of
+; a loop for a recurrence may cause a crash of SCEV analysis.
+define void @test_04() {
+
+; CHECK-LABEL: Classifying expressions for: @test_04
+; CHECK:       %tmp = phi i64 [ 2, %bb ], [ %tmp4, %bb3 ]
+; CHECK-NEXT:  -->  {2,+,1}<nuw><nsw><%loop1>
+; CHECK:       %tmp2 = trunc i64 %tmp to i32
+; CHECK-NEXT:  -->  {2,+,1}<%loop1>
+; CHECK:       %tmp4 = add nuw nsw i64 %tmp, 1
+; CHECK-NEXT:  -->  {3,+,1}<nuw><%loop1>
+; CHECK:       %tmp7 = phi i64 [ %tmp15, %loop2 ], [ 2, %loop1 ]
+; CHECK-NEXT:  -->  {2,+,1}<nuw><nsw><%loop2>
+; CHECK:       %tmp10 = sub i64 %tmp9, %tmp7
+; CHECK-NEXT:  -->  ((sext i8 %tmp8 to i64) + {-2,+,-1}<nw><%loop2>)
+; CHECK:       %tmp11 = add i64 %tmp10, undef
+; CHECK-NEXT:  -->  ((sext i8 %tmp8 to i64) + {(-2 + undef),+,-1}<nw><%loop2>)
+; CHECK:       %tmp13 = trunc i64 %tmp11 to i32
+; CHECK-NEXT:  -->  ((sext i8 %tmp8 to i32) + {(trunc i64 (-2 + undef) to i32),+,-1}<%loop2>)
+; CHECK:       %tmp14 = sub i32 %tmp13, %tmp2
+; CHECK-NEXT:  -->  ((sext i8 %tmp8 to i32) + {{{{}}(-2 + (trunc i64 (-2 + undef) to i32)),+,-1}<%loop1>,+,-1}<%loop2>)
+; CHECK:       %tmp15 = add nuw nsw i64 %tmp7, 1
+; CHECK-NEXT:  -->  {3,+,1}<nuw><nsw><%loop2>
+
+bb:
+  br label %loop1
+
+loop1:
+  %tmp = phi i64 [ 2, %bb ], [ %tmp4, %bb3 ]
+  %tmp2 = trunc i64 %tmp to i32
+  br i1 undef, label %loop2, label %bb3
+
+bb3:
+  %tmp4 = add nuw nsw i64 %tmp, 1
+  br label %loop1
+
+bb5:
+  ret void
+
+loop2:
+  %tmp7 = phi i64 [ %tmp15, %loop2 ], [ 2, %loop1 ]
+  %tmp8 = load i8, i8 addrspace(1)* undef, align 1
+  %tmp9 = sext i8 %tmp8 to i64
+  %tmp10 = sub i64 %tmp9, %tmp7
+  %tmp11 = add i64 %tmp10, undef
+  %tmp13 = trunc i64 %tmp11 to i32
+  %tmp14 = sub i32 %tmp13, %tmp2
+  %tmp15 = add nuw nsw i64 %tmp7, 1
+  %tmp16 = icmp slt i64 %tmp15, %tmp
+  br i1 %tmp16, label %loop2, label %bb5
+}
+
+@A = weak global [1000 x i32] zeroinitializer, align 32
+
+; Demonstrate a situation when we can add two recs with different degrees from
+; the same loop.
+define void @test_05(i32 %N) {
+
+; CHECK-LABEL: Classifying expressions for: @test_05
+; CHECK:       %SQ = mul i32 %i.0, %i.0
+; CHECK-NEXT:  -->  {4,+,5,+,2}<%bb3>
+; CHECK:       %tmp4 = mul i32 %i.0, 2
+; CHECK-NEXT:  -->  {4,+,2}<%bb3>
+; CHECK:       %tmp5 = sub i32 %SQ, %tmp4
+; CHECK-NEXT:  -->  {0,+,3,+,2}<%bb3>
+
+entry:
+        %"alloca point" = bitcast i32 0 to i32           ; <i32> [#uses=0]
+        br label %bb3
+
+bb:             ; preds = %bb3
+        %tmp = getelementptr [1000 x i32], [1000 x i32]* @A, i32 0, i32 %i.0          ; <i32*> [#uses=1]
+        store i32 123, i32* %tmp
+        %tmp2 = add i32 %i.0, 1         ; <i32> [#uses=1]
+        br label %bb3
+
+bb3:            ; preds = %bb, %entry
+        %i.0 = phi i32 [ 2, %entry ], [ %tmp2, %bb ]            ; <i32> [#uses=3]
+        %SQ = mul i32 %i.0, %i.0
+        %tmp4 = mul i32 %i.0, 2
+        %tmp5 = sub i32 %SQ, %tmp4
+        %tmp3 = icmp sle i32 %tmp5, 9999          ; <i1> [#uses=1]
+        br i1 %tmp3, label %bb, label %bb5
+
+bb5:            ; preds = %bb3
+        br label %return
+
+return:         ; preds = %bb5
+        ret void
+}
+
+; Check that we can add Phis from different loops with different nesting, nested
+; loop comes first.
+define void @test_06() {
+
+; CHECK-LABEL: Classifying expressions for: @test_06
+; CHECK:       %s1 = add i32 %phi1, %phi2
+; CHECK-NEXT:  -->  {{{{}}30,+,1}<%loop1>,+,2}<%loop2>
+; CHECK:       %s2 = add i32 %phi2, %phi1
+; CHECK-NEXT:  -->  {{{{}}30,+,1}<%loop1>,+,2}<%loop2>
+; CHECK:       %s3 = add i32 %phi1, %phi3
+; CHECK-NEXT:  -->  {{{{}}40,+,1}<%loop1>,+,3}<%loop3>
+; CHECK:       %s4 = add i32 %phi3, %phi1
+; CHECK-NEXT:  -->  {{{{}}40,+,1}<%loop1>,+,3}<%loop3>
+; CHECK:       %s5 = add i32 %phi2, %phi3
+; CHECK-NEXT:  -->  {{{{}}50,+,2}<%loop2>,+,3}<%loop3>
+; CHECK:       %s6 = add i32 %phi3, %phi2
+; CHECK-NEXT:  -->  {{{{}}50,+,2}<%loop2>,+,3}<%loop3>
+
+entry:
+  br label %loop1
+
+loop1:
+  %phi1 = phi i32 [ 10, %entry ], [ %phi1.inc, %loop1.exit ]
+  br label %loop2
+
+loop2:
+  %phi2 = phi i32 [ 20, %loop1 ], [ %phi2.inc, %loop2 ]
+  %phi2.inc = add i32 %phi2, 2
+  %cond2 = icmp ult i32 %phi2.inc, 1000
+  br i1 %cond2, label %loop2, label %loop1.exit
+
+loop1.exit:
+  %phi1.inc = add i32 %phi1, 1
+  %cond1 = icmp ult i32 %phi1.inc, 1000
+  br i1 %cond1, label %loop1, label %loop3
+
+loop3:
+  %phi3 = phi i32 [ 30, %loop1.exit ], [ %phi3.inc, %loop3 ]
+  %phi3.inc = add i32 %phi3, 3
+  %cond3 = icmp ult i32 %phi3.inc, 1000
+  br i1 %cond3, label %loop3, label %exit
+
+exit:
+  %s1 = add i32 %phi1, %phi2
+  %s2 = add i32 %phi2, %phi1
+  %s3 = add i32 %phi1, %phi3
+  %s4 = add i32 %phi3, %phi1
+  %s5 = add i32 %phi2, %phi3
+  %s6 = add i32 %phi3, %phi2
+  ret void
+}
+
+; Check that we can add Phis from different loops with different nesting, nested
+; loop comes second.
+define void @test_07() {
+
+; CHECK-LABEL: Classifying expressions for: @test_07
+; CHECK:       %s1 = add i32 %phi1, %phi2
+; CHECK-NEXT:  -->  {{{{}}30,+,1}<%loop1>,+,2}<%loop2>
+; CHECK:       %s2 = add i32 %phi2, %phi1
+; CHECK-NEXT:  -->  {{{{}}30,+,1}<%loop1>,+,2}<%loop2>
+; CHECK:       %s3 = add i32 %phi1, %phi3
+; CHECK-NEXT:  -->  {{{{}}40,+,3}<%loop3>,+,1}<%loop1>
+; CHECK:       %s4 = add i32 %phi3, %phi1
+; CHECK-NEXT:  -->  {{{{}}40,+,3}<%loop3>,+,1}<%loop1>
+; CHECK:       %s5 = add i32 %phi2, %phi3
+; CHECK-NEXT:  -->  {{{{}}50,+,3}<%loop3>,+,2}<%loop2>
+; CHECK:       %s6 = add i32 %phi3, %phi2
+; CHECK-NEXT:  -->  {{{{}}50,+,3}<%loop3>,+,2}<%loop2>
+
+entry:
+  br label %loop3
+
+loop3:
+  %phi3 = phi i32 [ 30, %entry ], [ %phi3.inc, %loop3 ]
+  %phi3.inc = add i32 %phi3, 3
+  %cond3 = icmp ult i32 %phi3.inc, 1000
+  br i1 %cond3, label %loop3, label %loop1
+
+loop1:
+  %phi1 = phi i32 [ 10, %loop3 ], [ %phi1.inc, %loop1.exit ]
+  br label %loop2
+
+loop2:
+  %phi2 = phi i32 [ 20, %loop1 ], [ %phi2.inc, %loop2 ]
+  %phi2.inc = add i32 %phi2, 2
+  %cond2 = icmp ult i32 %phi2.inc, 1000
+  br i1 %cond2, label %loop2, label %loop1.exit
+
+loop1.exit:
+  %phi1.inc = add i32 %phi1, 1
+  %cond1 = icmp ult i32 %phi1.inc, 1000
+  br i1 %cond1, label %exit, label %loop1
+
+exit:
+  %s1 = add i32 %phi1, %phi2
+  %s2 = add i32 %phi2, %phi1
+  %s3 = add i32 %phi1, %phi3
+  %s4 = add i32 %phi3, %phi1
+  %s5 = add i32 %phi2, %phi3
+  %s6 = add i32 %phi3, %phi2
+  ret void
+}