ScalarEvolution: Compute exit counts for loops with a power-of-2 step.

If we have a loop of the form `for (unsigned n = 0; n != (k & -32); n += 32) {}`, then we know that n is always divisible by 32 and the loop must terminate. Even if the loop counter overflows, this invariant still holds. PR19183. Our loop vectorizer creates this pattern, and it is also occasionally formed by loop counters derived from pointers. llvm-svn: 204728
2025-02-03 10:54:42 +00:00 · 2014-03-25 16:25:12 +00:00 · 2014-03-25 16:25:12 +00:00 · 79c500fdd4
commit 79c500fdd4
parent 1772c4217d
2 changed files with 63 additions and 0 deletions
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -5744,6 +5744,16 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr) {
      getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
    return ExitLimit(Exact, Exact, /*MustExit=*/false);
  }
+
+  // If Step is a power of two that evenly divides Start we know that the loop
+  // will always terminate.  Start may not be a constant so we just have the
+  // number of trailing zeros available.  This is safe even in presence of
+  // overflow as the recurrence will overflow to exactly 0.
+  const APInt &StepV = StepC->getValue()->getValue();
+  if (StepV.isPowerOf2() &&
+      GetMinTrailingZeros(getNegativeSCEV(Start)) >= StepV.countTrailingZeros())
+    return getUDivExactExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
+
  // Then, try to solve the above equation provided that Start is constant.
  if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
    return SolveLinEquationWithOverflow(StepC->getValue()->getValue(),
--- a/test/Analysis/ScalarEvolution/trip-count-pow2.ll
+++ b/test/Analysis/ScalarEvolution/trip-count-pow2.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s
+
+define void @test1(i32 %n) {
+entry:
+  %s = mul i32 %n, 96
+  br label %loop
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+  %i.next = add i32 %i, 32
+  %t = icmp ne i32 %i.next, %s
+  br i1 %t, label %loop, label %exit
+exit:
+  ret void
+
+; CHECK-LABEL: @test1
+; CHECK: Loop %loop: backedge-taken count is ((-32 + (96 * %n)) /u 32)
+; CHECK: Loop %loop: max backedge-taken count is ((-32 + (96 * %n)) /u 32)
+}
+
+; PR19183
+define i32 @test2(i32 %n) {
+entry:
+  %s = and i32 %n, -32
+  br label %loop
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+  %i.next = add i32 %i, 32
+  %t = icmp ne i32 %i.next, %s
+  br i1 %t, label %loop, label %exit
+exit:
+  ret i32 %i
+
+; CHECK-LABEL: @test2
+; CHECK: Loop %loop: backedge-taken count is ((-32 + (32 * (%n /u 32))) /u 32)
+; CHECK: Loop %loop: max backedge-taken count is ((-32 + (32 * (%n /u 32))) /u 32)
+}
+
+define void @test3(i32 %n) {
+entry:
+  %s = mul i32 %n, 96
+  br label %loop
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+  %i.next = add i32 %i, 96
+  %t = icmp ne i32 %i.next, %s
+  br i1 %t, label %loop, label %exit
+exit:
+  ret void
+
+; CHECK-LABEL: @test3
+; CHECK: Loop %loop: Unpredictable backedge-taken count.
+; CHECK: Loop %loop: Unpredictable max backedge-taken count.
+}