Fix SCEV denormalization of expressions where the exit value from

one loop is involved in the increment of an addrec for another
loop. This fixes rdar://8168938.

llvm-svn: 108863
This commit is contained in:
Dan Gohman 2010-07-20 17:06:20 +00:00
parent e5f3371961
commit 45ba7b5c5c
2 changed files with 154 additions and 32 deletions

View File

@@ -45,7 +45,7 @@ static bool IVUseShouldUsePostIncValue(Instruction *User, Value *Operand,
// their uses occur in the predecessor block, not the block the PHI lives in)
// should still use the post-inc value. Check for this case now.
PHINode *PN = dyn_cast<PHINode>(User);
if (!PN) return false; // not a phi, not dominated by latch block.
if (!PN || !Operand) return false; // not a phi, not dominated by latch block.
// Look at all of the uses of Operand by the PHI node. If any use corresponds
// to a block that is not dominated by the latch block, give up and use the
@@ -84,6 +84,59 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
return S;
}
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
// An addrec. This is the interesting part.
SmallVector<const SCEV *, 8> Operands;
const Loop *L = AR->getLoop();
// The addrec conceptually uses its operands at loop entry.
Instruction *LUser = L->getHeader()->begin();
// Transform each operand.
for (SCEVNAryExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
I != E; ++I) {
const SCEV *O = *I;
const SCEV *N = TransformForPostIncUse(Kind, O, LUser, 0, Loops, SE, DT);
Operands.push_back(N);
}
const SCEV *Result = SE.getAddRecExpr(Operands, L);
switch (Kind) {
default: llvm_unreachable("Unexpected transform name!");
case NormalizeAutodetect:
if (IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) {
const SCEV *TransformedStep =
TransformForPostIncUse(Kind, AR->getStepRecurrence(SE),
User, OperandValToReplace, Loops, SE, DT);
Result = SE.getMinusSCEV(Result, TransformedStep);
Loops.insert(L);
}
#ifdef XDEBUG
assert(S == TransformForPostIncUse(Denormalize, Result,
User, OperandValToReplace,
Loops, SE, DT) &&
"SCEV normalization is not invertible!");
#endif
break;
case Normalize:
if (Loops.count(L)) {
const SCEV *TransformedStep =
TransformForPostIncUse(Kind, AR->getStepRecurrence(SE),
User, OperandValToReplace, Loops, SE, DT);
Result = SE.getMinusSCEV(Result, TransformedStep);
}
#ifdef XDEBUG
assert(S == TransformForPostIncUse(Denormalize, Result,
User, OperandValToReplace,
Loops, SE, DT) &&
"SCEV normalization is not invertible!");
#endif
break;
case Denormalize:
if (Loops.count(L))
Result = cast<SCEVAddRecExpr>(Result)->getPostIncExpr(SE);
break;
}
return Result;
}
if (const SCEVNAryExpr *X = dyn_cast<SCEVNAryExpr>(S)) {
SmallVector<const SCEV *, 8> Operands;
bool Changed = false;
@@ -96,37 +149,7 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
Changed |= N != O;
Operands.push_back(N);
}
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
// An addrec. This is the interesting part.
const Loop *L = AR->getLoop();
const SCEV *Result = SE.getAddRecExpr(Operands, L);
switch (Kind) {
default: llvm_unreachable("Unexpected transform name!");
case NormalizeAutodetect:
if (Instruction *OI = dyn_cast<Instruction>(OperandValToReplace))
if (IVUseShouldUsePostIncValue(User, OI, L, &DT)) {
const SCEV *TransformedStep =
TransformForPostIncUse(Kind, AR->getStepRecurrence(SE),
User, OperandValToReplace, Loops, SE, DT);
Result = SE.getMinusSCEV(Result, TransformedStep);
Loops.insert(L);
}
break;
case Normalize:
if (Loops.count(L)) {
const SCEV *TransformedStep =
TransformForPostIncUse(Kind, AR->getStepRecurrence(SE),
User, OperandValToReplace, Loops, SE, DT);
Result = SE.getMinusSCEV(Result, TransformedStep);
}
break;
case Denormalize:
if (Loops.count(L))
Result = SE.getAddExpr(Result, AR->getStepRecurrence(SE));
break;
}
return Result;
}
// If any operand actually changed, return a transformed result.
if (Changed)
switch (S->getSCEVType()) {
case scAddExpr: return SE.getAddExpr(Operands);

View File

@@ -0,0 +1,99 @@
; RUN: llc < %s -march=x86-64 | grep div | count 1
; rdar://8168938
; This testcase involves SCEV normalization with the exit value from
; one loop involved with the increment value for an addrec on another
; loop. The expression should be properly normalized and simplified,
; and require only a single division.
%0 = type { %0*, %0* }
@0 = private constant [13 x i8] c"Result: %lu\0A\00" ; <[13 x i8]*> [#uses=1]
@1 = internal constant [5 x i8] c"Huh?\00" ; <[5 x i8]*> [#uses=1]
define i32 @main(i32 %arg, i8** nocapture %arg1) nounwind {
bb:
; Entry: zero-init a stack node whose two fields point back at itself
; (a list sentinel), heap-allocate a 24-byte node, and splice it into
; the list via std::_List_node_base::hook — so the walks below see a
; short circular list ending at %tmp.
%tmp = alloca %0, align 8 ; <%0*> [#uses=11]
%tmp2 = bitcast %0* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memset.p0i8.i64(i8* %tmp2, i8 0, i64 16, i32 8, i1 false) nounwind
%tmp3 = getelementptr inbounds %0* %tmp, i64 0, i32 0 ; <%0**> [#uses=3]
store %0* %tmp, %0** %tmp3
%tmp4 = getelementptr inbounds %0* %tmp, i64 0, i32 1 ; <%0**> [#uses=1]
store %0* %tmp, %0** %tmp4
%tmp5 = call noalias i8* @_Znwm(i64 24) nounwind ; <i8*> [#uses=2]
%tmp6 = getelementptr inbounds i8* %tmp5, i64 16 ; <i8*> [#uses=2]
%tmp7 = icmp eq i8* %tmp6, null ; <i1> [#uses=1]
br i1 %tmp7, label %bb10, label %bb8
bb8: ; preds = %bb
; Store a payload value into the new node's third word.
%tmp9 = bitcast i8* %tmp6 to i32* ; <i32*> [#uses=1]
store i32 1, i32* %tmp9
br label %bb10
bb10: ; preds = %bb8, %bb
%tmp11 = bitcast i8* %tmp5 to %0* ; <%0*> [#uses=1]
call void @_ZNSt15_List_node_base4hookEPS_(%0* %tmp11, %0* %tmp) nounwind
%tmp12 = load %0** %tmp3 ; <%0*> [#uses=3]
%tmp13 = icmp eq %0* %tmp12, %tmp ; <i1> [#uses=1]
br i1 %tmp13, label %bb14, label %bb16
bb14: ; preds = %bb10
; Empty-list path: print "Huh?" and skip both counting loops.
%tmp15 = call i32 @puts(i8* getelementptr inbounds ([5 x i8]* @1, i64 0, i64 0))
br label %bb35
bb16: ; preds = %bb16, %bb10
; First loop: walk the list until the sentinel %tmp, counting elements.
; %tmp22 is the element count at exit.
%tmp17 = phi i64 [ %tmp22, %bb16 ], [ 0, %bb10 ] ; <i64> [#uses=1]
%tmp18 = phi %0* [ %tmp20, %bb16 ], [ %tmp12, %bb10 ] ; <%0*> [#uses=1]
%tmp19 = getelementptr inbounds %0* %tmp18, i64 0, i32 0 ; <%0**> [#uses=1]
%tmp20 = load %0** %tmp19 ; <%0*> [#uses=2]
%tmp21 = icmp eq %0* %tmp20, %tmp ; <i1> [#uses=1]
%tmp22 = add i64 %tmp17, 1 ; <i64> [#uses=2]
br i1 %tmp21, label %bb23, label %bb16
bb23: ; preds = %bb16
; The single division this test's RUN line counts: 100 / list length.
; The exit value %tmp22 feeds the trip count of the next loop's addrec.
%tmp24 = udiv i64 100, %tmp22 ; <i64> [#uses=1]
br label %bb25
bb25: ; preds = %bb25, %bb23
; Second loop: identical walk, recounting the same list into %tmp31.
; SCEV normalization should let this fold without a second udiv.
%tmp26 = phi i64 [ %tmp31, %bb25 ], [ 0, %bb23 ] ; <i64> [#uses=1]
%tmp27 = phi %0* [ %tmp29, %bb25 ], [ %tmp12, %bb23 ] ; <%0*> [#uses=1]
%tmp28 = getelementptr inbounds %0* %tmp27, i64 0, i32 0 ; <%0**> [#uses=1]
%tmp29 = load %0** %tmp28 ; <%0*> [#uses=2]
%tmp30 = icmp eq %0* %tmp29, %tmp ; <i1> [#uses=1]
%tmp31 = add i64 %tmp26, 1 ; <i64> [#uses=2]
br i1 %tmp30, label %bb32, label %bb25
bb32: ; preds = %bb25
; Report count * (100 / count).
%tmp33 = mul i64 %tmp31, %tmp24 ; <i64> [#uses=1]
%tmp34 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @0, i64 0, i64 0), i64 %tmp33) nounwind
br label %bb35
bb35: ; preds = %bb32, %bb14
%tmp36 = load %0** %tmp3 ; <%0*> [#uses=2]
%tmp37 = icmp eq %0* %tmp36, %tmp ; <i1> [#uses=1]
br i1 %tmp37, label %bb44, label %bb38
bb38: ; preds = %bb38, %bb35
; Cleanup loop: walk the list once more, deleting each node until the
; sentinel is reached.
%tmp39 = phi %0* [ %tmp41, %bb38 ], [ %tmp36, %bb35 ] ; <%0*> [#uses=2]
%tmp40 = getelementptr inbounds %0* %tmp39, i64 0, i32 0 ; <%0**> [#uses=1]
%tmp41 = load %0** %tmp40 ; <%0*> [#uses=2]
%tmp42 = bitcast %0* %tmp39 to i8* ; <i8*> [#uses=1]
call void @_ZdlPv(i8* %tmp42) nounwind
%tmp43 = icmp eq %0* %tmp41, %tmp ; <i1> [#uses=1]
br i1 %tmp43, label %bb44, label %bb38
bb44: ; preds = %bb38, %bb35
ret i32 0
}
declare i32 @printf(i8* nocapture, ...) nounwind
declare void @_ZNSt15_List_node_base4hookEPS_(%0*, %0*)
declare noalias i8* @_Znwm(i64)
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
declare void @_ZdlPv(i8*) nounwind
declare i32 @puts(i8* nocapture) nounwind