Fix a bug w/inbounds invalidation in LFTR

This contains fixes for two cases where we might invalidate inbounds and leave it stale in the IR (a miscompile). Case 1 is when switching to an IV with no dynamically live uses, and case 2 is when doing pre-to-post conversion on the same pointer type IV. The basic scheme used is to prove that using the given IV (pre or post increment forms) would have to already trigger UB on the path to the test we're modifying. As such, our potential UB triggering use does not change the semantics of the original program. As was pointed out in the review thread by Nikita, this is defending against a separate issue from the hasConcreteDef case. This is about poison, that's about undef. Unfortunately, the two are different, see Nikita's comment for a fuller explanation, he explains it well. (Note: I'm going to address Nikita's last style comment in a separate commit just to minimize chance of subtle bugs being introduced due to typos.) Differential Revision: https://reviews.llvm.org/D62939 llvm-svn: 363289
2024-12-26 17:57:07 +00:00 · 2019-06-13 18:23:13 +00:00 · 2019-06-13 18:23:13 +00:00 · eb88badff9
commit eb88badff9
parent 34ec6929a1
5 changed files with 96 additions and 22 deletions
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@ -31,6 +31,7 @@
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
@ -42,6 +43,7 @@
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constant.h"
@ -2077,6 +2079,48 @@ static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
  return Phi != getLoopPhiForCounter(IncV, L);
 }

+/// Return true if undefined behavior would provable be executed on the path to
+/// OnPathTo if Root produced a posion result.  Note that this doesn't say
+/// anything about whether OnPathTo is actually executed or whether Root is
+/// actually poison.  This can be used to assess whether a new use of Root can
+/// be added at a location which is control equivalent with OnPathTo (such as
+/// immediately before it) without introducing UB which didn't previously
+/// exist.  Note that a false result conveys no information.  
+static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
+                                          Instruction *OnPathTo, 
+                                          DominatorTree *DT) {
+  // Basic approach is to assume Root is poison, propagate poison forward
+  // through all users we can easily track, and then check whether any of those
+  // users are provable UB and must execute before out exiting block might
+  // exit.
+
+  // The set of all recursive users we've visited (which are assumed to all be
+  // poison because of said visit)
+  SmallSet<const Value *, 16> KnownPoison;
+  SmallVector<const Instruction*, 16> Worklist;
+  Worklist.push_back(Root);
+  while (!Worklist.empty()) {
+    const Instruction *I = Worklist.pop_back_val();
+
+    // If we know this must trigger UB on a path leading our target.
+    if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
+      return true;
+    
+    // If we can't analyze propagation through this instruction, just skip it
+    // and transitive users.  Safe as false is a conservative result.
+    if (!propagatesFullPoison(I) && I != Root)
+      continue;
+
+    if (KnownPoison.insert(I).second)
+      for (const User *User : I->users())
+        Worklist.push_back(cast<Instruction>(User));
+  }
+
+  // Might be non-UB, or might have a path we couldn't prove must execute on
+  // way to exiting bb. 
+  return false;
+}
+
 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
 /// down to checking that all operands are constant and listing instructions
 /// that may hide undef.
@ -2165,7 +2209,8 @@ static bool isLoopCounter(PHINode* Phi, Loop *L,
 /// valid count without scaling the address stride, so it remains a pointer
 /// expression as far as SCEV is concerned.
 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
-                                const SCEV *BECount, ScalarEvolution *SE) {
+                                const SCEV *BECount,
+                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
@ -2210,6 +2255,18 @@ static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
            continue;
    }

+    // Avoid introducing undefined behavior due to poison which didn't exist in
+    // the original program.  (Annoyingly, the rules for poison and undef
+    // propagation are distinct, so this does NOT cover the undef case above.)
+    // We have to ensure that we don't introduce UB by introducing a use on an
+    // iteration where said IV produces poison.  Our strategy here differs for
+    // pointers and integer IVs.  For integers, we strip and reinfer as needed,
+    // see code in linearFunctionTestReplace.  For pointers, we restrict
+    // transforms as there is no good way to reinfer inbounds once lost.
+    if (!Phi->getType()->isIntegerTy() &&
+        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
+      continue;
+    
    const SCEV *Init = AR->getStart();

    if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
@ -2338,15 +2395,30 @@ linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
-    // Add one to the "backedge-taken" count to get the trip count.
-    // This addition may overflow, which is valid as long as the comparison is
-    // truncated to BackedgeTakenCount->getType().
-    IVCount = SE->getAddExpr(BackedgeTakenCount,
-                             SE->getOne(BackedgeTakenCount->getType()));
-    // The BackedgeTaken expression contains the number of times that the
-    // backedge branches to the loop header.  This is one less than the
-    // number of times the loop executes, so use the incremented indvar.
-    CmpIndVar = IndVar->getIncomingValueForBlock(ExitingBB);
+    bool SafeToPostInc = IndVar->getType()->isIntegerTy();
+    if (!SafeToPostInc) {
+      // For pointer IVs, we chose to not strip inbounds which requires us not
+      // to add a potentially UB introducing use.  We need to either a) show
+      // the loop test we're modifying is already in post-inc form, or b) show
+      // that adding a use must not introduce UB.
+      Instruction *Inc =
+        cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
+      ICmpInst *LoopTest = getLoopTest(L, ExitingBB);
+      SafeToPostInc = LoopTest->getOperand(0) == Inc ||
+        LoopTest->getOperand(1) == Inc ||
+        mustExecuteUBIfPoisonOnPathTo(Inc, ExitingBB->getTerminator(), DT);
+    }
+    if (SafeToPostInc) {
+      // Add one to the "backedge-taken" count to get the trip count.
+      // This addition may overflow, which is valid as long as the comparison
+      // is truncated to BackedgeTakenCount->getType().
+      IVCount = SE->getAddExpr(BackedgeTakenCount,
+                               SE->getOne(BackedgeTakenCount->getType()));
+      // The BackedgeTaken expression contains the number of times that the
+      // backedge branches to the loop header.  This is one less than the
+      // number of times the loop executes, so use the incremented indvar.
+      CmpIndVar = IndVar->getIncomingValueForBlock(ExitingBB);
+    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
@ -2646,7 +2718,7 @@ bool IndVarSimplify::run(Loop *L) {
      if (BETakenCount->isZero())
        continue;
      
-      PHINode *IndVar = FindLoopCounter(L, ExitingBB, BETakenCount, SE);
+      PHINode *IndVar = FindLoopCounter(L, ExitingBB, BETakenCount, SE, DT);
      if (!IndVar)
        continue;
      
--- a/llvm/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll
+++ b/llvm/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll
@ -28,13 +28,15 @@ define void @test() nounwind {
 ; CHECK-NEXT:    br label [[FOR_BODY21_I:%.*]]
 ; CHECK:       for.body21.i:
 ; CHECK-NEXT:    [[DESTYPIXELPTR_010_I:%.*]] = phi i8* [ null, [[FOR_BODY21_LR_PH_I]] ], [ [[INCDEC_PTR_I:%.*]], [[IF_END_I126:%.*]] ]
+; CHECK-NEXT:    [[X_09_I:%.*]] = phi i32 [ 0, [[FOR_BODY21_LR_PH_I]] ], [ [[INC_I125:%.*]], [[IF_END_I126]] ]
 ; CHECK-NEXT:    br i1 undef, label [[IF_END_I126]], label [[IF_ELSE_I124:%.*]]
 ; CHECK:       if.else.i124:
 ; CHECK-NEXT:    store i8 undef, i8* [[DESTYPIXELPTR_010_I]], align 1
 ; CHECK-NEXT:    br label [[IF_END_I126]]
 ; CHECK:       if.end.i126:
 ; CHECK-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds i8, i8* [[DESTYPIXELPTR_010_I]], i32 1
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i8* [[INCDEC_PTR_I]], null
+; CHECK-NEXT:    [[INC_I125]] = add nuw i32 [[X_09_I]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[INC_I125]], undef
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY21_I]], label [[FOR_END_I129_LOOPEXIT:%.*]]
 ; CHECK:       for.end.i129.loopexit:
 ; CHECK-NEXT:    br label [[FOR_END_I129]]
--- a/llvm/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll
+++ b/llvm/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll
@ -301,13 +301,12 @@ define void @testnullptr([512 x i8]* %base) nounwind {
 ; PTR64-NEXT:    [[CMP1604192:%.*]] = icmp ult i8* undef, [[ADD_PTR1603]]
 ; PTR64-NEXT:    br i1 [[CMP1604192]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END1609:%.*]]
 ; PTR64:       for.body.preheader:
-; PTR64-NEXT:    [[SCEVGEP:%.*]] = getelementptr [512 x i8], [512 x i8]* [[BASE]], i64 1, i64 0
 ; PTR64-NEXT:    br label [[FOR_BODY:%.*]]
 ; PTR64:       for.body:
 ; PTR64-NEXT:    [[R_17193:%.*]] = phi i8* [ [[INCDEC_PTR1608:%.*]], [[FOR_BODY]] ], [ null, [[FOR_BODY_PREHEADER]] ]
 ; PTR64-NEXT:    [[INCDEC_PTR1608]] = getelementptr i8, i8* [[R_17193]], i64 1
-; PTR64-NEXT:    [[EXITCOND:%.*]] = icmp ne i8* [[INCDEC_PTR1608]], [[SCEVGEP]]
-; PTR64-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END1609_LOOPEXIT:%.*]]
+; PTR64-NEXT:    [[CMP1604:%.*]] = icmp ult i8* [[INCDEC_PTR1608]], [[ADD_PTR1603]]
+; PTR64-NEXT:    br i1 [[CMP1604]], label [[FOR_BODY]], label [[FOR_END1609_LOOPEXIT:%.*]]
 ; PTR64:       for.end1609.loopexit:
 ; PTR64-NEXT:    br label [[FOR_END1609]]
 ; PTR64:       for.end1609:
@ -321,13 +320,12 @@ define void @testnullptr([512 x i8]* %base) nounwind {
 ; PTR32-NEXT:    [[CMP1604192:%.*]] = icmp ult i8* undef, [[ADD_PTR1603]]
 ; PTR32-NEXT:    br i1 [[CMP1604192]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END1609:%.*]]
 ; PTR32:       for.body.preheader:
-; PTR32-NEXT:    [[SCEVGEP:%.*]] = getelementptr [512 x i8], [512 x i8]* [[BASE]], i32 1, i32 0
 ; PTR32-NEXT:    br label [[FOR_BODY:%.*]]
 ; PTR32:       for.body:
 ; PTR32-NEXT:    [[R_17193:%.*]] = phi i8* [ [[INCDEC_PTR1608:%.*]], [[FOR_BODY]] ], [ null, [[FOR_BODY_PREHEADER]] ]
 ; PTR32-NEXT:    [[INCDEC_PTR1608]] = getelementptr i8, i8* [[R_17193]], i64 1
-; PTR32-NEXT:    [[EXITCOND:%.*]] = icmp ne i8* [[INCDEC_PTR1608]], [[SCEVGEP]]
-; PTR32-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END1609_LOOPEXIT:%.*]]
+; PTR32-NEXT:    [[CMP1604:%.*]] = icmp ult i8* [[INCDEC_PTR1608]], [[ADD_PTR1603]]
+; PTR32-NEXT:    br i1 [[CMP1604]], label [[FOR_BODY]], label [[FOR_END1609_LOOPEXIT:%.*]]
 ; PTR32:       for.end1609.loopexit:
 ; PTR32-NEXT:    br label [[FOR_END1609]]
 ; PTR32:       for.end1609:
--- a/llvm/test/Transforms/IndVarSimplify/lftr-dead-ivs.ll
+++ b/llvm/test/Transforms/IndVarSimplify/lftr-dead-ivs.ll
@ -25,14 +25,16 @@ define void @neg_dynamically_dead_inbounds(i1 %always_false) #0 {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[P_0:%.*]] = phi i8* [ getelementptr inbounds ([240 x i8], [240 x i8]* @data, i64 0, i64 0), [[ENTRY:%.*]] ], [ [[TMP3:%.*]], [[CONT:%.*]] ]
+; CHECK-NEXT:    [[I_0:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[TMP4:%.*]], [[CONT:%.*]] ]
+; CHECK-NEXT:    [[P_0:%.*]] = phi i8* [ getelementptr inbounds ([240 x i8], [240 x i8]* @data, i64 0, i64 0), [[ENTRY]] ], [ [[TMP3:%.*]], [[CONT]] ]
 ; CHECK-NEXT:    [[TMP3]] = getelementptr inbounds i8, i8* [[P_0]], i64 1
 ; CHECK-NEXT:    br i1 [[ALWAYS_FALSE:%.*]], label [[NEVER_EXECUTED:%.*]], label [[CONT]]
 ; CHECK:       never_executed:
 ; CHECK-NEXT:    store volatile i8 0, i8* [[TMP3]]
 ; CHECK-NEXT:    br label [[CONT]]
 ; CHECK:       cont:
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i8* [[TMP3]], getelementptr (i8, i8* getelementptr inbounds ([240 x i8], [240 x i8]* @data, i64 0, i64 0), i64 246)
+; CHECK-NEXT:    [[TMP4]] = add nuw i8 [[I_0]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i8 [[TMP4]], -10
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
@ -110,7 +112,7 @@ define void @dom_store_preinc() #0 {
 ; CHECK-NEXT:    [[P_0:%.*]] = phi i8* [ getelementptr inbounds ([240 x i8], [240 x i8]* @data, i64 0, i64 0), [[ENTRY:%.*]] ], [ [[TMP3:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    store volatile i8 0, i8* [[P_0]]
 ; CHECK-NEXT:    [[TMP3]] = getelementptr inbounds i8, i8* [[P_0]], i64 1
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i8* [[TMP3]], getelementptr (i8, i8* getelementptr inbounds ([240 x i8], [240 x i8]* @data, i64 0, i64 0), i64 246)
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i8* [[P_0]], getelementptr (i8, i8* getelementptr inbounds ([240 x i8], [240 x i8]* @data, i64 0, i64 0), i64 245)
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
--- a/llvm/test/Transforms/IndVarSimplify/lftr.ll
+++ b/llvm/test/Transforms/IndVarSimplify/lftr.ll
@ -167,7 +167,7 @@ define void @test_zext(i8* %a) #0 {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i8, i8* [[DOT0]], align 1
 ; CHECK-NEXT:    [[TMP3]] = getelementptr inbounds i8, i8* [[P_0]], i64 1
 ; CHECK-NEXT:    store i8 [[TMP2]], i8* [[P_0]], align 1
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i8* [[TMP3]], getelementptr (i8, i8* getelementptr inbounds ([240 x i8], [240 x i8]* @data, i64 0, i64 0), i64 240)
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i8* [[P_0]], getelementptr inbounds ([240 x i8], [240 x i8]* @data, i64 0, i64 239)
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void