Add support for pointer induction variables even when there is no integer induction variable.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168558 91177308-0d34-0410-b5e6-96231b3b80d8
2024-11-28 06:00:30 +00:00 · 2012-11-25 08:41:35 +00:00 · 2012-11-25 08:41:35 +00:00 · 0af63ac245
commit 0af63ac245
parent 327e4cba09
2 changed files with 213 additions and 103 deletions
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@ -106,9 +106,10 @@ class SingleBlockLoopVectorizer {
 public:
  /// Ctor.
  SingleBlockLoopVectorizer(Loop *Orig, ScalarEvolution *Se, LoopInfo *Li,
-                            DominatorTree *dt, LPPassManager *Lpm,
+                            DominatorTree *dt, DataLayout *dl,
+                            LPPassManager *Lpm,
                            unsigned VecWidth):
-  OrigLoop(Orig), SE(Se), LI(Li), DT(dt), LPM(Lpm), VF(VecWidth),
+  OrigLoop(Orig), SE(Se), LI(Li), DT(dt), DL(dl), LPM(Lpm), VF(VecWidth),
  Builder(Se->getContext()), Induction(0), OldInduction(0) { }

  // Perform the actual loop widening (vectorization).
@ -167,6 +168,8 @@ private:
  LoopInfo *LI;
  // Dominator Tree.
  DominatorTree *DT;
+  // Data Layout;
+  DataLayout *DL;
  // Loop Pass Manager;
  LPPassManager *LPM;
  // The vectorization factor to use.
@ -250,10 +253,36 @@ public:
  // This POD struct holds information about the memory runtime legality
  // check that a group of pointers do not overlap.
  struct RuntimePointerCheck {
+    RuntimePointerCheck(): Need(false) {}
+
+    /// Reset the state of the pointer runtime information.
+    void reset() {
+      Need = false;
+      Pointers.clear();
+      Starts.clear();
+      Ends.clear();
+    }
+
+    /// Insert a pointer and calculate the start and end SCEVs.
+    void insert_pointer(ScalarEvolution *SE, Loop *Lp, Value *Ptr) {
+      const SCEV *Sc = SE->getSCEV(Ptr);
+      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
+      assert(AR && "Invalid addrec expression");
+      const SCEV *Ex = SE->getExitCount(Lp, Lp->getHeader());
+      const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
+      Pointers.push_back(Ptr);
+      Starts.push_back(AR->getStart());
+      Ends.push_back(ScEnd);
+    }
+
    /// This flag indicates if we need to add the runtime check.
    bool Need;
    /// Holds the pointers that we need to check.
    SmallVector<Value*, 2> Pointers;
+    /// Holds the pointer value at the beginning of the loop.
+    SmallVector<const SCEV*, 2> Starts;
+    /// Holds the pointer value at the end of the loop.
+    SmallVector<const SCEV*, 2> Ends;
  };

  /// ReductionList contains the reduction descriptors for all
@ -278,11 +307,11 @@ public:
  /// Returns the induction variables found in the loop.
  InductionList *getInductionVars() { return &Inductions; }

-  /// Check if the pointer returned by this GEP is consecutive
-  /// when the index is vectorized. This happens when the last
-  /// index of the GEP is consecutive, like the induction variable.
+  /// Check if this pointer is consecutive when vectorizing. This happens
+  /// when the last index of the GEP is the induction variable, or when the
+  /// pointer itself is an induction variable.
  /// This check allows us to vectorize A[idx] into a wide load/store.
-  bool isConsecutiveGep(Value *Ptr);
+  bool isConsecutivePtr(Value *Ptr);

  /// Returns true if the value V is uniform within the loop.
  bool isUniform(Value *V);
@ -451,7 +480,7 @@ struct LoopVectorize : public LoopPass {
          "\n");

    // If we decided that it is *legal* to vectorizer the loop then do it.
-    SingleBlockLoopVectorizer LB(L, SE, LI, DT, &LPM, VF);
+    SingleBlockLoopVectorizer LB(L, SE, LI, DT, DL, &LPM, VF);
    LB.vectorize(&LVL);

    DEBUG(verifyFunction(*L->getHeader()->getParent()));
@ -472,10 +501,6 @@ struct LoopVectorize : public LoopPass {
 };

 Value *SingleBlockLoopVectorizer::getBroadcastInstrs(Value *V) {
-  // Instructions that access the old induction variable
-  // actually want to get the new one.
-  if (V == OldInduction)
-    V = Induction;
  // Create the types.
  LLVMContext &C = V->getContext();
  Type *VTy = VectorType::get(V->getType(), VF);
@ -515,7 +540,14 @@ Value *SingleBlockLoopVectorizer::getConsecutiveVector(Value* Val) {
  return Builder.CreateAdd(Val, Cv, "induction");
 }

-bool LoopVectorizationLegality::isConsecutiveGep(Value *Ptr) {
+bool LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
+  assert(Ptr->getType()->isPointerTy() && "Unexpected non ptr");
+
+  // If this pointer is itself an induction variable, it is consecutive.
+  PHINode *Phi = dyn_cast_or_null<PHINode>(Ptr);
+  if (Phi && getInductionVars()->count(Phi))
+    return true;
+
  GetElementPtrInst *Gep = dyn_cast_or_null<GetElementPtrInst>(Ptr);
  if (!Gep)
    return false;
@ -576,7 +608,7 @@ void SingleBlockLoopVectorizer::scalarizeInstruction(Instruction *Instr) {

    // If we are accessing the old induction variable, use the new one.
    if (SrcOp == OldInduction) {
-      Params.push_back(getBroadcastInstrs(Induction));
+      Params.push_back(getVectorValue(Induction));
      continue;
    }

@ -666,9 +698,13 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
   ...
   */

+  // Some loops have a single integer induction variable, while other loops
+  // don't. One example is C++ iterators that often have multiple pointer
+  // induction variables. In the code below we also support a case where we
+  // don't have a single integer induction variable.
  OldInduction = Legal->getInduction();
-  assert(OldInduction && "We must have a single phi node.");
-  Type *IdxTy = OldInduction->getType();
+  Type *IdxTy = OldInduction ? OldInduction->getType() :
+    DL->getIntPtrType(SE->getContext());

  // Find the loop boundaries.
  const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getHeader());
@ -677,19 +713,18 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
  // Get the total trip count from the count by adding 1.
  ExitCount = SE->getAddExpr(ExitCount,
                             SE->getConstant(ExitCount->getType(), 1));
-  // We may need to extend the index in case there is a type mismatch.
-  // We know that the count starts at zero and does not overflow.
-  // We are using Zext because it should be less expensive.
-  if (ExitCount->getType() != IdxTy)
-    ExitCount = SE->getZeroExtendExpr(ExitCount, IdxTy);

  // This is the original scalar-loop preheader.
  BasicBlock *BypassBlock = OrigLoop->getLoopPreheader();
  BasicBlock *ExitBlock = OrigLoop->getExitBlock();
  assert(ExitBlock && "Must have an exit block");

-  // The loop index does not have to start at Zero. It starts with this value.
-  Value *StartIdx = OldInduction->getIncomingValueForBlock(BypassBlock);
+  // The loop index does not have to start at Zero. Find the original start
+  // value from the induction PHI node. If we don't have an induction variable
+  // then we know that it starts at zero.
+  Value *StartIdx = OldInduction ?
+    OldInduction->getIncomingValueForBlock(BypassBlock):
+    ConstantInt::get(IdxTy, 0);

  assert(OrigLoop->getNumBlocks() == 1 && "Invalid loop");
  assert(BypassBlock && "Invalid loop structure");
@ -721,7 +756,18 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
  Instruction *Loc = BypassBlock->getTerminator();

  // Count holds the overall loop count (N).
-  Value *Count = Exp.expandCodeFor(ExitCount, Induction->getType(), Loc);
+  Value *Count = Exp.expandCodeFor(ExitCount, ExitCount->getType(), Loc);
+
+  // We may need to extend the index in case there is a type mismatch.
+  // We know that the count starts at zero and does not overflow.
+  if (Count->getType() != IdxTy) {
+    // The exit count can be of pointer type. Convert it to the correct
+    // integer type.
+    if (ExitCount->getType()->isPointerTy())
+      Count = CastInst::CreatePointerCast(Count, IdxTy, "ptrcnt.to.int", Loc);
+    else
+      Count = CastInst::CreateZExtOrBitCast(Count, IdxTy, "zext.cnt", Loc);
+  }

  // Add the start index to the loop count to get the new end index.
  Value *IdxEnd = BinaryOperator::CreateAdd(Count, StartIdx, "end.idx", Loc);
@ -734,7 +780,8 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
  Value *IdxEndRoundDown = BinaryOperator::CreateAdd(CountRoundDown, StartIdx,
                                                     "end.idx.rnd.down", Loc);

-  // Now, compare the new count to zero. If it is zero, jump to the scalar part.
+  // Now, compare the new count to zero. If it is zero skip the vector loop and
+  // jump to the scalar loop.
  Value *Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
                               IdxEndRoundDown,
                               StartIdx,
@ -762,23 +809,21 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
        Ends.push_back(Ptr);
      } else {
        DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr <<"\n");
-        const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
-        Value *Start = Exp.expandCodeFor(AR->getStart(), PtrArithTy, Loc);
-        const SCEV *Ex = SE->getExitCount(OrigLoop, OrigLoop->getHeader());
-        const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
-        assert(!isa<SCEVCouldNotCompute>(ScEnd) && "Invalid scev range.");
-        Value *End = Exp.expandCodeFor(ScEnd, PtrArithTy, Loc);
+
+        Value *Start = Exp.expandCodeFor(PtrRtCheck->Starts[i],
+                                         PtrArithTy, Loc);
+        Value *End = Exp.expandCodeFor(PtrRtCheck->Ends[i], PtrArithTy, Loc);
        Starts.push_back(Start);
        Ends.push_back(End);
      }
    }

-    for (unsigned i=0; i < NumPointers; ++i) {
-      for (unsigned j=i+1; j < NumPointers; ++j) {
+    for (unsigned i = 0; i < NumPointers; ++i) {
+      for (unsigned j = i+1; j < NumPointers; ++j) {
        Value *Cmp0 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE,
-                                      Starts[0], Ends[1], "bound0", Loc);
+                                      Starts[i], Ends[j], "bound0", Loc);
        Value *Cmp1 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE,
-                                      Starts[1], Ends[0], "bound1", Loc);
+                                      Starts[j], Ends[i], "bound1", Loc);
        Value *IsConflict = BinaryOperator::Create(Instruction::And, Cmp0, Cmp1,
                                                    "found.conflict", Loc);
        if (MemoryRuntimeCheck) {
@ -812,7 +857,7 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
  // value.

  // This variable saves the new starting index for the scalar loop.
-  Value *ResumeIndex = 0;
+  PHINode *ResumeIndex = 0;
  LoopVectorizationLegality::InductionList::iterator I, E;
  LoopVectorizationLegality::InductionList *List = Legal->getInductionVars();
  for (I = List->begin(), E = List->end(); I != E; ++I) {
@ -830,7 +875,7 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
    } else {
      // For pointer induction variables, calculate the offset using
      // the end index.
-      EndValue = GetElementPtrInst::Create(I->second, IdxEndRoundDown,
+      EndValue = GetElementPtrInst::Create(I->second, CountRoundDown,
                                           "ptr.ind.end",
                                           BypassBlock->getTerminator());
    }
@ -841,10 +886,22 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
    ResumeVal->addIncoming(EndValue, VecBody);

    // Fix the scalar body counter (PHI node).
-    unsigned BlockIdx = OldInduction->getBasicBlockIndex(ScalarPH);
+    unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH);
    OrigPhi->setIncomingValue(BlockIdx, ResumeVal);
  }

+  // If we are generating a new induction variable then we also need to
+  // generate the code that calculates the exit value. This value is not
+  // simply the end of the counter because we may skip the vectorized body
+  // in case of a runtime check.
+  if (!OldInduction){
+    assert(!ResumeIndex && "Unexpected resume value found");
+    ResumeIndex = PHINode::Create(IdxTy, 2, "new.indc.resume.val",
+                                  MiddleBlock->getTerminator());
+    ResumeIndex->addIncoming(StartIdx, BypassBlock);
+    ResumeIndex->addIncoming(IdxEndRoundDown, VecBody);
+  }
+
  // Make sure that we found the index where scalar loop needs to continue.
  assert(ResumeIndex && ResumeIndex->getType()->isIntegerTy() &&
         "Invalid resume Index");
@ -953,43 +1010,54 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
        continue;
      case Instruction::PHI:{
        PHINode* P = cast<PHINode>(Inst);
-        // Special handling for the induction var.
-        if (OldInduction == Inst)
-          continue;
-
        // Handle reduction variables:
        if (Legal->getReductionVars()->count(P)) {
          // This is phase one of vectorizing PHIs.
          Type *VecTy = VectorType::get(Inst->getType(), VF);
-          WidenMap[Inst] = Builder.CreatePHI(VecTy, 2, "vec.phi");
+          WidenMap[Inst] = PHINode::Create(VecTy, 2, "vec.phi",
+                                  LoopVectorBody->getFirstInsertionPt());
          RdxPHIsToFix.push_back(P);
          continue;
        }

-        // Handle pointer inductions:
-        if (Legal->getInductionVars()->count(P)) {
-          Value *StartIdx = Legal->getInductionVars()->lookup(OldInduction);
-          Value *StartPtr = Legal->getInductionVars()->lookup(P);
-          // This is the normalized GEP that starts counting at zero.
-          Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx,
-                                                   "normalized.idx");
-          // This is the first GEP in the sequence.
-          Value *FirstGep = Builder.CreateGEP(StartPtr, NormalizedIdx,
-                                              "induc.ptr");
-          // This is the vector of results. Notice that we don't generate vector
-          // geps because scalar geps result in better code.
-          Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
-          for (unsigned int i = 0; i < VF; ++i) {
-            Value *SclrGep = Builder.CreateGEP(FirstGep, Builder.getInt32(i),
-                                               "next.gep");
-            VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
-                                                 Builder.getInt32(i),
-                                                 "insert.gep");
-          }
+        // This PHINode must be an induction variable.
+        // Make sure that we know about it.
+        assert(Legal->getInductionVars()->count(P) &&
+               "Not an induction variable");

-          WidenMap[Inst] = VecVal;
+        if (P->getType()->isIntegerTy()) {
+          assert(P == OldInduction && "Unexpected PHI");
+          WidenMap[Inst] = getBroadcastInstrs(Induction);
          continue;
        }
+
+        // Handle pointer inductions:
+        assert(P->getType()->isPointerTy() && "Unexpected type.");
+        Value *StartIdx = OldInduction ?
+          Legal->getInductionVars()->lookup(OldInduction) :
+          ConstantInt::get(Induction->getType(), 0);
+
+        // This is the pointer value coming into the loop.
+        Value *StartPtr = Legal->getInductionVars()->lookup(P);
+
+        // This is the normalized GEP that starts counting at zero.
+        Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx,
+                                                 "normalized.idx");
+
+        // This is the vector of results. Notice that we don't generate vector
+        // geps because scalar geps result in better code.
+        Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
+        for (unsigned int i = 0; i < VF; ++i) {
+          Constant *Idx = ConstantInt::get(Induction->getType(), i);
+          Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx");
+          Value *SclrGep = Builder.CreateGEP(StartPtr, GlobalIdx, "next.gep");
+          VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
+                                               Builder.getInt32(i),
+                                               "insert.gep");
+        }
+
+        WidenMap[Inst] = VecVal;
+        continue;
      }
      case Instruction::Add:
      case Instruction::FAdd:
@ -1076,21 +1144,27 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
        GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);

        // This store does not use GEPs.
-        if (!Legal->isConsecutiveGep(Gep)) {
+        if (!Legal->isConsecutivePtr(Ptr)) {
          scalarizeInstruction(Inst);
          break;
        }

-        // The last index does not have to be the induction. It can be
-        // consecutive and be a function of the index. For example A[I+1];
-        unsigned NumOperands = Gep->getNumOperands();
-        Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands - 1));
-        LastIndex = Builder.CreateExtractElement(LastIndex, Zero);
+        if (Gep) {
+          // The last index does not have to be the induction. It can be
+          // consecutive and be a function of the index. For example A[I+1];
+          unsigned NumOperands = Gep->getNumOperands();
+          Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands - 1));
+          LastIndex = Builder.CreateExtractElement(LastIndex, Zero);

-        // Create the new GEP with the new induction variable.
-        GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
-        Gep2->setOperand(NumOperands - 1, LastIndex);
-        Ptr = Builder.Insert(Gep2);
+          // Create the new GEP with the new induction variable.
+          GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
+          Gep2->setOperand(NumOperands - 1, LastIndex);
+          Ptr = Builder.Insert(Gep2);
+        } else {
+          // Use the induction element ptr.
+          assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
+          Ptr = Builder.CreateExtractElement(getVectorValue(Ptr), Zero);
+        }
        Ptr = Builder.CreateBitCast(Ptr, StTy->getPointerTo());
        Value *Val = getVectorValue(SI->getValueOperand());
        Builder.CreateStore(Val, Ptr)->setAlignment(Alignment);
@ -1104,23 +1178,31 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
        unsigned Alignment = LI->getAlignment();
        GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);

-        // If we don't have a gep, or that the pointer is loop invariant,
+        // If the pointer is loop invariant or if it is non consecutive,
        // scalarize the load.
-        if (!Gep || Legal->isUniform(Gep) || !Legal->isConsecutiveGep(Gep)) {
+        bool Con = Legal->isConsecutivePtr(Ptr);
+        if (Legal->isUniform(Ptr) || !Con) {
          scalarizeInstruction(Inst);
          break;
        }

-        // The last index does not have to be the induction. It can be
-        // consecutive and be a function of the index. For example A[I+1];
-        unsigned NumOperands = Gep->getNumOperands();
-        Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands -1));
-        LastIndex = Builder.CreateExtractElement(LastIndex, Zero);
+        if (Gep) {
+          // The last index does not have to be the induction. It can be
+          // consecutive and be a function of the index. For example A[I+1];
+          unsigned NumOperands = Gep->getNumOperands();
+          Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands -1));
+          LastIndex = Builder.CreateExtractElement(LastIndex, Zero);
+
+          // Create the new GEP with the new induction variable.
+          GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
+          Gep2->setOperand(NumOperands - 1, LastIndex);
+          Ptr = Builder.Insert(Gep2);
+        } else {
+          // Use the induction element ptr.
+          assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
+          Ptr = Builder.CreateExtractElement(getVectorValue(Ptr), Zero);
+        }

-        // Create the new GEP with the new induction variable.
-        GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
-        Gep2->setOperand(NumOperands - 1, LastIndex);
-        Ptr = Builder.Insert(Gep2);
        Ptr = Builder.CreateBitCast(Ptr, RetTy->getPointerTo());
        LI = Builder.CreateLoad(Ptr);
        LI->setAlignment(Alignment);
@ -1301,7 +1383,7 @@ bool LoopVectorizationLegality::canVectorize() {
  if (!TheLoop->getLoopPreheader()) {
    assert(false && "No preheader!!");
    DEBUG(dbgs() << "LV: Loop not normalized." << "\n");
-    return  false;
+    return false;
  }

  // We can only vectorize single basic block loops.
@ -1347,6 +1429,7 @@ bool LoopVectorizationLegality::canVectorize() {
 }

 bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) {
+
  BasicBlock *PreHeader = TheLoop->getLoopPreheader();

  // Scan the instructions in the block and look for hazards.
@ -1440,8 +1523,8 @@ bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) {
  } // next instr.

  if (!Induction) {
-      DEBUG(dbgs() << "LV: Did not find an induction var.\n");
-      return false;
+    DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
+    assert(getInductionVars()->size() && "No induction variables");
  }

  // Don't vectorize if the memory dependencies do not allow vectorization.
@ -1458,15 +1541,10 @@ bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) {
  while (Worklist.size()) {
    Instruction *I = dyn_cast<Instruction>(Worklist.back());
    Worklist.pop_back();
-    // Look at instructions inside this block.
-    if (!I) continue;
-    if (I->getParent() != &BB) continue;

-    // Stop when reaching PHI nodes.
-    if (isa<PHINode>(I)) {
-      assert(I == Induction && "Found a uniform PHI that is not the induction");
-      break;
-    }
+    // Look at instructions inside this block. Stop when reaching PHI nodes.
+    if (!I || I->getParent() != &BB || isa<PHINode>(I))
+      continue;

    // This is a known uniform.
    Uniforms.insert(I);
@ -1569,7 +1647,7 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
    // If the address of i is unknown (for example A[B[i]]) then we may
    // read a few words, modify, and write a few words, and some of the
    // words may be written to the same address.
-    if (Seen.insert(Ptr) || !isConsecutiveGep(Ptr))
+    if (Seen.insert(Ptr) || !isConsecutivePtr(Ptr))
      Reads.push_back(Ptr);
  }

@ -1585,7 +1663,7 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
  bool RT = true;
  for (I = ReadWrites.begin(), IE = ReadWrites.end(); I != IE; ++I)
    if (hasComputableBounds(*I)) {
-      PtrRtCheck.Pointers.push_back(*I);
+      PtrRtCheck.insert_pointer(SE, TheLoop, *I);
      DEBUG(dbgs() << "LV: Found a runtime check ptr:" << **I <<"\n");
    } else {
      RT = false;
@ -1593,7 +1671,7 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
    }
  for (I = Reads.begin(), IE = Reads.end(); I != IE; ++I)
    if (hasComputableBounds(*I)) {
-      PtrRtCheck.Pointers.push_back(*I);
+      PtrRtCheck.insert_pointer(SE, TheLoop, *I);
      DEBUG(dbgs() << "LV: Found a runtime check ptr:" << **I <<"\n");
    } else {
      RT = false;
@ -1603,7 +1681,7 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
  // Check that we did not collect too many pointers or found a
  // unsizeable pointer.
  if (!RT || PtrRtCheck.Pointers.size() > RuntimeMemoryCheckThreshold) {
-    PtrRtCheck.Pointers.clear();
+    PtrRtCheck.reset();
    RT = false;
  }

@ -1658,8 +1736,7 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {

  // It is safe to vectorize and we don't need any runtime checks.
  DEBUG(dbgs() << "LV: We don't need a runtime memory check.\n");
-  PtrRtCheck.Pointers.clear();
-  PtrRtCheck.Need = false;
+  PtrRtCheck.reset();
  return true;
 }

@ -1917,7 +1994,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
                              SI->getAlignment(), SI->getPointerAddressSpace());

      // Scalarized stores.
-      if (!Legal->isConsecutiveGep(SI->getPointerOperand())) {
+      if (!Legal->isConsecutivePtr(SI->getPointerOperand())) {
        unsigned Cost = 0;
        unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement,
                                              ValTy);
@ -1944,7 +2021,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
                                     LI->getPointerAddressSpace());

      // Scalarized loads.
-      if (!Legal->isConsecutiveGep(LI->getPointerOperand())) {
+      if (!Legal->isConsecutivePtr(LI->getPointerOperand())) {
        unsigned Cost = 0;
        unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, RetTy);
        // The cost of inserting the loaded value into the result vector.
--- a/test/Transforms/LoopVectorize/no_int_induction.ll
+++ b/test/Transforms/LoopVectorize/no_int_induction.ll
@ -0,0 +1,33 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+
+; int __attribute__((noinline)) sum_array(int *A, int n) {
+;  return std::accumulate(A, A + n, 0);
+; }
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK: @sum_array
+;CHECK: phi <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: ret i32
+define i32 @sum_array(i32* %A, i32 %n) nounwind uwtable readonly noinline ssp {
+  %1 = sext i32 %n to i64
+  %2 = getelementptr inbounds i32* %A, i64 %1
+  %3 = icmp eq i32 %n, 0
+  br i1 %3, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %.lr.ph.i
+
+.lr.ph.i:                                         ; preds = %0, %.lr.ph.i
+  %.03.i = phi i32* [ %6, %.lr.ph.i ], [ %A, %0 ]
+  %.012.i = phi i32 [ %5, %.lr.ph.i ], [ 0, %0 ]
+  %4 = load i32* %.03.i, align 4
+  %5 = add nsw i32 %4, %.012.i
+  %6 = getelementptr inbounds i32* %.03.i, i64 1
+  %7 = icmp eq i32* %6, %2
+  br i1 %7, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %.lr.ph.i
+
+_ZSt10accumulateIPiiET0_T_S2_S1_.exit:            ; preds = %.lr.ph.i, %0
+  %.01.lcssa.i = phi i32 [ 0, %0 ], [ %5, %.lr.ph.i ]
+  ret i32 %.01.lcssa.i
+}