Speculatively revert r258620 as it is the likely culprit of PR26293.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258703 91177308-0d34-0410-b5e6-96231b3b80d8
2024-11-30 23:20:54 +00:00 · 2016-01-25 19:12:49 +00:00 · 2016-01-25 19:12:49 +00:00 · 79b3dc3c04
commit 79b3dc3c04
parent 79890da71d
7 changed files with 124 additions and 775 deletions
--- a/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/include/llvm/Analysis/LoopAccessAnalysis.h
@ -659,11 +659,6 @@ const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
 int isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
                 const ValueToValueMap &StridesMap);

-/// \brief Returns true if the memory operations \p A and \p B are consecutive.
-/// This is a simple API that does not depend on the analysis pass. 
-bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
-                         ScalarEvolution &SE, bool CheckType = true);
-
 /// \brief This analysis provides dependence information for the memory accesses
 /// of a loop.
 ///
--- a/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/lib/Analysis/LoopAccessAnalysis.cpp
@ -901,78 +901,6 @@ int llvm::isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr,
  return Stride;
 }

-/// Take the pointer operand from the Load/Store instruction.
-/// Returns NULL if this is not a valid Load/Store instruction.
-static Value *getPointerOperand(Value *I) {
-  if (LoadInst *LI = dyn_cast<LoadInst>(I))
-    return LI->getPointerOperand();
-  if (StoreInst *SI = dyn_cast<StoreInst>(I))
-    return SI->getPointerOperand();
-  return nullptr;
-}
-
-/// Take the address space operand from the Load/Store instruction.
-/// Returns -1 if this is not a valid Load/Store instruction.
-static unsigned getAddressSpaceOperand(Value *I) {
-  if (LoadInst *L = dyn_cast<LoadInst>(I))
-    return L->getPointerAddressSpace();
-  if (StoreInst *S = dyn_cast<StoreInst>(I))
-    return S->getPointerAddressSpace();
-  return -1;
-}
-
-/// Returns true if the memory operations \p A and \p B are consecutive.
-bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
-                               ScalarEvolution &SE, bool CheckType) {
-  Value *PtrA = getPointerOperand(A);
-  Value *PtrB = getPointerOperand(B);
-  unsigned ASA = getAddressSpaceOperand(A);
-  unsigned ASB = getAddressSpaceOperand(B);
-
-  // Check that the address spaces match and that the pointers are valid.
-  if (!PtrA || !PtrB || (ASA != ASB))
-    return false;
-
-  // Make sure that A and B are different pointers.
-  if (PtrA == PtrB)
-    return false;
-
-  // Make sure that A and B have the same type if required.
-  if(CheckType && PtrA->getType() != PtrB->getType())
-      return false;
-
-  unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA);
-  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
-  APInt Size(PtrBitWidth, DL.getTypeStoreSize(Ty));
-
-  APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0);
-  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
-  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
-
-  //  OffsetDelta = OffsetB - OffsetA;
-  const SCEV *OffsetSCEVA = SE.getConstant(OffsetA);
-  const SCEV *OffsetSCEVB = SE.getConstant(OffsetB);
-  const SCEV *OffsetDeltaSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA);
-  const SCEVConstant *OffsetDeltaC = dyn_cast<SCEVConstant>(OffsetDeltaSCEV);
-  const APInt &OffsetDelta = OffsetDeltaC->getAPInt();
-  // Check if they are based on the same pointer. That makes the offsets
-  // sufficient.
-  if (PtrA == PtrB)
-    return OffsetDelta == Size;
-
-  // Compute the necessary base pointer delta to have the necessary final delta
-  // equal to the size.
-  // BaseDelta = Size - OffsetDelta;
-  const SCEV *SizeSCEV = SE.getConstant(Size);
-  const SCEV *BaseDelta = SE.getMinusSCEV(SizeSCEV, OffsetDeltaSCEV);
-
-  // Otherwise compute the distance with SCEV between the base pointers.
-  const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
-  const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
-  const SCEV *X = SE.getAddExpr(PtrSCEVA, BaseDelta);
-  return X == PtrSCEVB;
-}
-
 bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
  switch (Type) {
  case NoDep:
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@ -26,20 +26,22 @@
 // i64 and larger types when i64 is legal and the value has few bits set.  It
 // would be good to enhance isel to emit a loop for ctpop in this case.
 //
+// We should enhance the memset/memcpy recognition to handle multiple stores in
+// the loop.  This would handle things like:
+//   void foo(_Complex float *P)
+//     for (i) { __real__(*P) = 0;  __imag__(*P) = 0; }
+//
 // This could recognize common matrix multiplies and dot product idioms and
 // replace them with calls to BLAS (if linked in??).
 //
 //===----------------------------------------------------------------------===//

 #include "llvm/Transforms/Scalar.h"
-#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/LoopAccessAnalysis.h"
 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
@ -106,9 +108,7 @@ public:

 private:
  typedef SmallVector<StoreInst *, 8> StoreList;
-  typedef MapVector<Value *, StoreList> StoreListMap;
-  StoreListMap StoreRefsForMemset;
-  StoreListMap StoreRefsForMemsetPattern;
+  StoreList StoreRefsForMemset;
  StoreList StoreRefsForMemcpy;
  bool HasMemset;
  bool HasMemsetPattern;
@ -122,18 +122,14 @@ private:
                      SmallVectorImpl<BasicBlock *> &ExitBlocks);

  void collectStores(BasicBlock *BB);
-  bool isLegalStore(StoreInst *SI, bool &ForMemset, bool &ForMemsetPattern,
-                    bool &ForMemcpy);
-  bool processLoopStores(SmallVectorImpl<StoreInst *> &SL, const SCEV *BECount,
-                         bool ForMemset);
+  bool isLegalStore(StoreInst *SI, bool &ForMemset, bool &ForMemcpy);
+  bool processLoopStore(StoreInst *SI, const SCEV *BECount);
  bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);

  bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
                               unsigned StoreAlignment, Value *StoredVal,
-                               Instruction *TheStore,
-                               SmallPtrSetImpl<Instruction *> &Stores,
-                               const SCEVAddRecExpr *Ev, const SCEV *BECount,
-                               bool NegStride);
+                               Instruction *TheStore, const SCEVAddRecExpr *Ev,
+                               const SCEV *BECount, bool NegStride);
  bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount);

  /// @}
@ -309,7 +305,7 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
 }

 bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
-                                      bool &ForMemsetPattern, bool &ForMemcpy) {
+                                      bool &ForMemcpy) {
  // Don't touch volatile stores.
  if (!SI->isSimple())
    return false;
@ -357,7 +353,7 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
             StorePtr->getType()->getPointerAddressSpace() == 0 &&
             (PatternValue = getMemSetPatternValue(StoredVal, DL))) {
    // It looks like we can use PatternValue!
-    ForMemsetPattern = true;
+    ForMemset = true;
    return true;
  }

@ -397,7 +393,6 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,

 void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
  StoreRefsForMemset.clear();
-  StoreRefsForMemsetPattern.clear();
  StoreRefsForMemcpy.clear();
  for (Instruction &I : *BB) {
    StoreInst *SI = dyn_cast<StoreInst>(&I);
@ -405,22 +400,15 @@ void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
      continue;

    bool ForMemset = false;
-    bool ForMemsetPattern = false;
    bool ForMemcpy = false;
    // Make sure this is a strided store with a constant stride.
-    if (!isLegalStore(SI, ForMemset, ForMemsetPattern, ForMemcpy))
+    if (!isLegalStore(SI, ForMemset, ForMemcpy))
      continue;

    // Save the store locations.
-    if (ForMemset) {
-      // Find the base pointer.
-      Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
-      StoreRefsForMemset[Ptr].push_back(SI);
-    } else if (ForMemsetPattern) {
-      // Find the base pointer.
-      Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
-      StoreRefsForMemsetPattern[Ptr].push_back(SI);
-    } else if (ForMemcpy)
+    if (ForMemset)
+      StoreRefsForMemset.push_back(SI);
+    else if (ForMemcpy)
      StoreRefsForMemcpy.push_back(SI);
  }
 }
@ -442,14 +430,9 @@ bool LoopIdiomRecognize::runOnLoopBlock(
  // Look for store instructions, which may be optimized to memset/memcpy.
  collectStores(BB);

-  // Look for a single store or sets of stores with a common base, which can be
-  // optimized into a memset (memset_pattern).  The latter most commonly happens
-  // with structs and handunrolled loops.
-  for (auto &SL : StoreRefsForMemset)
-    MadeChange |= processLoopStores(SL.second, BECount, true);
-
-  for (auto &SL : StoreRefsForMemsetPattern)
-    MadeChange |= processLoopStores(SL.second, BECount, false);
+  // Look for a single store which can be optimized into a memset.
+  for (auto &SI : StoreRefsForMemset)
+    MadeChange |= processLoopStore(SI, BECount);

  // Optimize the store into a memcpy, if it feeds an similarly strided load.
  for (auto &SI : StoreRefsForMemcpy)
@ -475,155 +458,26 @@ bool LoopIdiomRecognize::runOnLoopBlock(
  return MadeChange;
 }

-/// processLoopStores - See if this store(s) can be promoted to a memset.
-bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
-                                           const SCEV *BECount,
-                                           bool ForMemset) {
-  // Try to find consecutive stores that can be transformed into memsets.
-  SetVector<StoreInst *> Heads, Tails;
-  SmallDenseMap<StoreInst *, StoreInst *> ConsecutiveChain;
+/// processLoopStore - See if this store can be promoted to a memset.
+bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
+  assert(SI->isSimple() && "Expected only non-volatile stores.");

-  // Do a quadratic search on all of the given stores and find
-  // all of the pairs of stores that follow each other.
-  SmallVector<unsigned, 16> IndexQueue;
-  for (unsigned i = 0, e = SL.size(); i < e; ++i) {
-    assert(SL[i]->isSimple() && "Expected only non-volatile stores.");
+  Value *StoredVal = SI->getValueOperand();
+  Value *StorePtr = SI->getPointerOperand();

-    Value *FirstStoredVal = SL[i]->getValueOperand();
-    Value *FirstStorePtr = SL[i]->getPointerOperand();
-    const SCEVAddRecExpr *FirstStoreEv =
-        cast<SCEVAddRecExpr>(SE->getSCEV(FirstStorePtr));
-    unsigned FirstStride = getStoreStride(FirstStoreEv);
-    unsigned FirstStoreSize = getStoreSizeInBytes(SL[i], DL);
+  // Check to see if the stride matches the size of the store.  If so, then we
+  // know that every byte is touched in the loop.
+  const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
+  unsigned Stride = getStoreStride(StoreEv);
+  unsigned StoreSize = getStoreSizeInBytes(SI, DL);
+  if (StoreSize != Stride && StoreSize != -Stride)
+    return false;

-    // See if we can optimize just this store in isolation.
-    if (FirstStride == FirstStoreSize || FirstStride == -FirstStoreSize) {
-      Heads.insert(SL[i]);
-      continue;
-    }
+  bool NegStride = StoreSize == -Stride;

-    Value *FirstSplatValue = nullptr;
-    Constant *FirstPatternValue = nullptr;
-
-    if (ForMemset)
-      FirstSplatValue = isBytewiseValue(FirstStoredVal);
-    else
-      FirstPatternValue = getMemSetPatternValue(FirstStoredVal, DL);
-
-    assert((FirstSplatValue || FirstPatternValue) &&
-           "Expected either splat value or pattern value.");
-
-    IndexQueue.clear();
-    // If a store has multiple consecutive store candidates, search Stores
-    // array according to the sequence: from i+1 to e, then from i-1 to 0.
-    // This is because usually pairing with immediate succeeding or preceding
-    // candidate create the best chance to find memset opportunity.
-    unsigned j = 0;
-    for (j = i + 1; j < e; ++j)
-      IndexQueue.push_back(j);
-    for (j = i; j > 0; --j)
-      IndexQueue.push_back(j - 1);
-
-    for (auto &k : IndexQueue) {
-      assert(SL[k]->isSimple() && "Expected only non-volatile stores.");
-      Value *SecondStorePtr = SL[k]->getPointerOperand();
-      const SCEVAddRecExpr *SecondStoreEv =
-          cast<SCEVAddRecExpr>(SE->getSCEV(SecondStorePtr));
-      unsigned SecondStride = getStoreStride(SecondStoreEv);
-
-      if (FirstStride != SecondStride)
-        continue;
-
-      Value *SecondStoredVal = SL[k]->getValueOperand();
-      Value *SecondSplatValue = nullptr;
-      Constant *SecondPatternValue = nullptr;
-
-      if (ForMemset)
-        SecondSplatValue = isBytewiseValue(SecondStoredVal);
-      else
-        SecondPatternValue = getMemSetPatternValue(SecondStoredVal, DL);
-
-      assert((SecondSplatValue || SecondPatternValue) &&
-             "Expected either splat value or pattern value.");
-
-      if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, false)) {
-        if (ForMemset) {
-          ConstantInt *C1 = dyn_cast<ConstantInt>(FirstSplatValue);
-          ConstantInt *C2 = dyn_cast<ConstantInt>(SecondSplatValue);
-          if (!C1 || !C2 || C1 != C2)
-            continue;
-        } else {
-          Constant *C1 = FirstPatternValue;
-          Constant *C2 = SecondPatternValue;
-
-          if (ConstantArray *CA1 = dyn_cast<ConstantArray>(C1))
-            C1 = CA1->getSplatValue();
-
-          if (ConstantArray *CA2 = dyn_cast<ConstantArray>(C2))
-            C2 = CA2->getSplatValue();
-
-          if (C1 != C2)
-            continue;
-        }
-        Tails.insert(SL[k]);
-        Heads.insert(SL[i]);
-        ConsecutiveChain[SL[i]] = SL[k];
-        break;
-      }
-    }
-  }
-
-  // We may run into multiple chains that merge into a single chain. We mark the
-  // stores that we transformed so that we don't visit the same store twice.
-  SmallPtrSet<Value *, 16> TransformedStores;
-  bool Changed = false;
-
-  // For stores that start but don't end a link in the chain:
-  for (SetVector<StoreInst *>::iterator it = Heads.begin(), e = Heads.end();
-       it != e; ++it) {
-    if (Tails.count(*it))
-      continue;
-
-    // We found a store instr that starts a chain. Now follow the chain and try
-    // to transform it.
-    SmallPtrSet<Instruction *, 8> AdjacentStores;
-    StoreInst *I = *it;
-
-    StoreInst *HeadStore = I;
-    unsigned StoreSize = 0;
-
-    // Collect the chain into a list.
-    while (Tails.count(I) || Heads.count(I)) {
-      if (TransformedStores.count(I))
-        break;
-      AdjacentStores.insert(I);
-
-      StoreSize += getStoreSizeInBytes(I, DL);
-      // Move to the next value in the chain.
-      I = ConsecutiveChain[I];
-    }
-
-    Value *StoredVal = HeadStore->getValueOperand();
-    Value *StorePtr = HeadStore->getPointerOperand();
-    const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
-    unsigned Stride = getStoreStride(StoreEv);
-
-    // Check to see if the stride matches the size of the stores.  If so, then
-    // we know that every byte is touched in the loop.
-    if (StoreSize != Stride && StoreSize != -Stride)
-      continue;
-
-    bool NegStride = StoreSize == -Stride;
-
-    if (processLoopStridedStore(StorePtr, StoreSize, HeadStore->getAlignment(),
-                                StoredVal, HeadStore, AdjacentStores, StoreEv,
-                                BECount, NegStride)) {
-      TransformedStores.insert(AdjacentStores.begin(), AdjacentStores.end());
-      Changed = true;
-    }
-  }
-
-  return Changed;
+  // See if we can optimize just this store in isolation.
+  return processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
+                                 StoredVal, SI, StoreEv, BECount, NegStride);
 }

 /// processLoopMemSet - See if this memset can be promoted to a large memset.
@ -666,21 +520,18 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
  if (!SplatValue || !CurLoop->isLoopInvariant(SplatValue))
    return false;

-  SmallPtrSet<Instruction *, 1> MSIs;
-  MSIs.insert(MSI);
  return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
-                                 MSI->getAlignment(), SplatValue, MSI, MSIs, Ev,
+                                 MSI->getAlignment(), SplatValue, MSI, Ev,
                                 BECount, /*NegStride=*/false);
 }

 /// mayLoopAccessLocation - Return true if the specified loop might access the
 /// specified pointer location, which is a loop-strided access.  The 'Access'
 /// argument specifies what the verboten forms of access are (read or write).
-static bool
-mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
-                      const SCEV *BECount, unsigned StoreSize,
-                      AliasAnalysis &AA,
-                      SmallPtrSetImpl<Instruction *> &IgnoredStores) {
+static bool mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
+                                  const SCEV *BECount, unsigned StoreSize,
+                                  AliasAnalysis &AA,
+                                  Instruction *IgnoredStore) {
  // Get the location that may be stored across the loop.  Since the access is
  // strided positively through memory, we say that the modified location starts
  // at the pointer and has infinite size.
@ -700,8 +551,7 @@ mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
  for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
       ++BI)
    for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)
-      if (IgnoredStores.count(&*I) == 0 &&
-          (AA.getModRefInfo(&*I, StoreLoc) & Access))
+      if (&*I != IgnoredStore && (AA.getModRefInfo(&*I, StoreLoc) & Access))
        return true;

  return false;
@ -724,8 +574,7 @@ static const SCEV *getStartForNegStride(const SCEV *Start, const SCEV *BECount,
 /// transform this into a memset or memset_pattern in the loop preheader, do so.
 bool LoopIdiomRecognize::processLoopStridedStore(
    Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment,
-    Value *StoredVal, Instruction *TheStore,
-    SmallPtrSetImpl<Instruction *> &Stores, const SCEVAddRecExpr *Ev,
+    Value *StoredVal, Instruction *TheStore, const SCEVAddRecExpr *Ev,
    const SCEV *BECount, bool NegStride) {
  Value *SplatValue = isBytewiseValue(StoredVal);
  Constant *PatternValue = nullptr;
@ -760,7 +609,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
  Value *BasePtr =
      Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator());
  if (mayLoopAccessLocation(BasePtr, MRI_ModRef, CurLoop, BECount, StoreSize,
-                            *AA, Stores)) {
+                            *AA, TheStore)) {
    Expander.clear();
    // If we generated new code for the base pointer, clean up.
    RecursivelyDeleteTriviallyDeadInstructions(BasePtr, TLI);
@ -813,8 +662,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(

  // Okay, the memset has been formed.  Zap the original store and anything that
  // feeds into it.
-  for (auto *I : Stores)
-    deleteDeadInstruction(I, TLI);
+  deleteDeadInstruction(TheStore, TLI);
  ++NumMemSet;
  return true;
 }
@ -866,10 +714,8 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
  Value *StoreBasePtr = Expander.expandCodeFor(
      StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator());

-  SmallPtrSet<Instruction *, 1> Stores;
-  Stores.insert(SI);
  if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount,
-                            StoreSize, *AA, Stores)) {
+                            StoreSize, *AA, SI)) {
    Expander.clear();
    // If we generated new code for the base pointer, clean up.
    RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
@ -889,7 +735,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
      LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator());

  if (mayLoopAccessLocation(LoadBasePtr, MRI_Mod, CurLoop, BECount, StoreSize,
-                            *AA, Stores)) {
+                            *AA, SI)) {
    Expander.clear();
    // If we generated new code for the base pointer, clean up.
    RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI);
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@ -26,7 +26,6 @@
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/CodeMetrics.h"
 #include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/LoopAccessAnalysis.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
@ -402,6 +401,9 @@ public:
    }
  }

+  /// \returns true if the memory operations A and B are consecutive.
+  bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL);
+
  /// \brief Perform LICM and CSE on the newly generated gather sequences.
  void optimizeGatherSequence();

@ -436,6 +438,14 @@ private:
  /// vectorized, or NULL. They may happen in cycles.
  Value *alreadyVectorized(ArrayRef<Value *> VL) const;

+  /// \brief Take the pointer operand from the Load/Store instruction.
+  /// \returns NULL if this is not a valid Load/Store instruction.
+  static Value *getPointerOperand(Value *I);
+
+  /// \brief Take the address space operand from the Load/Store instruction.
+  /// \returns -1 if this is not a valid Load/Store instruction.
+  static unsigned getAddressSpaceOperand(Value *I);
+
  /// \returns the scalarization cost for this type. Scalarization in this
  /// context means the creation of vectors from a group of scalars.
  int getGatherCost(Type *Ty);
@ -1181,8 +1191,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
          return;
        }

-        if (!isConsecutiveAccess(VL[i], VL[i + 1], DL, *SE)) {
-          if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0], DL, *SE)) {
+        if (!isConsecutiveAccess(VL[i], VL[i + 1], DL)) {
+          if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0], DL)) {
            ++NumLoadsWantToChangeOrder;
          }
          BS.cancelScheduling(VL);
@ -1354,7 +1364,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
      const DataLayout &DL = F->getParent()->getDataLayout();
      // Check if the stores are consecutive or of we need to swizzle them.
      for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
-        if (!isConsecutiveAccess(VL[i], VL[i + 1], DL, *SE)) {
+        if (!isConsecutiveAccess(VL[i], VL[i + 1], DL)) {
          BS.cancelScheduling(VL);
          newTreeEntry(VL, false);
          DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
@ -1827,6 +1837,63 @@ int BoUpSLP::getGatherCost(ArrayRef<Value *> VL) {
  return getGatherCost(VecTy);
 }

+Value *BoUpSLP::getPointerOperand(Value *I) {
+  if (LoadInst *LI = dyn_cast<LoadInst>(I))
+    return LI->getPointerOperand();
+  if (StoreInst *SI = dyn_cast<StoreInst>(I))
+    return SI->getPointerOperand();
+  return nullptr;
+}
+
+unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
+  if (LoadInst *L = dyn_cast<LoadInst>(I))
+    return L->getPointerAddressSpace();
+  if (StoreInst *S = dyn_cast<StoreInst>(I))
+    return S->getPointerAddressSpace();
+  return -1;
+}
+
+bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL) {
+  Value *PtrA = getPointerOperand(A);
+  Value *PtrB = getPointerOperand(B);
+  unsigned ASA = getAddressSpaceOperand(A);
+  unsigned ASB = getAddressSpaceOperand(B);
+
+  // Check that the address spaces match and that the pointers are valid.
+  if (!PtrA || !PtrB || (ASA != ASB))
+    return false;
+
+  // Make sure that A and B are different pointers of the same type.
+  if (PtrA == PtrB || PtrA->getType() != PtrB->getType())
+    return false;
+
+  unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA);
+  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
+  APInt Size(PtrBitWidth, DL.getTypeStoreSize(Ty));
+
+  APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0);
+  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
+  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
+
+  APInt OffsetDelta = OffsetB - OffsetA;
+
+  // Check if they are based on the same pointer. That makes the offsets
+  // sufficient.
+  if (PtrA == PtrB)
+    return OffsetDelta == Size;
+
+  // Compute the necessary base pointer delta to have the necessary final delta
+  // equal to the size.
+  APInt BaseDelta = Size - OffsetDelta;
+
+  // Otherwise compute the distance with SCEV between the base pointers.
+  const SCEV *PtrSCEVA = SE->getSCEV(PtrA);
+  const SCEV *PtrSCEVB = SE->getSCEV(PtrB);
+  const SCEV *C = SE->getConstant(BaseDelta);
+  const SCEV *X = SE->getAddExpr(PtrSCEVA, C);
+  return X == PtrSCEVB;
+}
+
 // Reorder commutative operations in alternate shuffle if the resulting vectors
 // are consecutive loads. This would allow us to vectorize the tree.
 // If we have something like-
@ -1854,10 +1921,10 @@ void BoUpSLP::reorderAltShuffleOperands(ArrayRef<Value *> VL,
      if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
        Instruction *VL1 = cast<Instruction>(VL[j]);
        Instruction *VL2 = cast<Instruction>(VL[j + 1]);
-        if (isConsecutiveAccess(L, L1, DL, *SE) && VL1->isCommutative()) {
+        if (isConsecutiveAccess(L, L1, DL) && VL1->isCommutative()) {
          std::swap(Left[j], Right[j]);
          continue;
-        } else if (isConsecutiveAccess(L, L1, DL, *SE) && VL2->isCommutative()) {
+        } else if (isConsecutiveAccess(L, L1, DL) && VL2->isCommutative()) {
          std::swap(Left[j + 1], Right[j + 1]);
          continue;
        }
@ -1868,10 +1935,10 @@ void BoUpSLP::reorderAltShuffleOperands(ArrayRef<Value *> VL,
      if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
        Instruction *VL1 = cast<Instruction>(VL[j]);
        Instruction *VL2 = cast<Instruction>(VL[j + 1]);
-        if (isConsecutiveAccess(L, L1, DL, *SE) && VL1->isCommutative()) {
+        if (isConsecutiveAccess(L, L1, DL) && VL1->isCommutative()) {
          std::swap(Left[j], Right[j]);
          continue;
-        } else if (isConsecutiveAccess(L, L1, DL, *SE) && VL2->isCommutative()) {
+        } else if (isConsecutiveAccess(L, L1, DL) && VL2->isCommutative()) {
          std::swap(Left[j + 1], Right[j + 1]);
          continue;
        }
@ -2021,7 +2088,7 @@ void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
  for (unsigned j = 0; j < VL.size() - 1; ++j) {
    if (LoadInst *L = dyn_cast<LoadInst>(Left[j])) {
      if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
-        if (isConsecutiveAccess(L, L1, DL, *SE)) {
+        if (isConsecutiveAccess(L, L1, DL)) {
          std::swap(Left[j + 1], Right[j + 1]);
          continue;
        }
@ -2029,7 +2096,7 @@ void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
    }
    if (LoadInst *L = dyn_cast<LoadInst>(Right[j])) {
      if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
-        if (isConsecutiveAccess(L, L1, DL, *SE)) {
+        if (isConsecutiveAccess(L, L1, DL)) {
          std::swap(Left[j + 1], Right[j + 1]);
          continue;
        }
@ -3394,7 +3461,7 @@ bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
      IndexQueue.push_back(j - 1);

    for (auto &k : IndexQueue) {
-      if (isConsecutiveAccess(Stores[i], Stores[k], DL, *SE)) {
+      if (R.isConsecutiveAccess(Stores[i], Stores[k], DL)) {
        Tails.insert(Stores[k]);
        Heads.insert(Stores[i]);
        ConsecutiveChain[Stores[i]] = Stores[k];
--- a/test/Transforms/LoopIdiom/struct.ll
+++ b/test/Transforms/LoopIdiom/struct.ll
@ -1,221 +0,0 @@
-; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-
-target triple = "x86_64-apple-darwin10.0.0"
-
-%struct.foo = type { i32, i32 }
-%struct.foo1 = type { i32, i32, i32 }
-%struct.foo2 = type { i32, i16, i16 }
-
-;void bar1(foo_t *f, unsigned n) {
-;  for (unsigned i = 0; i < n; ++i) {
-;    f[i].a = 0;
-;    f[i].b = 0;
-;  }
-;}
-define void @bar1(%struct.foo* %f, i32 %n) nounwind ssp {
-entry:
-  %cmp1 = icmp eq i32 %n, 0
-  br i1 %cmp1, label %for.end, label %for.body.preheader
-
-for.body.preheader:                               ; preds = %entry
-  br label %for.body
-
-for.body:                                         ; preds = %for.body.preheader, %for.body
-  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
-  store i32 0, i32* %a, align 4
-  %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
-  store i32 0, i32* %b, align 4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp ne i32 %lftr.wideiv, %n
-  br i1 %exitcond, label %for.body, label %for.end.loopexit
-
-for.end.loopexit:                                 ; preds = %for.body
-  br label %for.end
-
-for.end:                                          ; preds = %for.end.loopexit, %entry
-  ret void
-; CHECK-LABEL: @bar1(
-; CHECK: call void @llvm.memset
-; CHECK-NOT: store
-}
-
-;void bar2(foo_t *f, unsigned n) {
-;  for (unsigned i = 0; i < n; ++i) {
-;    f[i].b = 0;
-;    f[i].a = 0;
-;  }
-;}
-define void @bar2(%struct.foo* %f, i32 %n) nounwind ssp {
-entry:
-  %cmp1 = icmp eq i32 %n, 0
-  br i1 %cmp1, label %for.end, label %for.body.preheader
-
-for.body.preheader:                               ; preds = %entry
-  br label %for.body
-
-for.body:                                         ; preds = %for.body.preheader, %for.body
-  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
-  store i32 0, i32* %b, align 4
-  %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
-  store i32 0, i32* %a, align 4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp ne i32 %lftr.wideiv, %n
-  br i1 %exitcond, label %for.body, label %for.end.loopexit
-
-for.end.loopexit:                                 ; preds = %for.body
-  br label %for.end
-
-for.end:                                          ; preds = %for.end.loopexit, %entry
-  ret void
-; CHECK-LABEL: @bar2(
-; CHECK: call void @llvm.memset
-; CHECK-NOT: store
-}
-
-;void bar3(foo_t *f, unsigned n) {
-;  for (unsigned i = n; i > 0; --i) {
-;    f[i].a = 0;
-;    f[i].b = 0;
-;  }
-;}
-define void @bar3(%struct.foo* nocapture %f, i32 %n) nounwind ssp {
-entry:
-  %cmp1 = icmp eq i32 %n, 0
-  br i1 %cmp1, label %for.end, label %for.body.preheader
-
-for.body.preheader:                               ; preds = %entry
-  %0 = zext i32 %n to i64
-  br label %for.body
-
-for.body:                                         ; preds = %for.body.preheader, %for.body
-  %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
-  store i32 0, i32* %a, align 4
-  %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
-  store i32 0, i32* %b, align 4
-  %1 = trunc i64 %indvars.iv to i32
-  %dec = add i32 %1, -1
-  %cmp = icmp eq i32 %dec, 0
-  %indvars.iv.next = add nsw i64 %indvars.iv, -1
-  br i1 %cmp, label %for.end.loopexit, label %for.body
-
-for.end.loopexit:                                 ; preds = %for.body
-  br label %for.end
-
-for.end:                                          ; preds = %for.end.loopexit, %entry
-  ret void
-; CHECK-LABEL: @bar3(
-; CHECK: call void @llvm.memset
-; CHECK-NOT: store
-}
-
-;void bar4(foo_t *f, unsigned n) {
-;  for (unsigned i = 0; i < n; ++i) {
-;    f[i].a = 0;
-;    f[i].b = 1;
-;  }
-;}
-define void @bar4(%struct.foo* nocapture %f, i32 %n) nounwind ssp {
-entry:
-  %cmp1 = icmp eq i32 %n, 0
-  br i1 %cmp1, label %for.end, label %for.body.preheader
-
-for.body.preheader:                               ; preds = %entry
-  br label %for.body
-
-for.body:                                         ; preds = %for.body.preheader, %for.body
-  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
-  store i32 0, i32* %a, align 4
-  %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
-  store i32 1, i32* %b, align 4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp ne i32 %lftr.wideiv, %n
-  br i1 %exitcond, label %for.body, label %for.end.loopexit
-
-for.end.loopexit:                                 ; preds = %for.body
-  br label %for.end
-
-for.end:                                          ; preds = %for.end.loopexit, %entry
-  ret void
-; CHECK-LABEL: @bar4(
-; CHECK-NOT: call void @llvm.memset 
-}
-
-;void bar5(foo1_t *f, unsigned n) {
-;  for (unsigned i = 0; i < n; ++i) {
-;    f[i].a = 0;
-;    f[i].b = 0;
-;  }
-;}
-define void @bar5(%struct.foo1* nocapture %f, i32 %n) nounwind ssp {
-entry:
-  %cmp1 = icmp eq i32 %n, 0
-  br i1 %cmp1, label %for.end, label %for.body.preheader
-
-for.body.preheader:                               ; preds = %entry
-  br label %for.body
-
-for.body:                                         ; preds = %for.body.preheader, %for.body
-  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %a = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 0
-  store i32 0, i32* %a, align 4
-  %b = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 1
-  store i32 0, i32* %b, align 4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp ne i32 %lftr.wideiv, %n
-  br i1 %exitcond, label %for.body, label %for.end.loopexit
-
-for.end.loopexit:                                 ; preds = %for.body
-  br label %for.end
-
-for.end:                                          ; preds = %for.end.loopexit, %entry
-  ret void
-; CHECK-LABEL: @bar5(
-; CHECK-NOT: call void @llvm.memset 
-}
-
-;void bar6(foo2_t *f, unsigned n) {
-;  for (unsigned i = 0; i < n; ++i) {
-;    f[i].a = 0;
-;    f[i].b = 0;
-;    f[i].c = 0;
-;  }
-;}
-define void @bar6(%struct.foo2* nocapture %f, i32 %n) nounwind ssp {
-entry:
-  %cmp1 = icmp eq i32 %n, 0
-  br i1 %cmp1, label %for.end, label %for.body.preheader
-
-for.body.preheader:                               ; preds = %entry
-  br label %for.body
-
-for.body:                                         ; preds = %for.body.preheader, %for.body
-  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %a = getelementptr inbounds %struct.foo2, %struct.foo2* %f, i64 %indvars.iv, i32 0
-  store i32 0, i32* %a, align 4
-  %b = getelementptr inbounds %struct.foo2, %struct.foo2* %f, i64 %indvars.iv, i32 1
-  store i16 0, i16* %b, align 4
-  %c = getelementptr inbounds %struct.foo2, %struct.foo2* %f, i64 %indvars.iv, i32 2
-  store i16 0, i16* %c, align 2
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp ne i32 %lftr.wideiv, %n
-  br i1 %exitcond, label %for.body, label %for.end.loopexit
-
-for.end.loopexit:                                 ; preds = %for.body
-  br label %for.end
-
-for.end:                                          ; preds = %for.end.loopexit, %entry
-  ret void
-; CHECK-LABEL: @bar6(
-; CHECK: call void @llvm.memset
-; CHECK-NOT: store
-}
--- a/test/Transforms/LoopIdiom/struct_pattern.ll
+++ b/test/Transforms/LoopIdiom/struct_pattern.ll
@ -1,186 +0,0 @@
-; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-
-; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
-; CHECK: @.memset_pattern.1 = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
-; CHECK: @.memset_pattern.2 = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
-
-target triple = "x86_64-apple-darwin10.0.0"
-
-%struct.foo = type { i32, i32 }
-%struct.foo1 = type { i32, i32, i32 }
-
-;void bar1(foo_t *f, unsigned n) {
-;  for (unsigned i = 0; i < n; ++i) {
-;    f[i].a = 2;
-;    f[i].b = 2;
-;  }
-;}
-define void @bar1(%struct.foo* %f, i32 %n) nounwind ssp {
-entry:
-  %cmp1 = icmp eq i32 %n, 0
-  br i1 %cmp1, label %for.end, label %for.body.preheader
-
-for.body.preheader:                               ; preds = %entry
-  br label %for.body
-
-for.body:                                         ; preds = %for.body.preheader, %for.body
-  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
-  store i32 2, i32* %a, align 4
-  %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
-  store i32 2, i32* %b, align 4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp ne i32 %lftr.wideiv, %n
-  br i1 %exitcond, label %for.body, label %for.end.loopexit
-
-for.end.loopexit:                                 ; preds = %for.body
-  br label %for.end
-
-for.end:                                          ; preds = %for.end.loopexit, %entry
-  ret void
-; CHECK-LABEL: @bar1(
-; CHECK: call void @memset_pattern16
-; CHECK-NOT: store
-}
-
-;void bar2(foo_t *f, unsigned n) {
-;  for (unsigned i = 0; i < n; ++i) {
-;    f[i].b = 2;
-;    f[i].a = 2;
-;  }
-;}
-define void @bar2(%struct.foo* %f, i32 %n) nounwind ssp {
-entry:
-  %cmp1 = icmp eq i32 %n, 0
-  br i1 %cmp1, label %for.end, label %for.body.preheader
-
-for.body.preheader:                               ; preds = %entry
-  br label %for.body
-
-for.body:                                         ; preds = %for.body.preheader, %for.body
-  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
-  store i32 2, i32* %b, align 4
-  %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
-  store i32 2, i32* %a, align 4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp ne i32 %lftr.wideiv, %n
-  br i1 %exitcond, label %for.body, label %for.end.loopexit
-
-for.end.loopexit:                                 ; preds = %for.body
-  br label %for.end
-
-for.end:                                          ; preds = %for.end.loopexit, %entry
-  ret void
-; CHECK-LABEL: @bar2(
-; CHECK: call void @memset_pattern16
-; CHECK-NOT: store
-}
-
-;void bar3(foo_t *f, unsigned n) {
-;  for (unsigned i = n; i > 0; --i) {
-;    f[i].a = 2;
-;    f[i].b = 2;
-;  }
-;}
-define void @bar3(%struct.foo* nocapture %f, i32 %n) nounwind ssp {
-entry:
-  %cmp1 = icmp eq i32 %n, 0
-  br i1 %cmp1, label %for.end, label %for.body.preheader
-
-for.body.preheader:                               ; preds = %entry
-  %0 = zext i32 %n to i64
-  br label %for.body
-
-for.body:                                         ; preds = %for.body.preheader, %for.body
-  %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
-  store i32 2, i32* %a, align 4
-  %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
-  store i32 2, i32* %b, align 4
-  %1 = trunc i64 %indvars.iv to i32
-  %dec = add i32 %1, -1
-  %cmp = icmp eq i32 %dec, 0
-  %indvars.iv.next = add nsw i64 %indvars.iv, -1
-  br i1 %cmp, label %for.end.loopexit, label %for.body
-
-for.end.loopexit:                                 ; preds = %for.body
-  br label %for.end
-
-for.end:                                          ; preds = %for.end.loopexit, %entry
-  ret void
-; CHECK-LABEL: @bar3(
-; CHECK: call void @memset_pattern16
-; CHECK-NOT: store
-}
-
-;void bar4(foo_t *f, unsigned n) {
-;  for (unsigned i = 0; i < n; ++i) {
-;    f[i].a = 0;
-;    f[i].b = 1;
-;  }
-;}
-define void @bar4(%struct.foo* nocapture %f, i32 %n) nounwind ssp {
-entry:
-  %cmp1 = icmp eq i32 %n, 0
-  br i1 %cmp1, label %for.end, label %for.body.preheader
-
-for.body.preheader:                               ; preds = %entry
-  br label %for.body
-
-for.body:                                         ; preds = %for.body.preheader, %for.body
-  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
-  store i32 0, i32* %a, align 4
-  %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
-  store i32 1, i32* %b, align 4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp ne i32 %lftr.wideiv, %n
-  br i1 %exitcond, label %for.body, label %for.end.loopexit
-
-for.end.loopexit:                                 ; preds = %for.body
-  br label %for.end
-
-for.end:                                          ; preds = %for.end.loopexit, %entry
-  ret void
-; CHECK-LABEL: @bar4(
-; CHECK-NOT: call void @memset_pattern16 
-}
-
-;void bar5(foo1_t *f, unsigned n) {
-;  for (unsigned i = 0; i < n; ++i) {
-;    f[i].a = 1;
-;    f[i].b = 1;
-;  }
-;}
-define void @bar5(%struct.foo1* nocapture %f, i32 %n) nounwind ssp {
-entry:
-  %cmp1 = icmp eq i32 %n, 0
-  br i1 %cmp1, label %for.end, label %for.body.preheader
-
-for.body.preheader:                               ; preds = %entry
-  br label %for.body
-
-for.body:                                         ; preds = %for.body.preheader, %for.body
-  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %a = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 0
-  store i32 1, i32* %a, align 4
-  %b = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 1
-  store i32 1, i32* %b, align 4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp ne i32 %lftr.wideiv, %n
-  br i1 %exitcond, label %for.body, label %for.end.loopexit
-
-for.end.loopexit:                                 ; preds = %for.body
-  br label %for.end
-
-for.end:                                          ; preds = %for.end.loopexit, %entry
-  ret void
-; CHECK-LABEL: @bar5(
-; CHECK-NOT: call void @memset_pattern16
-}
--- a/test/Transforms/LoopIdiom/unroll.ll
+++ b/test/Transforms/LoopIdiom/unroll.ll
@ -1,80 +0,0 @@
-; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-
-; CHECK @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
-
-target triple = "x86_64-apple-darwin10.0.0"
-
-;void test(int *f, unsigned n) {
-;  for (unsigned i = 0; i < 2 * n; i += 2) {
-;    f[i] = 0;
-;    f[i+1] = 0;
-;  }
-;}
-define void @test(i32* %f, i32 %n) nounwind ssp {
-entry:
-  %mul = shl i32 %n, 1
-  %cmp1 = icmp eq i32 %mul, 0
-  br i1 %cmp1, label %for.end, label %for.body.preheader
-
-for.body.preheader:                               ; preds = %entry
-  %0 = zext i32 %mul to i64
-  br label %for.body
-
-for.body:                                         ; preds = %for.body.preheader, %for.body
-  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv
-  store i32 0, i32* %arrayidx, align 4
-  %1 = or i64 %indvars.iv, 1
-  %arrayidx2 = getelementptr inbounds i32, i32* %f, i64 %1
-  store i32 0, i32* %arrayidx2, align 4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
-  %cmp = icmp ult i64 %indvars.iv.next, %0
-  br i1 %cmp, label %for.body, label %for.end.loopexit
-
-for.end.loopexit:                                 ; preds = %for.body
-  br label %for.end
-
-for.end:                                          ; preds = %for.end.loopexit, %entry
-  ret void
-; CHECK-LABEL: @test(
-; CHECK: call void @llvm.memset
-; CHECK-NOT: store
-}
-
-;void test_pattern(int *f, unsigned n) {
-;  for (unsigned i = 0; i < 2 * n; i += 2) {
-;    f[i] = 2;
-;    f[i+1] = 2;
-;  }
-;}
-define void @test_pattern(i32* %f, i32 %n) nounwind ssp {
-entry:
-  %mul = shl i32 %n, 1
-  %cmp1 = icmp eq i32 %mul, 0
-  br i1 %cmp1, label %for.end, label %for.body.preheader
-
-for.body.preheader:                               ; preds = %entry
-  %0 = zext i32 %mul to i64
-  br label %for.body
-
-for.body:                                         ; preds = %for.body.preheader, %for.body
-  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv
-  store i32 2, i32* %arrayidx, align 4
-  %1 = or i64 %indvars.iv, 1
-  %arrayidx2 = getelementptr inbounds i32, i32* %f, i64 %1
-  store i32 2, i32* %arrayidx2, align 4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
-  %cmp = icmp ult i64 %indvars.iv.next, %0
-  br i1 %cmp, label %for.body, label %for.end.loopexit
-
-for.end.loopexit:                                 ; preds = %for.body
-  br label %for.end
-
-for.end:                                          ; preds = %for.end.loopexit, %entry
-  ret void
-; CHECK-LABEL: @test_pattern(
-; CHECK: call void @memset_pattern16
-; CHECK-NOT: store
-}