diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 3f1d82cf5b0..f9064325b53 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -106,9 +106,10 @@ class SingleBlockLoopVectorizer { public: /// Ctor. SingleBlockLoopVectorizer(Loop *Orig, ScalarEvolution *Se, LoopInfo *Li, - DominatorTree *dt, LPPassManager *Lpm, + DominatorTree *dt, DataLayout *dl, + LPPassManager *Lpm, unsigned VecWidth): - OrigLoop(Orig), SE(Se), LI(Li), DT(dt), LPM(Lpm), VF(VecWidth), + OrigLoop(Orig), SE(Se), LI(Li), DT(dt), DL(dl), LPM(Lpm), VF(VecWidth), Builder(Se->getContext()), Induction(0), OldInduction(0) { } // Perform the actual loop widening (vectorization). @@ -167,6 +168,8 @@ private: LoopInfo *LI; // Dominator Tree. DominatorTree *DT; + // Data Layout; + DataLayout *DL; // Loop Pass Manager; LPPassManager *LPM; // The vectorization factor to use. @@ -250,10 +253,36 @@ public: // This POD struct holds information about the memory runtime legality // check that a group of pointers do not overlap. struct RuntimePointerCheck { + RuntimePointerCheck(): Need(false) {} + + /// Reset the state of the pointer runtime information. + void reset() { + Need = false; + Pointers.clear(); + Starts.clear(); + Ends.clear(); + } + + /// Insert a pointer and calculate the start and end SCEVs. + void insert_pointer(ScalarEvolution *SE, Loop *Lp, Value *Ptr) { + const SCEV *Sc = SE->getSCEV(Ptr); + const SCEVAddRecExpr *AR = dyn_cast(Sc); + assert(AR && "Invalid addrec expression"); + const SCEV *Ex = SE->getExitCount(Lp, Lp->getHeader()); + const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE); + Pointers.push_back(Ptr); + Starts.push_back(AR->getStart()); + Ends.push_back(ScEnd); + } + /// This flag indicates if we need to add the runtime check. bool Need; /// Holds the pointers that we need to check. SmallVector Pointers; + /// Holds the pointer value at the beginning of the loop. + SmallVector Starts; + /// Holds the pointer value at the end of the loop. + SmallVector Ends; }; /// ReductionList contains the reduction descriptors for all @@ -278,11 +307,11 @@ public: /// Returns the induction variables found in the loop. InductionList *getInductionVars() { return &Inductions; } - /// Check if the pointer returned by this GEP is consecutive - /// when the index is vectorized. This happens when the last - /// index of the GEP is consecutive, like the induction variable. + /// Check if this pointer is consecutive when vectorizing. This happens + /// when the last index of the GEP is the induction variable, or that the + /// pointer itself is an induction variable. /// This check allows us to vectorize A[idx] into a wide load/store. - bool isConsecutiveGep(Value *Ptr); + bool isConsecutivePtr(Value *Ptr); /// Returns true if the value V is uniform within the loop. bool isUniform(Value *V); @@ -451,7 +480,7 @@ struct LoopVectorize : public LoopPass { "\n"); // If we decided that it is *legal* to vectorizer the loop then do it. - SingleBlockLoopVectorizer LB(L, SE, LI, DT, &LPM, VF); + SingleBlockLoopVectorizer LB(L, SE, LI, DT, DL, &LPM, VF); LB.vectorize(&LVL); DEBUG(verifyFunction(*L->getHeader()->getParent())); @@ -472,10 +501,6 @@ struct LoopVectorize : public LoopPass { }; Value *SingleBlockLoopVectorizer::getBroadcastInstrs(Value *V) { - // Instructions that access the old induction variable - // actually want to get the new one. - if (V == OldInduction) - V = Induction; // Create the types. LLVMContext &C = V->getContext(); Type *VTy = VectorType::get(V->getType(), VF); @@ -515,7 +540,14 @@ Value *SingleBlockLoopVectorizer::getConsecutiveVector(Value* Val) { return Builder.CreateAdd(Val, Cv, "induction"); } -bool LoopVectorizationLegality::isConsecutiveGep(Value *Ptr) { +bool LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { + assert(Ptr->getType()->isPointerTy() && "Unexpected non ptr"); + + // If this pointer is an induction variable, return it. + PHINode *Phi = dyn_cast_or_null(Ptr); + if (Phi && getInductionVars()->count(Phi)) + return true; + GetElementPtrInst *Gep = dyn_cast_or_null(Ptr); if (!Gep) return false; @@ -576,7 +608,7 @@ void SingleBlockLoopVectorizer::scalarizeInstruction(Instruction *Instr) { // If we are accessing the old induction variable, use the new one. if (SrcOp == OldInduction) { - Params.push_back(getBroadcastInstrs(Induction)); + Params.push_back(getVectorValue(Induction)); continue; } @@ -666,9 +698,13 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { ... */ + // Some loops have a single integer induction variable, while other loops + // don't. One example is c++ iterators that often have multiple pointer + // induction variables. In the code below we also support a case where we + // don't have a single induction variable. OldInduction = Legal->getInduction(); - assert(OldInduction && "We must have a single phi node."); - Type *IdxTy = OldInduction->getType(); + Type *IdxTy = OldInduction ? OldInduction->getType() : + DL->getIntPtrType(SE->getContext()); // Find the loop boundaries. const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getHeader()); @@ -677,19 +713,18 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { // Get the total trip count from the count by adding 1. ExitCount = SE->getAddExpr(ExitCount, SE->getConstant(ExitCount->getType(), 1)); - // We may need to extend the index in case there is a type mismatch. - // We know that the count starts at zero and does not overflow. - // We are using Zext because it should be less expensive. - if (ExitCount->getType() != IdxTy) - ExitCount = SE->getZeroExtendExpr(ExitCount, IdxTy); // This is the original scalar-loop preheader. BasicBlock *BypassBlock = OrigLoop->getLoopPreheader(); BasicBlock *ExitBlock = OrigLoop->getExitBlock(); assert(ExitBlock && "Must have an exit block"); - // The loop index does not have to start at Zero. It starts with this value. - Value *StartIdx = OldInduction->getIncomingValueForBlock(BypassBlock); + // The loop index does not have to start at Zero. Find the original start + // value from the induction PHI node. If we don't have an induction variable + // then we know that it starts at zero. + Value *StartIdx = OldInduction ? + OldInduction->getIncomingValueForBlock(BypassBlock): + ConstantInt::get(IdxTy, 0); assert(OrigLoop->getNumBlocks() == 1 && "Invalid loop"); assert(BypassBlock && "Invalid loop structure"); @@ -721,7 +756,18 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { Instruction *Loc = BypassBlock->getTerminator(); // Count holds the overall loop count (N). - Value *Count = Exp.expandCodeFor(ExitCount, Induction->getType(), Loc); + Value *Count = Exp.expandCodeFor(ExitCount, ExitCount->getType(), Loc); + + // We may need to extend the index in case there is a type mismatch. + // We know that the count starts at zero and does not overflow. + if (Count->getType() != IdxTy) { + // The exit count can be of pointer type. Convert it to the correct + // integer type. + if (ExitCount->getType()->isPointerTy()) + Count = CastInst::CreatePointerCast(Count, IdxTy, "ptrcnt.to.int", Loc); + else + Count = CastInst::CreateZExtOrBitCast(Count, IdxTy, "zext.cnt", Loc); + } // Add the start index to the loop count to get the new end index. Value *IdxEnd = BinaryOperator::CreateAdd(Count, StartIdx, "end.idx", Loc); @@ -734,7 +780,8 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { Value *IdxEndRoundDown = BinaryOperator::CreateAdd(CountRoundDown, StartIdx, "end.idx.rnd.down", Loc); - // Now, compare the new count to zero. If it is zero, jump to the scalar part. + // Now, compare the new count to zero. If it is zero skip the vector loop and + // jump to the scalar loop. Value *Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, IdxEndRoundDown, StartIdx, @@ -762,23 +809,21 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { Ends.push_back(Ptr); } else { DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr <<"\n"); - const SCEVAddRecExpr *AR = dyn_cast(Sc); - Value *Start = Exp.expandCodeFor(AR->getStart(), PtrArithTy, Loc); - const SCEV *Ex = SE->getExitCount(OrigLoop, OrigLoop->getHeader()); - const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE); - assert(!isa(ScEnd) && "Invalid scev range."); - Value *End = Exp.expandCodeFor(ScEnd, PtrArithTy, Loc); + + Value *Start = Exp.expandCodeFor(PtrRtCheck->Starts[i], + PtrArithTy, Loc); + Value *End = Exp.expandCodeFor(PtrRtCheck->Ends[i], PtrArithTy, Loc); Starts.push_back(Start); Ends.push_back(End); } } - for (unsigned i=0; i < NumPointers; ++i) { - for (unsigned j=i+1; j < NumPointers; ++j) { + for (unsigned i = 0; i < NumPointers; ++i) { + for (unsigned j = i+1; j < NumPointers; ++j) { Value *Cmp0 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE, - Starts[0], Ends[1], "bound0", Loc); + Starts[i], Ends[j], "bound0", Loc); Value *Cmp1 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE, - Starts[1], Ends[0], "bound1", Loc); + Starts[j], Ends[i], "bound1", Loc); Value *IsConflict = BinaryOperator::Create(Instruction::And, Cmp0, Cmp1, "found.conflict", Loc); if (MemoryRuntimeCheck) { @@ -812,7 +857,7 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { // value. // This variable saves the new starting index for the scalar loop. - Value *ResumeIndex = 0; + PHINode *ResumeIndex = 0; LoopVectorizationLegality::InductionList::iterator I, E; LoopVectorizationLegality::InductionList *List = Legal->getInductionVars(); for (I = List->begin(), E = List->end(); I != E; ++I) { @@ -830,7 +875,7 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { } else { // For pointer induction variables, calculate the offset using // the end index. - EndValue = GetElementPtrInst::Create(I->second, IdxEndRoundDown, + EndValue = GetElementPtrInst::Create(I->second, CountRoundDown, "ptr.ind.end", BypassBlock->getTerminator()); } @@ -841,10 +886,22 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { ResumeVal->addIncoming(EndValue, VecBody); // Fix the scalar body counter (PHI node). - unsigned BlockIdx = OldInduction->getBasicBlockIndex(ScalarPH); + unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH); OrigPhi->setIncomingValue(BlockIdx, ResumeVal); } + // If we are generating a new induction variable then we also need to + // generate the code that calculates the exit value. This value is not + // simply the end of the counter because we may skip the vectorized body + // in case of a runtime check. + if (!OldInduction){ + assert(!ResumeIndex && "Unexpected resume value found"); + ResumeIndex = PHINode::Create(IdxTy, 2, "new.indc.resume.val", + MiddleBlock->getTerminator()); + ResumeIndex->addIncoming(StartIdx, BypassBlock); + ResumeIndex->addIncoming(IdxEndRoundDown, VecBody); + } + // Make sure that we found the index where scalar loop needs to continue. assert(ResumeIndex && ResumeIndex->getType()->isIntegerTy() && "Invalid resume Index"); @@ -953,43 +1010,54 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { continue; case Instruction::PHI:{ PHINode* P = cast(Inst); - // Special handling for the induction var. - if (OldInduction == Inst) - continue; - // Handle reduction variables: if (Legal->getReductionVars()->count(P)) { // This is phase one of vectorizing PHIs. Type *VecTy = VectorType::get(Inst->getType(), VF); - WidenMap[Inst] = Builder.CreatePHI(VecTy, 2, "vec.phi"); + WidenMap[Inst] = PHINode::Create(VecTy, 2, "vec.phi", + LoopVectorBody->getFirstInsertionPt()); RdxPHIsToFix.push_back(P); continue; } - // Handle pointer inductions: - if (Legal->getInductionVars()->count(P)) { - Value *StartIdx = Legal->getInductionVars()->lookup(OldInduction); - Value *StartPtr = Legal->getInductionVars()->lookup(P); - // This is the normalized GEP that starts counting at zero. - Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx, - "normalized.idx"); - // This is the first GEP in the sequence. - Value *FirstGep = Builder.CreateGEP(StartPtr, NormalizedIdx, - "induc.ptr"); - // This is the vector of results. Notice that we don't generate vector - // geps because scalar geps result in better code. - Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF)); - for (unsigned int i = 0; i < VF; ++i) { - Value *SclrGep = Builder.CreateGEP(FirstGep, Builder.getInt32(i), - "next.gep"); - VecVal = Builder.CreateInsertElement(VecVal, SclrGep, - Builder.getInt32(i), - "insert.gep"); - } + // This PHINode must be an induction variable. + // Make sure that we know about it. + assert(Legal->getInductionVars()->count(P) && + "Not an induction variable"); - WidenMap[Inst] = VecVal; + if (P->getType()->isIntegerTy()) { + assert(P == OldInduction && "Unexpected PHI"); + WidenMap[Inst] = getBroadcastInstrs(Induction); continue; } + + // Handle pointer inductions: + assert(P->getType()->isPointerTy() && "Unexpected type."); + Value *StartIdx = OldInduction ? + Legal->getInductionVars()->lookup(OldInduction) : + ConstantInt::get(Induction->getType(), 0); + + // This is the pointer value coming into the loop. + Value *StartPtr = Legal->getInductionVars()->lookup(P); + + // This is the normalized GEP that starts counting at zero. + Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx, + "normalized.idx"); + + // This is the vector of results. Notice that we don't generate vector + // geps because scalar geps result in better code. + Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF)); + for (unsigned int i = 0; i < VF; ++i) { + Constant *Idx = ConstantInt::get(Induction->getType(), i); + Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx"); + Value *SclrGep = Builder.CreateGEP(StartPtr, GlobalIdx, "next.gep"); + VecVal = Builder.CreateInsertElement(VecVal, SclrGep, + Builder.getInt32(i), + "insert.gep"); + } + + WidenMap[Inst] = VecVal; + continue; } case Instruction::Add: case Instruction::FAdd: @@ -1076,21 +1144,27 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { GetElementPtrInst *Gep = dyn_cast(Ptr); // This store does not use GEPs. - if (!Legal->isConsecutiveGep(Gep)) { + if (!Legal->isConsecutivePtr(Ptr)) { scalarizeInstruction(Inst); break; } - // The last index does not have to be the induction. It can be - // consecutive and be a function of the index. For example A[I+1]; - unsigned NumOperands = Gep->getNumOperands(); - Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands - 1)); - LastIndex = Builder.CreateExtractElement(LastIndex, Zero); + if (Gep) { + // The last index does not have to be the induction. It can be + // consecutive and be a function of the index. For example A[I+1]; + unsigned NumOperands = Gep->getNumOperands(); + Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands - 1)); + LastIndex = Builder.CreateExtractElement(LastIndex, Zero); - // Create the new GEP with the new induction variable. - GetElementPtrInst *Gep2 = cast(Gep->clone()); - Gep2->setOperand(NumOperands - 1, LastIndex); - Ptr = Builder.Insert(Gep2); + // Create the new GEP with the new induction variable. + GetElementPtrInst *Gep2 = cast(Gep->clone()); + Gep2->setOperand(NumOperands - 1, LastIndex); + Ptr = Builder.Insert(Gep2); + } else { + // Use the induction element ptr. + assert(isa(Ptr) && "Invalid induction ptr"); + Ptr = Builder.CreateExtractElement(getVectorValue(Ptr), Zero); + } Ptr = Builder.CreateBitCast(Ptr, StTy->getPointerTo()); Value *Val = getVectorValue(SI->getValueOperand()); Builder.CreateStore(Val, Ptr)->setAlignment(Alignment); @@ -1104,23 +1178,31 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { unsigned Alignment = LI->getAlignment(); GetElementPtrInst *Gep = dyn_cast(Ptr); - // If we don't have a gep, or that the pointer is loop invariant, + // If the pointer is loop invariant or if it is non consecutive, // scalarize the load. - if (!Gep || Legal->isUniform(Gep) || !Legal->isConsecutiveGep(Gep)) { + bool Con = Legal->isConsecutivePtr(Ptr); + if (Legal->isUniform(Ptr) || !Con) { scalarizeInstruction(Inst); break; } - // The last index does not have to be the induction. It can be - // consecutive and be a function of the index. For example A[I+1]; - unsigned NumOperands = Gep->getNumOperands(); - Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands -1)); - LastIndex = Builder.CreateExtractElement(LastIndex, Zero); + if (Gep) { + // The last index does not have to be the induction. It can be + // consecutive and be a function of the index. For example A[I+1]; + unsigned NumOperands = Gep->getNumOperands(); + Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands -1)); + LastIndex = Builder.CreateExtractElement(LastIndex, Zero); + + // Create the new GEP with the new induction variable. + GetElementPtrInst *Gep2 = cast(Gep->clone()); + Gep2->setOperand(NumOperands - 1, LastIndex); + Ptr = Builder.Insert(Gep2); + } else { + // Use the induction element ptr. + assert(isa(Ptr) && "Invalid induction ptr"); + Ptr = Builder.CreateExtractElement(getVectorValue(Ptr), Zero); + } - // Create the new GEP with the new induction variable. - GetElementPtrInst *Gep2 = cast(Gep->clone()); - Gep2->setOperand(NumOperands - 1, LastIndex); - Ptr = Builder.Insert(Gep2); Ptr = Builder.CreateBitCast(Ptr, RetTy->getPointerTo()); LI = Builder.CreateLoad(Ptr); LI->setAlignment(Alignment); @@ -1301,7 +1383,7 @@ bool LoopVectorizationLegality::canVectorize() { if (!TheLoop->getLoopPreheader()) { assert(false && "No preheader!!"); DEBUG(dbgs() << "LV: Loop not normalized." << "\n"); - return false; + return false; } // We can only vectorize single basic block loops. @@ -1347,6 +1429,7 @@ bool LoopVectorizationLegality::canVectorize() { } bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) { + BasicBlock *PreHeader = TheLoop->getLoopPreheader(); // Scan the instructions in the block and look for hazards. @@ -1440,8 +1523,8 @@ bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) { } // next instr. if (!Induction) { - DEBUG(dbgs() << "LV: Did not find an induction var.\n"); - return false; + DEBUG(dbgs() << "LV: Did not find one integer induction var.\n"); + assert(getInductionVars()->size() && "No induction variables"); } // Don't vectorize if the memory dependencies do not allow vectorization. @@ -1458,15 +1541,10 @@ bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) { while (Worklist.size()) { Instruction *I = dyn_cast(Worklist.back()); Worklist.pop_back(); - // Look at instructions inside this block. - if (!I) continue; - if (I->getParent() != &BB) continue; - // Stop when reaching PHI nodes. - if (isa(I)) { - assert(I == Induction && "Found a uniform PHI that is not the induction"); - break; - } + // Look at instructions inside this block. Stop when reaching PHI nodes. + if (!I || I->getParent() != &BB || isa(I)) + continue; // This is a known uniform. Uniforms.insert(I); @@ -1569,7 +1647,7 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) { // If the address of i is unknown (for example A[B[i]]) then we may // read a few words, modify, and write a few words, and some of the // words may be written to the same address. - if (Seen.insert(Ptr) || !isConsecutiveGep(Ptr)) + if (Seen.insert(Ptr) || !isConsecutivePtr(Ptr)) Reads.push_back(Ptr); } @@ -1585,7 +1663,7 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) { bool RT = true; for (I = ReadWrites.begin(), IE = ReadWrites.end(); I != IE; ++I) if (hasComputableBounds(*I)) { - PtrRtCheck.Pointers.push_back(*I); + PtrRtCheck.insert_pointer(SE, TheLoop, *I); DEBUG(dbgs() << "LV: Found a runtime check ptr:" << **I <<"\n"); } else { RT = false; @@ -1593,7 +1671,7 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) { } for (I = Reads.begin(), IE = Reads.end(); I != IE; ++I) if (hasComputableBounds(*I)) { - PtrRtCheck.Pointers.push_back(*I); + PtrRtCheck.insert_pointer(SE, TheLoop, *I); DEBUG(dbgs() << "LV: Found a runtime check ptr:" << **I <<"\n"); } else { RT = false; @@ -1603,7 +1681,7 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) { // Check that we did not collect too many pointers or found a // unsizeable pointer. if (!RT || PtrRtCheck.Pointers.size() > RuntimeMemoryCheckThreshold) { - PtrRtCheck.Pointers.clear(); + PtrRtCheck.reset(); RT = false; } @@ -1658,8 +1736,7 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) { // It is safe to vectorize and we don't need any runtime checks. DEBUG(dbgs() << "LV: We don't need a runtime memory check.\n"); - PtrRtCheck.Pointers.clear(); - PtrRtCheck.Need = false; + PtrRtCheck.reset(); return true; } @@ -1917,7 +1994,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { SI->getAlignment(), SI->getPointerAddressSpace()); // Scalarized stores. - if (!Legal->isConsecutiveGep(SI->getPointerOperand())) { + if (!Legal->isConsecutivePtr(SI->getPointerOperand())) { unsigned Cost = 0; unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement, ValTy); @@ -1944,7 +2021,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { LI->getPointerAddressSpace()); // Scalarized loads. - if (!Legal->isConsecutiveGep(LI->getPointerOperand())) { + if (!Legal->isConsecutivePtr(LI->getPointerOperand())) { unsigned Cost = 0; unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, RetTy); // The cost of inserting the loaded value into the result vector. diff --git a/test/Transforms/LoopVectorize/no_int_induction.ll b/test/Transforms/LoopVectorize/no_int_induction.ll new file mode 100644 index 00000000000..516fd1de07f --- /dev/null +++ b/test/Transforms/LoopVectorize/no_int_induction.ll @@ -0,0 +1,33 @@ +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s + +; int __attribute__((noinline)) sum_array(int *A, int n) { +; return std::accumulate(A, A + n, 0); +; } + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +;CHECK: @sum_array +;CHECK: phi <4 x i32> +;CHECK: load <4 x i32> +;CHECK: add nsw <4 x i32> +;CHECK: ret i32 +define i32 @sum_array(i32* %A, i32 %n) nounwind uwtable readonly noinline ssp { + %1 = sext i32 %n to i64 + %2 = getelementptr inbounds i32* %A, i64 %1 + %3 = icmp eq i32 %n, 0 + br i1 %3, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %.lr.ph.i + +.lr.ph.i: ; preds = %0, %.lr.ph.i + %.03.i = phi i32* [ %6, %.lr.ph.i ], [ %A, %0 ] + %.012.i = phi i32 [ %5, %.lr.ph.i ], [ 0, %0 ] + %4 = load i32* %.03.i, align 4 + %5 = add nsw i32 %4, %.012.i + %6 = getelementptr inbounds i32* %.03.i, i64 1 + %7 = icmp eq i32* %6, %2 + br i1 %7, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %.lr.ph.i + +_ZSt10accumulateIPiiET0_T_S2_S1_.exit: ; preds = %.lr.ph.i, %0 + %.01.lcssa.i = phi i32 [ 0, %0 ], [ %5, %.lr.ph.i ] + ret i32 %.01.lcssa.i +}