From 43d200ded1c1e5dc3a365f9a506dc6c8718c7fc9 Mon Sep 17 00:00:00 2001
From: Nadav Rotem
Date: Tue, 4 Dec 2012 06:15:11 +0000
Subject: [PATCH] Add the last part that is needed for vectorization of
 if-converted code. Added the code that actually performs the if-conversion
 during vectorization.

We can now vectorize this code:

  for (int i=0; i<n; i++) {
    if (a[i] > b[i])        <------ IF inside the loop.
      k = k * 5 + 3;

    a[i] = k;               <---- K is a phi node that becomes vector-select.
  }

llvm-svn: 169217
---
 lib/Transforms/Vectorize/LoopVectorize.cpp    | 579 ++++++++++--------
 .../Transforms/LoopVectorize/if-conversion.ll |  60 ++
 2 files changed, 394 insertions(+), 245 deletions(-)
 create mode 100644 test/Transforms/LoopVectorize/if-conversion.ll

diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 0e33228cc93..f538e081793 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -50,6 +50,7 @@
 #include "llvm/Analysis/AliasSetTracker.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
@@ -134,6 +135,9 @@ public:
   }

 private:
+  /// A small list of PHINodes.
+  typedef SmallVector<PHINode*, 4> PhiVector;
+
   /// Add code that checks at runtime if the accessed arrays overlap.
   /// Returns the comperator value or NULL if no check is needed.
   Value *addRuntimeCheck(LoopVectorizationLegality *Legal,
@@ -142,6 +146,19 @@ private:
   void createEmptyLoop(LoopVectorizationLegality *Legal);
   /// Copy and widen the instructions from the old loop.
   void vectorizeLoop(LoopVectorizationLegality *Legal);
+
+  /// A helper function that computes the predicate of the block BB, assuming
+  /// that the header block of the loop is set to True. It returns the *entry*
+  /// mask for the block BB.
+  Value *createBlockInMask(BasicBlock *BB);
+  /// A helper function that computes the predicate of the edge between SRC
+  /// and DST.
+  Value *createEdgeMask(BasicBlock *Src, BasicBlock *Dst);
+
+  /// A helper function to vectorize a single BB within the innermost loop.
+  void vectorizeBlockInLoop(LoopVectorizationLegality *Legal, BasicBlock *BB,
+                            PhiVector *PV);
+
   /// Insert the new loop to the loop hierarchy and pass manager
   /// and update the analysis passes.
   void updateAnalysis();
@@ -816,7 +833,7 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
     DL->getIntPtrType(SE->getContext());

   // Find the loop boundaries.
-  const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getHeader());
+  const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getLoopLatch());
   assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count");

   // Get the total trip count from the count by adding 1.
@@ -838,7 +855,6 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
     OldInduction->getIncomingValueForBlock(BypassBlock):
     ConstantInt::get(IdxTy, 0);

-  assert(OrigLoop->getNumBlocks() == 1 && "Invalid loop");
   assert(BypassBlock && "Invalid loop structure");

   // Generate the code that checks in runtime if arrays overlap.
@@ -1044,7 +1060,6 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
   // the cost-model.
   //
   //===------------------------------------------------===//
-  typedef SmallVector<PHINode*, 4> PhiVector;
   BasicBlock &BB = *OrigLoop->getHeader();
   Constant *Zero = ConstantInt::get(
     IntegerType::getInt32Ty(BB.getContext()), 0);
@@ -1059,250 +1074,17 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
   // construct the PHI.
   PhiVector RdxPHIsToFix;

-  // For each instruction in the old loop.
-  for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) {
-    Instruction *Inst = it;
+  // Scan the loop in a topological order to ensure that defs are vectorized
+  // before users.
+  LoopBlocksDFS DFS(OrigLoop);
+  DFS.perform(LI);

-    switch (Inst->getOpcode()) {
-      case Instruction::Br:
-        // Nothing to do for PHIs and BR, since we already took care of the
-        // loop control flow instructions.
-        continue;
-      case Instruction::PHI:{
-        PHINode* P = cast<PHINode>(Inst);
-        // Handle reduction variables:
-        if (Legal->getReductionVars()->count(P)) {
-          // This is phase one of vectorizing PHIs.
-          Type *VecTy = VectorType::get(Inst->getType(), VF);
-          WidenMap[Inst] = PHINode::Create(VecTy, 2, "vec.phi",
-                                           LoopVectorBody->getFirstInsertionPt());
-          RdxPHIsToFix.push_back(P);
-          continue;
-        }
+  // Vectorize all of the blocks in the original loop.
+  for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(),
+       be = DFS.endRPO(); bb != be; ++bb)
+    vectorizeBlockInLoop(Legal, *bb, &RdxPHIsToFix);

-        // This PHINode must be an induction variable.
-        // Make sure that we know about it.
-        assert(Legal->getInductionVars()->count(P) &&
-               "Not an induction variable");
-
-        if (P->getType()->isIntegerTy()) {
-          assert(P == OldInduction && "Unexpected PHI");
-          Value *Broadcasted = getBroadcastInstrs(Induction);
-          // After broadcasting the induction variable we need to make the
-          // vector consecutive by adding 0, 1, 2 ...
-          Value *ConsecutiveInduction = getConsecutiveVector(Broadcasted);
-
-          WidenMap[OldInduction] = ConsecutiveInduction;
-          continue;
-        }
-
-        // Handle pointer inductions.
-        assert(P->getType()->isPointerTy() && "Unexpected type.");
-        Value *StartIdx = OldInduction ?
-          Legal->getInductionVars()->lookup(OldInduction) :
-          ConstantInt::get(Induction->getType(), 0);
-
-        // This is the pointer value coming into the loop.
-        Value *StartPtr = Legal->getInductionVars()->lookup(P);
-
-        // This is the normalized GEP that starts counting at zero.
-        Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx,
-                                                 "normalized.idx");
-
-        // This is the vector of results. Notice that we don't generate vector
-        // geps because scalar geps result in better code.
-        Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
-        for (unsigned int i = 0; i < VF; ++i) {
-          Constant *Idx = ConstantInt::get(Induction->getType(), i);
-          Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx");
-          Value *SclrGep = Builder.CreateGEP(StartPtr, GlobalIdx, "next.gep");
-          VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
-                                               Builder.getInt32(i),
-                                               "insert.gep");
-        }
-
-        WidenMap[Inst] = VecVal;
-        continue;
-      }
-      case Instruction::Add:
-      case Instruction::FAdd:
-      case Instruction::Sub:
-      case Instruction::FSub:
-      case Instruction::Mul:
-      case Instruction::FMul:
-      case Instruction::UDiv:
-      case Instruction::SDiv:
-      case Instruction::FDiv:
-      case Instruction::URem:
-      case Instruction::SRem:
-      case Instruction::FRem:
-      case Instruction::Shl:
-      case Instruction::LShr:
-      case Instruction::AShr:
-      case Instruction::And:
-      case Instruction::Or:
-      case Instruction::Xor: {
-        // Just widen binops.
-        BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
-        Value *A = getVectorValue(Inst->getOperand(0));
-        Value *B = getVectorValue(Inst->getOperand(1));
-
-        // Use this vector value for all users of the original instruction.
-        Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A, B);
-        WidenMap[Inst] = V;
-
-        // Update the NSW, NUW and Exact flags.
-        BinaryOperator *VecOp = cast<BinaryOperator>(V);
-        if (isa<OverflowingBinaryOperator>(BinOp)) {
-          VecOp->setHasNoSignedWrap(BinOp->hasNoSignedWrap());
-          VecOp->setHasNoUnsignedWrap(BinOp->hasNoUnsignedWrap());
-        }
-        if (isa<PossiblyExactOperator>(VecOp))
-          VecOp->setIsExact(BinOp->isExact());
-        break;
-      }
-      case Instruction::Select: {
-        // Widen selects.
-        // If the selector is loop invariant we can create a select
-        // instruction with a scalar condition. Otherwise, use vector-select.
-        Value *Cond = Inst->getOperand(0);
-        bool InvariantCond = SE->isLoopInvariant(SE->getSCEV(Cond), OrigLoop);
-
-        // The condition can be loop invariant but still defined inside the
-        // loop. This means that we can't just use the original 'cond' value.
-        // We have to take the 'vectorized' value and pick the first lane.
-        // Instcombine will make this a no-op.
-        Cond = getVectorValue(Cond);
-        if (InvariantCond)
-          Cond = Builder.CreateExtractElement(Cond, Builder.getInt32(0));
-
-        Value *Op0 = getVectorValue(Inst->getOperand(1));
-        Value *Op1 = getVectorValue(Inst->getOperand(2));
-        WidenMap[Inst] = Builder.CreateSelect(Cond, Op0, Op1);
-        break;
-      }
-
-      case Instruction::ICmp:
-      case Instruction::FCmp: {
-        // Widen compares. Generate vector compares.
-        bool FCmp = (Inst->getOpcode() == Instruction::FCmp);
-        CmpInst *Cmp = dyn_cast<CmpInst>(Inst);
-        Value *A = getVectorValue(Inst->getOperand(0));
-        Value *B = getVectorValue(Inst->getOperand(1));
-        if (FCmp)
-          WidenMap[Inst] = Builder.CreateFCmp(Cmp->getPredicate(), A, B);
-        else
-          WidenMap[Inst] = Builder.CreateICmp(Cmp->getPredicate(), A, B);
-        break;
-      }
-
-      case Instruction::Store: {
-        // Attempt to issue a wide store.
-        StoreInst *SI = dyn_cast<StoreInst>(Inst);
-        Type *StTy = VectorType::get(SI->getValueOperand()->getType(), VF);
-        Value *Ptr = SI->getPointerOperand();
-        unsigned Alignment = SI->getAlignment();
-
-        assert(!Legal->isUniform(Ptr) &&
-               "We do not allow storing to uniform addresses");
-
-        GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
-
-        // This store does not use GEPs.
-        if (!Legal->isConsecutivePtr(Ptr)) {
-          scalarizeInstruction(Inst);
-          break;
-        }
-
-        if (Gep) {
-          // The last index does not have to be the induction. It can be
-          // consecutive and be a function of the index. For example A[I+1];
-          unsigned NumOperands = Gep->getNumOperands();
-          Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands - 1));
-          LastIndex = Builder.CreateExtractElement(LastIndex, Zero);
-
-          // Create the new GEP with the new induction variable.
-          GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
-          Gep2->setOperand(NumOperands - 1, LastIndex);
-          Ptr = Builder.Insert(Gep2);
-        } else {
-          // Use the induction element ptr.
-          assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
-          Ptr = Builder.CreateExtractElement(getVectorValue(Ptr), Zero);
-        }
-        Ptr = Builder.CreateBitCast(Ptr, StTy->getPointerTo());
-        Value *Val = getVectorValue(SI->getValueOperand());
-        Builder.CreateStore(Val, Ptr)->setAlignment(Alignment);
-        break;
-      }
-      case Instruction::Load: {
-        // Attempt to issue a wide load.
-        LoadInst *LI = dyn_cast<LoadInst>(Inst);
-        Type *RetTy = VectorType::get(LI->getType(), VF);
-        Value *Ptr = LI->getPointerOperand();
-        unsigned Alignment = LI->getAlignment();
-        GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
-
-        // If the pointer is loop invariant or if it is non consecutive,
-        // scalarize the load.
-        bool Con = Legal->isConsecutivePtr(Ptr);
-        if (Legal->isUniform(Ptr) || !Con) {
-          scalarizeInstruction(Inst);
-          break;
-        }
-
-        if (Gep) {
-          // The last index does not have to be the induction. It can be
-          // consecutive and be a function of the index. For example A[I+1];
-          unsigned NumOperands = Gep->getNumOperands();
-          Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands -1));
-          LastIndex = Builder.CreateExtractElement(LastIndex, Zero);
-
-          // Create the new GEP with the new induction variable.
-          GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
-          Gep2->setOperand(NumOperands - 1, LastIndex);
-          Ptr = Builder.Insert(Gep2);
-        } else {
-          // Use the induction element ptr.
-          assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
-          Ptr = Builder.CreateExtractElement(getVectorValue(Ptr), Zero);
-        }
-
-        Ptr = Builder.CreateBitCast(Ptr, RetTy->getPointerTo());
-        LI = Builder.CreateLoad(Ptr);
-        LI->setAlignment(Alignment);
-        // Use this vector value for all users of the load.
-        WidenMap[Inst] = LI;
-        break;
-      }
-      case Instruction::ZExt:
-      case Instruction::SExt:
-      case Instruction::FPToUI:
-      case Instruction::FPToSI:
-      case Instruction::FPExt:
-      case Instruction::PtrToInt:
-      case Instruction::IntToPtr:
-      case Instruction::SIToFP:
-      case Instruction::UIToFP:
-      case Instruction::Trunc:
-      case Instruction::FPTrunc:
-      case Instruction::BitCast: {
-        /// Vectorize bitcasts.
-        CastInst *CI = dyn_cast<CastInst>(Inst);
-        Value *A = getVectorValue(Inst->getOperand(0));
-        Type *DestTy = VectorType::get(CI->getType()->getScalarType(), VF);
-        WidenMap[Inst] = Builder.CreateCast(CI->getOpcode(), A, DestTy);
-        break;
-      }
-
-      default:
-        /// All other instructions are unsupported. Scalarize them.
-        scalarizeInstruction(Inst);
-        break;
-    }// end of switch.
-  }// end of for_each instr.
-
-  // At this point every instruction in the original loop is widended to
+  // At this point every instruction in the original loop is widened to
   // a vector form. We are almost done. Now, we need to fix the PHI nodes
   // that we vectorized. The PHI nodes are currently empty because we did
   // not want to introduce cycles. Notice that the remaining PHI nodes
@@ -1426,6 +1208,313 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
   }// end of for each redux variable.
 }

+Value *InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
+  assert(std::find(pred_begin(Dst), pred_end(Dst), Src) != pred_end(Dst) &&
+         "Invalid edge");
+
+  Value *SrcMask = createBlockInMask(Src);
+
+  // The terminator has to be a branch inst!
+  BranchInst *BI = dyn_cast<BranchInst>(Src->getTerminator());
+  assert(BI && "Unexpected terminator found");
+
+  Value *EdgeMask = SrcMask;
+  if (BI->isConditional()) {
+    EdgeMask = getVectorValue(BI->getCondition());
+    if (BI->getSuccessor(0) != Dst)
+      EdgeMask = Builder.CreateNot(EdgeMask);
+  }
+
+  return Builder.CreateAnd(EdgeMask, SrcMask);
+}
+
+Value *InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) {
+  assert(OrigLoop->contains(BB) && "Block is not a part of a loop");
+
+  // Loop incoming mask is all-one.
+  if (OrigLoop->getHeader() == BB)
+    return getVectorValue(
+      ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 1));
+
+  // This is the block mask. We OR all incoming edges, and with zero.
+  Value *BlockMask = getVectorValue(
+    ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 0));
+
+  // For each pred:
+  for (pred_iterator it = pred_begin(BB), e = pred_end(BB); it != e; ++it)
+    BlockMask = Builder.CreateOr(BlockMask, createEdgeMask(*it, BB));
+
+  return BlockMask;
+}
+
+void
+InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
+                                          BasicBlock *BB, PhiVector *PV) {
+  Constant *Zero =
+    ConstantInt::get(IntegerType::getInt32Ty(BB->getContext()), 0);
+
+  // For each instruction in the old loop.
+  for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+    switch (it->getOpcode()) {
+      case Instruction::Br:
+        // Nothing to do for PHIs and BR, since we already took care of the
+        // loop control flow instructions.
+        continue;
+      case Instruction::PHI:{
+        PHINode* P = cast<PHINode>(it);
+        // Handle reduction variables:
+        if (Legal->getReductionVars()->count(P)) {
+          // This is phase one of vectorizing PHIs.
+          Type *VecTy = VectorType::get(it->getType(), VF);
+          WidenMap[it] =
+            PHINode::Create(VecTy, 2, "vec.phi",
+                            LoopVectorBody->getFirstInsertionPt());
+          PV->push_back(P);
+          continue;
+        }
+
+        // Check for PHI nodes that are lowered to vector selects.
+        if (P->getParent() != OrigLoop->getHeader()) {
+          // We know that all PHIs in non header blocks are converted into
+          // selects, so we don't have to worry about the insertion order and we
+          // can just use the builder.
+
+          // At this point we generate the predication tree. There may be
+          // duplications since this is a simple recursive scan, but future
+          // optimizations will clean it up.
+          Value *Cond = createBlockInMask(P->getIncomingBlock(0));
+          WidenMap[P] =
+            Builder.CreateSelect(Cond,
+                                 getVectorValue(P->getIncomingValue(0)),
+                                 getVectorValue(P->getIncomingValue(1)),
+                                 "predphi");
+          continue;
+        }
+
+        // This PHINode must be an induction variable.
+        // Make sure that we know about it.
+        assert(Legal->getInductionVars()->count(P) &&
+               "Not an induction variable");
+
+        if (P->getType()->isIntegerTy()) {
+          assert(P == OldInduction && "Unexpected PHI");
+          Value *Broadcasted = getBroadcastInstrs(Induction);
+          // After broadcasting the induction variable we need to make the
+          // vector consecutive by adding 0, 1, 2 ...
+          Value *ConsecutiveInduction = getConsecutiveVector(Broadcasted);
+
+          WidenMap[OldInduction] = ConsecutiveInduction;
+          continue;
+        }
+
+        // Handle pointer inductions.
+        assert(P->getType()->isPointerTy() && "Unexpected type.");
+        Value *StartIdx = OldInduction ?
+          Legal->getInductionVars()->lookup(OldInduction) :
+          ConstantInt::get(Induction->getType(), 0);
+
+        // This is the pointer value coming into the loop.
+        Value *StartPtr = Legal->getInductionVars()->lookup(P);
+
+        // This is the normalized GEP that starts counting at zero.
+        Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx,
+                                                 "normalized.idx");
+
+        // This is the vector of results. Notice that we don't generate vector
+        // geps because scalar geps result in better code.
+        Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
+        for (unsigned int i = 0; i < VF; ++i) {
+          Constant *Idx = ConstantInt::get(Induction->getType(), i);
+          Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx");
+          Value *SclrGep = Builder.CreateGEP(StartPtr, GlobalIdx, "next.gep");
+          VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
+                                               Builder.getInt32(i),
+                                               "insert.gep");
+        }
+
+        WidenMap[it] = VecVal;
+        continue;
+      }
+      case Instruction::Add:
+      case Instruction::FAdd:
+      case Instruction::Sub:
+      case Instruction::FSub:
+      case Instruction::Mul:
+      case Instruction::FMul:
+      case Instruction::UDiv:
+      case Instruction::SDiv:
+      case Instruction::FDiv:
+      case Instruction::URem:
+      case Instruction::SRem:
+      case Instruction::FRem:
+      case Instruction::Shl:
+      case Instruction::LShr:
+      case Instruction::AShr:
+      case Instruction::And:
+      case Instruction::Or:
+      case Instruction::Xor: {
+        // Just widen binops.
+        BinaryOperator *BinOp = dyn_cast<BinaryOperator>(it);
+        Value *A = getVectorValue(it->getOperand(0));
+        Value *B = getVectorValue(it->getOperand(1));
+
+        // Use this vector value for all users of the original instruction.
+        Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A, B);
+        WidenMap[it] = V;
+
+        // Update the NSW, NUW and Exact flags.
+        BinaryOperator *VecOp = cast<BinaryOperator>(V);
+        if (isa<OverflowingBinaryOperator>(BinOp)) {
+          VecOp->setHasNoSignedWrap(BinOp->hasNoSignedWrap());
+          VecOp->setHasNoUnsignedWrap(BinOp->hasNoUnsignedWrap());
+        }
+        if (isa<PossiblyExactOperator>(VecOp))
+          VecOp->setIsExact(BinOp->isExact());
+        break;
+      }
+      case Instruction::Select: {
+        // Widen selects.
+        // If the selector is loop invariant we can create a select
+        // instruction with a scalar condition. Otherwise, use vector-select.
+        Value *Cond = it->getOperand(0);
+        bool InvariantCond = SE->isLoopInvariant(SE->getSCEV(Cond), OrigLoop);
+
+        // The condition can be loop invariant but still defined inside the
+        // loop. This means that we can't just use the original 'cond' value.
+        // We have to take the 'vectorized' value and pick the first lane.
+        // Instcombine will make this a no-op.
+        Cond = getVectorValue(Cond);
+        if (InvariantCond)
+          Cond = Builder.CreateExtractElement(Cond, Builder.getInt32(0));
+
+        Value *Op0 = getVectorValue(it->getOperand(1));
+        Value *Op1 = getVectorValue(it->getOperand(2));
+        WidenMap[it] = Builder.CreateSelect(Cond, Op0, Op1);
+        break;
+      }
+
+      case Instruction::ICmp:
+      case Instruction::FCmp: {
+        // Widen compares. Generate vector compares.
+        bool FCmp = (it->getOpcode() == Instruction::FCmp);
+        CmpInst *Cmp = dyn_cast<CmpInst>(it);
+        Value *A = getVectorValue(it->getOperand(0));
+        Value *B = getVectorValue(it->getOperand(1));
+        if (FCmp)
+          WidenMap[it] = Builder.CreateFCmp(Cmp->getPredicate(), A, B);
+        else
+          WidenMap[it] = Builder.CreateICmp(Cmp->getPredicate(), A, B);
+        break;
+      }
+
+      case Instruction::Store: {
+        // Attempt to issue a wide store.
+        StoreInst *SI = dyn_cast<StoreInst>(it);
+        Type *StTy = VectorType::get(SI->getValueOperand()->getType(), VF);
+        Value *Ptr = SI->getPointerOperand();
+        unsigned Alignment = SI->getAlignment();
+
+        assert(!Legal->isUniform(Ptr) &&
+               "We do not allow storing to uniform addresses");
+
+        GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
+
+        // This store does not use GEPs.
+        if (!Legal->isConsecutivePtr(Ptr)) {
+          scalarizeInstruction(it);
+          break;
+        }
+
+        if (Gep) {
+          // The last index does not have to be the induction. It can be
+          // consecutive and be a function of the index. For example A[I+1];
+          unsigned NumOperands = Gep->getNumOperands();
+          Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands - 1));
+          LastIndex = Builder.CreateExtractElement(LastIndex, Zero);
+
+          // Create the new GEP with the new induction variable.
+          GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
+          Gep2->setOperand(NumOperands - 1, LastIndex);
+          Ptr = Builder.Insert(Gep2);
+        } else {
+          // Use the induction element ptr.
+          assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
+          Ptr = Builder.CreateExtractElement(getVectorValue(Ptr), Zero);
+        }
+        Ptr = Builder.CreateBitCast(Ptr, StTy->getPointerTo());
+        Value *Val = getVectorValue(SI->getValueOperand());
+        Builder.CreateStore(Val, Ptr)->setAlignment(Alignment);
+        break;
+      }
+      case Instruction::Load: {
+        // Attempt to issue a wide load.
+        LoadInst *LI = dyn_cast<LoadInst>(it);
+        Type *RetTy = VectorType::get(LI->getType(), VF);
+        Value *Ptr = LI->getPointerOperand();
+        unsigned Alignment = LI->getAlignment();
+        GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
+
+        // If the pointer is loop invariant or if it is non consecutive,
+        // scalarize the load.
+        bool Con = Legal->isConsecutivePtr(Ptr);
+        if (Legal->isUniform(Ptr) || !Con) {
+          scalarizeInstruction(it);
+          break;
+        }
+
+        if (Gep) {
+          // The last index does not have to be the induction. It can be
+          // consecutive and be a function of the index. For example A[I+1];
+          unsigned NumOperands = Gep->getNumOperands();
+          Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands -1));
+          LastIndex = Builder.CreateExtractElement(LastIndex, Zero);
+
+          // Create the new GEP with the new induction variable.
+          GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
+          Gep2->setOperand(NumOperands - 1, LastIndex);
+          Ptr = Builder.Insert(Gep2);
+        } else {
+          // Use the induction element ptr.
+          assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
+          Ptr = Builder.CreateExtractElement(getVectorValue(Ptr), Zero);
+        }
+
+        Ptr = Builder.CreateBitCast(Ptr, RetTy->getPointerTo());
+        LI = Builder.CreateLoad(Ptr);
+        LI->setAlignment(Alignment);
+        // Use this vector value for all users of the load.
+        WidenMap[it] = LI;
+        break;
+      }
+      case Instruction::ZExt:
+      case Instruction::SExt:
+      case Instruction::FPToUI:
+      case Instruction::FPToSI:
+      case Instruction::FPExt:
+      case Instruction::PtrToInt:
+      case Instruction::IntToPtr:
+      case Instruction::SIToFP:
+      case Instruction::UIToFP:
+      case Instruction::Trunc:
+      case Instruction::FPTrunc:
+      case Instruction::BitCast: {
+        /// Vectorize bitcasts.
+        CastInst *CI = dyn_cast<CastInst>(it);
+        Value *A = getVectorValue(it->getOperand(0));
+        Type *DestTy = VectorType::get(CI->getType()->getScalarType(), VF);
+        WidenMap[it] = Builder.CreateCast(CI->getOpcode(), A, DestTy);
+        break;
+      }
+
+      default:
+        /// All other instructions are unsupported. Scalarize them.
+        scalarizeInstruction(it);
+        break;
+    }// end of switch.
+  }// end of for_each instr.
+}
+
+
 void InnerLoopVectorizer::updateAnalysis() {
   // Forget the original basic block.
   SE->forgetLoop(OrigLoop);
diff --git a/test/Transforms/LoopVectorize/if-conversion.ll b/test/Transforms/LoopVectorize/if-conversion.ll
new file mode 100644
index 00000000000..35c549caf18
--- /dev/null
+++ b/test/Transforms/LoopVectorize/if-conversion.ll
@@ -0,0 +1,60 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -enable-if-conversion -dce -instcombine -licm -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; This is the loop in this example:
+;
+;int function0(int *a, int *b, int start, int end) {
+;
+;  for (int i=start; i<end; ++i) {
+;    unsigned k = a[i];
+;
+;    if (a[i] > b[i])      <------ notice the IF inside the loop.
+;      k = k * 5 + 3;
+;
+;    a[i] = k;             <---- K is a phi node that becomes vector-select.
+;  }
+;}
+
+;CHECK: @function0
+;CHECK: load <4 x i32>
+;CHECK: icmp sgt <4 x i32>
+;CHECK: mul <4 x i32>
+;CHECK: add <4 x i32>
+;CHECK: select <4 x i1>
+;CHECK: ret i32
+define i32 @function0(i32* nocapture %a, i32* nocapture %b, i32 %start, i32 %end) nounwind uwtable ssp {
+entry:
+  %cmp16 = icmp slt i32 %start, %end
+  br i1 %cmp16, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:
+  %0 = sext i32 %start to i64
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ %0, %for.body.lr.ph ], [ %indvars.iv.next, %if.end ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %1 = load i32* %arrayidx, align 4
+  %arrayidx4 = getelementptr inbounds i32* %b, i64 %indvars.iv
+  %2 = load i32* %arrayidx4, align 4
+  %cmp5 = icmp sgt i32 %1, %2
+  br i1 %cmp5, label %if.then, label %if.end
+
+if.then:
+  %mul = mul i32 %1, 5
+  %add = add i32 %mul, 3
+  br label %if.end
+
+if.end:
+  %k.0 = phi i32 [ %add, %if.then ], [ %1, %for.body ]
+  store i32 %k.0, i32* %arrayidx, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %3 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %3, %end
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret i32 undef
+}
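
For reference, the vectorized form of the loop in if-conversion.ll is roughly equivalent to the
standalone C++ sketch below, assuming a vector width of 4. The function name, the per-lane inner
loop, and the scalar remainder loop are illustrative assumptions and are not code produced by this
patch; the vectorizer instead emits the corresponding <4 x i32> loads, compares, arithmetic, and a
vector select in place of the branch.

    // Scalar model of the if-converted, vectorized loop (assumed VF = 4).
    // Each pass through the 'lane' loop stands in for one lane of a <4 x i32>
    // operation; the branch is replaced by computing the "then" value for all
    // lanes and blending with a per-lane select, which is what the phi becomes.
    void function0_model(int *a, int *b, int start, int end) {
      const int VF = 4;                      // assumed vector width
      int i = start;
      for (; i + VF <= end; i += VF) {
        for (int lane = 0; lane < VF; ++lane) {
          int av = a[i + lane];              // vector load of a[i..i+3]
          int bv = b[i + lane];              // vector load of b[i..i+3]
          bool mask = av > bv;               // icmp sgt <4 x i32>
          int taken = av * 5 + 3;            // mul/add executed for every lane
          a[i + lane] = mask ? taken : av;   // the phi lowered to a vector select
        }
      }
      for (; i < end; ++i) {                 // scalar loop for the remainder
        int k = a[i];
        if (a[i] > b[i])
          k = k * 5 + 3;
        a[i] = k;
      }
    }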