From 0b827993ed98b20425c33fecaa06b7b1a32a40c4 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Thu, 20 Jun 2013 17:54:36 +0000 Subject: [PATCH] Clang-format the SLP vectorizer. No functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184446 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 96 ++++--- lib/Transforms/Vectorize/VecUtils.cpp | 317 ++++++++++++--------- lib/Transforms/Vectorize/VecUtils.h | 48 ++-- 3 files changed, 259 insertions(+), 202 deletions(-) diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 91c04633244..c3cb03764b2 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -41,14 +41,14 @@ using namespace llvm; static cl::opt -SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden, - cl::desc("Only vectorize trees if the gain is above this " - "number. (gain = -cost of vectorization)")); + SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden, + cl::desc("Only vectorize trees if the gain is above this " + "number. (gain = -cost of vectorization)")); namespace { /// The SLPVectorizer Pass. struct SLPVectorizer : public FunctionPass { - typedef MapVector StoreListMap; + typedef MapVector StoreListMap; /// Pass identification, replacement for typeid static char ID; @@ -78,7 +78,7 @@ struct SLPVectorizer : public FunctionPass { if (!DL) return false; - DEBUG(dbgs()<<"SLP: Analyzing blocks in " << F.getName() << ".\n"); + DEBUG(dbgs() << "SLP: Analyzing blocks in " << F.getName() << ".\n"); for (Function::iterator it = F.begin(), e = F.end(); it != e; ++it) { BasicBlock *BB = it; @@ -94,7 +94,7 @@ struct SLPVectorizer : public FunctionPass { // Vectorize trees that end at stores. if (unsigned count = collectStores(BB, R)) { (void)count; - DEBUG(dbgs()<<"SLP: Found " << count << " stores to vectorize.\n"); + DEBUG(dbgs() << "SLP: Found " << count << " stores to vectorize.\n"); BBChanged |= vectorizeStoreChains(R); } @@ -108,7 +108,7 @@ struct SLPVectorizer : public FunctionPass { } if (Changed) { - DEBUG(dbgs()<<"SLP: vectorized \""< VL, BoUpSLP &R, bool NeedExtracts); /// \brief Try to vectorize a chain that may start at the operands of \V; - bool tryToVectorize(BinaryOperator *V, BoUpSLP &R); + bool tryToVectorize(BinaryOperator *V, BoUpSLP &R); /// \brief Vectorize the stores that were collected in StoreRefs. bool vectorizeStoreChains(BoUpSLP &R); @@ -188,8 +188,9 @@ unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) { return count; } -bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) { - if (!A || !B) return false; +bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) { + if (!A || !B) + return false; Value *VL[] = { A, B }; return tryToVectorizeList(VL, R, true); } @@ -199,11 +200,12 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, if (VL.size() < 2) return false; - DEBUG(dbgs()<<"SLP: Vectorizing a list of length = " << VL.size() << ".\n"); + DEBUG(dbgs() << "SLP: Vectorizing a list of length = " << VL.size() << ".\n"); // Check that all of the parts are scalar instructions of the same type. Instruction *I0 = dyn_cast(VL[0]); - if (!I0) return 0; + if (!I0) + return 0; unsigned Opcode0 = I0->getOpcode(); @@ -217,17 +219,20 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, } int Cost = R.getTreeCost(VL); - int ExtrCost = NeedExtracts ? R.getScalarizationCost(VL) : 0; - DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost << - " Cost of extract:" << ExtrCost << ".\n"); - if ((Cost+ExtrCost) >= -SLPCostThreshold) return false; - DEBUG(dbgs()<<"SLP: Vectorizing pair.\n"); + int ExtrCost = NeedExtracts ? R.getScalarizationCost(VL) : 0; + DEBUG(dbgs() << "SLP: Cost of pair:" << Cost + << " Cost of extract:" << ExtrCost << ".\n"); + if ((Cost + ExtrCost) >= -SLPCostThreshold) + return false; + DEBUG(dbgs() << "SLP: Vectorizing pair.\n"); R.vectorizeArith(VL); return true; } -bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) { - if (!V) return false; +bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) { + if (!V) + return false; + // Try to vectorize V. if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R)) return true; @@ -267,22 +272,27 @@ bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) { bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { bool Changed = false; for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { - if (isa(it)) continue; + if (isa(it)) + continue; // Try to vectorize reductions that use PHINodes. if (PHINode *P = dyn_cast(it)) { // Check that the PHI is a reduction PHI. - if (P->getNumIncomingValues() != 2) return Changed; - Value *Rdx = (P->getIncomingBlock(0) == BB ? P->getIncomingValue(0) : - (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1) : - 0)); + if (P->getNumIncomingValues() != 2) + return Changed; + Value *Rdx = + (P->getIncomingBlock(0) == BB + ? (P->getIncomingValue(0)) + : (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1) : 0)); // Check if this is a Binary Operator. BinaryOperator *BI = dyn_cast_or_null(Rdx); if (!BI) continue; Value *Inst = BI->getOperand(0); - if (Inst == P) Inst = BI->getOperand(1); + if (Inst == P) + Inst = BI->getOperand(1); + Changed |= tryToVectorize(dyn_cast(Inst), R); continue; } @@ -295,7 +305,8 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { } for (int i = 0; i < 2; ++i) if (BinaryOperator *BI = dyn_cast(CI->getOperand(i))) - Changed |= tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R); + Changed |= + tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R); continue; } } @@ -303,7 +314,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { // Scan the PHINodes in our successors in search for pairing hints. for (succ_iterator it = succ_begin(BB), e = succ_end(BB); it != e; ++it) { BasicBlock *Succ = *it; - SmallVector Incoming; + SmallVector Incoming; // Collect the incoming values from the PHIs. for (BasicBlock::iterator instr = Succ->begin(), ie = Succ->end(); @@ -322,7 +333,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { if (Incoming.size() > 1) Changed |= tryToVectorizeList(Incoming, R, true); } - + return Changed; } @@ -334,8 +345,8 @@ bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) { if (it->second.size() < 2) continue; - DEBUG(dbgs()<<"SLP: Analyzing a store chain of length " << - it->second.size() << ".\n"); + DEBUG(dbgs() << "SLP: Analyzing a store chain of length " + << it->second.size() << ".\n"); Changed |= R.vectorizeStores(it->second, -SLPCostThreshold); } @@ -343,7 +354,7 @@ bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) { } bool SLPVectorizer::vectorizeUsingGatherHints(BoUpSLP::InstrList &Gathers) { - SmallVector Seq; + SmallVector Seq; bool Changed = false; for (int i = 0, e = Gathers.size(); i < e; ++i) { InsertElementInst *IEI = dyn_cast_or_null(Gathers[i]); @@ -359,13 +370,13 @@ bool SLPVectorizer::vectorizeUsingGatherHints(BoUpSLP::InstrList &Gathers) { Instruction *I = cast(Seq[0]); BasicBlock *BB = I->getParent(); - DEBUG(dbgs()<<"SLP: Inspecting a gather list of size " << Seq.size() << - " in " << BB->getName() << ".\n"); + DEBUG(dbgs() << "SLP: Inspecting a gather list of size " << Seq.size() + << " in " << BB->getName() << ".\n"); // Check if the gathered values have multiple uses. If they only have one // user then we know that the insert/extract pair will go away. bool HasMultipleUsers = false; - for (int i=0; e = Seq.size(), i < e; ++i) { + for (int i = 0; e = Seq.size(), i < e; ++i) { if (!Seq[i]->hasOneUse()) { HasMultipleUsers = true; break; @@ -375,8 +386,8 @@ bool SLPVectorizer::vectorizeUsingGatherHints(BoUpSLP::InstrList &Gathers) { BoUpSLP BO(BB, SE, DL, TTI, AA, LI->getLoopFor(BB)); if (tryToVectorizeList(Seq, BO, HasMultipleUsers)) { - DEBUG(dbgs()<<"SLP: Vectorized a gather list of len " << Seq.size() << - " in " << BB->getName() << ".\n"); + DEBUG(dbgs() << "SLP: Vectorized a gather list of len " << Seq.size() + << " in " << BB->getName() << ".\n"); Changed = true; } @@ -418,8 +429,10 @@ void SLPVectorizer::hoistGatherSequence(LoopInfo *LI, BasicBlock *BB, // hoist this instruction. Instruction *CurrVec = dyn_cast(Insert->getOperand(0)); Instruction *NewElem = dyn_cast(Insert->getOperand(1)); - if (CurrVec && L->contains(CurrVec)) continue; - if (NewElem && L->contains(NewElem)) continue; + if (CurrVec && L->contains(CurrVec)) + continue; + if (NewElem && L->contains(NewElem)) + continue; // We can hoist this instruction. Move it to the pre-header. Insert->moveBefore(Location); @@ -438,8 +451,5 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false) namespace llvm { - Pass *createSLPVectorizerPass() { - return new SLPVectorizer(); - } +Pass *createSLPVectorizerPass() { return new SLPVectorizer(); } } - diff --git a/lib/Transforms/Vectorize/VecUtils.cpp b/lib/Transforms/Vectorize/VecUtils.cpp index 88c457d1176..16189860420 100644 --- a/lib/Transforms/Vectorize/VecUtils.cpp +++ b/lib/Transforms/Vectorize/VecUtils.cpp @@ -45,8 +45,8 @@ static const unsigned RecursionMaxDepth = 6; namespace llvm { BoUpSLP::BoUpSLP(BasicBlock *Bb, ScalarEvolution *S, DataLayout *Dl, - TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp) : - Builder(S->getContext()), BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa), L(Lp) { + TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp) + : Builder(S->getContext()), BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa), L(Lp) { numberInstructions(); } @@ -55,7 +55,7 @@ void BoUpSLP::numberInstructions() { InstrIdx.clear(); InstrVec.clear(); // Number the instructions in the block. - for (BasicBlock::iterator it=BB->begin(), e=BB->end(); it != e; ++it) { + for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { InstrIdx[it] = Loc++; InstrVec.push_back(it); assert(InstrVec[InstrIdx[it]] == it && "Invalid allocation"); @@ -63,14 +63,18 @@ void BoUpSLP::numberInstructions() { } Value *BoUpSLP::getPointerOperand(Value *I) { - if (LoadInst *LI = dyn_cast(I)) return LI->getPointerOperand(); - if (StoreInst *SI = dyn_cast(I)) return SI->getPointerOperand(); + if (LoadInst *LI = dyn_cast(I)) + return LI->getPointerOperand(); + if (StoreInst *SI = dyn_cast(I)) + return SI->getPointerOperand(); return 0; } unsigned BoUpSLP::getAddressSpaceOperand(Value *I) { - if (LoadInst *L=dyn_cast(I)) return L->getPointerAddressSpace(); - if (StoreInst *S=dyn_cast(I)) return S->getPointerAddressSpace(); + if (LoadInst *L = dyn_cast(I)) + return L->getPointerAddressSpace(); + if (StoreInst *S = dyn_cast(I)) + return S->getPointerAddressSpace(); return -1; } @@ -81,10 +85,12 @@ bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) { unsigned ASB = getAddressSpaceOperand(B); // Check that the address spaces match and that the pointers are valid. - if (!PtrA || !PtrB || (ASA != ASB)) return false; + if (!PtrA || !PtrB || (ASA != ASB)) + return false; // Check that A and B are of the same type. - if (PtrA->getType() != PtrB->getType()) return false; + if (PtrA->getType() != PtrB->getType()) + return false; // Calculate the distance. const SCEV *PtrSCEVA = SE->getSCEV(PtrA); @@ -93,7 +99,8 @@ bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) { const SCEVConstant *ConstOffSCEV = dyn_cast(OffsetSCEV); // Non constant distance. - if (!ConstOffSCEV) return false; + if (!ConstOffSCEV) + return false; int64_t Offset = ConstOffSCEV->getValue()->getSExtValue(); Type *Ty = cast(PtrA->getType())->getElementType(); @@ -105,25 +112,29 @@ bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) { bool BoUpSLP::vectorizeStoreChain(ArrayRef Chain, int CostThreshold) { unsigned ChainLen = Chain.size(); - DEBUG(dbgs()<<"SLP: Analyzing a store chain of length " <(Chain[0])->getValueOperand()->getType(); unsigned Sz = DL->getTypeSizeInBits(StoreTy); unsigned VF = MinVecRegSize / Sz; - if (!isPowerOf2_32(Sz) || VF < 2) return false; + if (!isPowerOf2_32(Sz) || VF < 2) + return false; bool Changed = false; // Look for profitable vectorizable trees at all offsets, starting at zero. for (unsigned i = 0, e = ChainLen; i < e; ++i) { - if (i + VF > e) break; - DEBUG(dbgs()<<"SLP: Analyzing " << VF << " stores at offset "<< i << "\n"); + if (i + VF > e) + break; + DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << i + << "\n"); ArrayRef Operands = Chain.slice(i, VF); int Cost = getTreeCost(Operands); DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n"); if (Cost < CostThreshold) { DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n"); - Builder.SetInsertPoint(getInsertionPoint(getLastIndex(Operands,VF))); + Builder.SetInsertPoint(getInsertionPoint(getLastIndex(Operands, VF))); vectorizeTree(Operands, VF); i += VF - 1; Changed = true; @@ -135,8 +146,8 @@ bool BoUpSLP::vectorizeStoreChain(ArrayRef Chain, int CostThreshold) { int Cost = getTreeCost(Chain); if (Cost < CostThreshold) { - DEBUG(dbgs() << "SLP: Found store chain cost = "<< Cost <<" for size = " << - ChainLen << "\n"); + DEBUG(dbgs() << "SLP: Found store chain cost = " << Cost + << " for size = " << ChainLen << "\n"); Builder.SetInsertPoint(getInsertionPoint(getLastIndex(Chain, ChainLen))); vectorizeTree(Chain, ChainLen); return true; @@ -146,8 +157,8 @@ bool BoUpSLP::vectorizeStoreChain(ArrayRef Chain, int CostThreshold) { } bool BoUpSLP::vectorizeStores(ArrayRef Stores, int costThreshold) { - SetVector Heads, Tails; - SmallDenseMap ConsecutiveChain; + SetVector Heads, Tails; + SmallDenseMap ConsecutiveChain; // We may run into multiple chains that merge into a single chain. We mark the // stores that we vectorized so that we don't visit the same store twice. @@ -158,7 +169,9 @@ bool BoUpSLP::vectorizeStores(ArrayRef Stores, int costThreshold) { // all of the pairs of loads that follow each other. for (unsigned i = 0, e = Stores.size(); i < e; ++i) for (unsigned j = 0; j < e; ++j) { - if (i == j) continue; + if (i == j) + continue; + if (isConsecutiveAccess(Stores[i], Stores[j])) { Tails.insert(Stores[j]); Heads.insert(Stores[i]); @@ -167,9 +180,10 @@ bool BoUpSLP::vectorizeStores(ArrayRef Stores, int costThreshold) { } // For stores that start but don't end a link in the chain: - for (SetVector::iterator it = Heads.begin(), e = Heads.end(); + for (SetVector::iterator it = Heads.begin(), e = Heads.end(); it != e; ++it) { - if (Tails.count(*it)) continue; + if (Tails.count(*it)) + continue; // We found a store instr that starts a chain. Now follow the chain and try // to vectorize it. @@ -177,7 +191,8 @@ bool BoUpSLP::vectorizeStores(ArrayRef Stores, int costThreshold) { Value *I = *it; // Collect the chain into a list. while (Tails.count(I) || Heads.count(I)) { - if (VectorizedStores.count(I)) break; + if (VectorizedStores.count(I)) + break; Operands.push_back(I); // Move to the next value in the chain. I = ConsecutiveChain[I]; @@ -212,8 +227,10 @@ int BoUpSLP::getScalarizationCost(Type *Ty) { } AliasAnalysis::Location BoUpSLP::getLocation(Instruction *I) { - if (StoreInst *SI = dyn_cast(I)) return AA->getLocation(SI); - if (LoadInst *LI = dyn_cast(I)) return AA->getLocation(LI); + if (StoreInst *SI = dyn_cast(I)) + return AA->getLocation(SI); + if (LoadInst *LI = dyn_cast(I)) + return AA->getLocation(LI); return AliasAnalysis::Location(); } @@ -224,11 +241,14 @@ Value *BoUpSLP::isUnsafeToSink(Instruction *Src, Instruction *Dst) { /// the source may alias. for (++I; I != E; ++I) { // Ignore store instructions that are marked as 'ignore'. - if (MemBarrierIgnoreList.count(I)) continue; + if (MemBarrierIgnoreList.count(I)) + continue; if (Src->mayWriteToMemory()) /* Write */ { - if (!I->mayReadOrWriteMemory()) continue; + if (!I->mayReadOrWriteMemory()) + continue; } else /* Read */ { - if (!I->mayWriteToMemory()) continue; + if (!I->mayWriteToMemory()) + continue; } AliasAnalysis::Location A = getLocation(&*I); AliasAnalysis::Location B = getLocation(Src); @@ -244,7 +264,7 @@ Value *BoUpSLP::vectorizeArith(ArrayRef Operands) { Instruction *Loc = getInsertionPoint(LastIdx); Builder.SetInsertPoint(Loc); - assert(getFirstUserIndex(Operands, Operands.size()) > LastIdx && + assert(getFirstUserIndex(Operands, Operands.size()) > LastIdx && "Vectorizing with in-tree users"); Value *Vec = vectorizeTree(Operands, Operands.size()); @@ -283,15 +303,16 @@ int BoUpSLP::getTreeCost(ArrayRef VL) { // Check that instructions with multiple users can be vectorized. Mark unsafe // instructions. - for (SetVector::iterator it = MultiUserVals.begin(), - e = MultiUserVals.end(); it != e; ++it) { + for (SetVector::iterator it = MultiUserVals.begin(), + e = MultiUserVals.end(); + it != e; ++it) { // Check that all of the users of this instr are within the tree // and that they are all from the same lane. int Lane = -1; for (Value::use_iterator I = (*it)->use_begin(), E = (*it)->use_end(); I != E; ++I) { if (LaneMap.find(*I) == LaneMap.end()) { - DEBUG(dbgs()<<"SLP: Instr " << **it << " has multiple users.\n"); + DEBUG(dbgs() << "SLP: Instr " << **it << " has multiple users.\n"); // We don't have an ordering problem if the user is not in this basic // block. @@ -305,23 +326,23 @@ int BoUpSLP::getTreeCost(ArrayRef VL) { int Idx = InstrIdx[Inst]; if (Idx < LastRootIndex) { MustScalarize.insert(*it); - DEBUG(dbgs()<<"SLP: Adding to MustScalarize " - "because of an unsafe out of tree usage.\n"); + DEBUG(dbgs() << "SLP: Adding to MustScalarize " + "because of an unsafe out of tree usage.\n"); break; } - - DEBUG(dbgs()<<"SLP: Adding to MustExtract " - "because of a safe out of tree usage.\n"); + DEBUG(dbgs() << "SLP: Adding to MustExtract " + "because of a safe out of tree usage.\n"); MustExtract.insert(*it); continue; } - if (Lane == -1) Lane = LaneMap[*I]; + if (Lane == -1) + Lane = LaneMap[*I]; if (Lane != LaneMap[*I]) { MustScalarize.insert(*it); - DEBUG(dbgs()<<"SLP: Adding " << **it << - " to MustScalarize because multiple lane use it: " - << Lane << " and " << LaneMap[*I] << ".\n"); + DEBUG(dbgs() << "SLP: Adding " << **it + << " to MustScalarize because multiple lane use it: " + << Lane << " and " << LaneMap[*I] << ".\n"); break; } } @@ -360,12 +381,16 @@ static bool CanReuseExtract(ArrayRef VL, unsigned VF, } void BoUpSLP::getTreeUses_rec(ArrayRef VL, unsigned Depth) { - if (Depth == RecursionMaxDepth) return; + if (Depth == RecursionMaxDepth) + return; // Don't handle vectors. - if (VL[0]->getType()->isVectorTy()) return; + if (VL[0]->getType()->isVectorTy()) + return; + if (StoreInst *SI = dyn_cast(VL[0])) - if (SI->getValueOperand()->getType()->isVectorTy()) return; + if (SI->getValueOperand()->getType()->isVectorTy()) + return; // Check if all of the operands are constants. bool AllConst = true; @@ -375,27 +400,32 @@ void BoUpSLP::getTreeUses_rec(ArrayRef VL, unsigned Depth) { AllSameScalar &= (VL[0] == VL[i]); Instruction *I = dyn_cast(VL[i]); // If one of the instructions is out of this BB, we need to scalarize all. - if (I && I->getParent() != BB) return; + if (I && I->getParent() != BB) + return; } // If all of the operands are identical or constant we have a simple solution. - if (AllConst || AllSameScalar) return; + if (AllConst || AllSameScalar) + return; // Scalarize unknown structures. Instruction *VL0 = dyn_cast(VL[0]); - if (!VL0) return; + if (!VL0) + return; unsigned Opcode = VL0->getOpcode(); for (unsigned i = 0, e = VL.size(); i < e; ++i) { Instruction *I = dyn_cast(VL[i]); // If not all of the instructions are identical then we have to scalarize. - if (!I || Opcode != I->getOpcode()) return; + if (!I || Opcode != I->getOpcode()) + return; } for (int i = 0, e = VL.size(); i < e; ++i) { // Check that the instruction is only used within // one lane. - if (LaneMap.count(VL[i]) && LaneMap[VL[i]] != i) return; + if (LaneMap.count(VL[i]) && LaneMap[VL[i]] != i) + return; // Make this instruction as 'seen' and remember the lane. LaneMap[VL[i]] = i; } @@ -407,70 +437,71 @@ void BoUpSLP::getTreeUses_rec(ArrayRef VL, unsigned Depth) { // within our tree. At depth zero we have no local users, only external // users that we don't care about. if (Depth && I && I->getNumUses() > 1) { - DEBUG(dbgs()<<"SLP: Adding to MultiUserVals " - "because it has multiple users:" << *I << " \n"); + DEBUG(dbgs() << "SLP: Adding to MultiUserVals " + "because it has multiple users:" << *I << " \n"); MultiUserVals.insert(I); } } switch (Opcode) { - case Instruction::ExtractElement: { - VectorType *VecTy = VectorType::get(VL[0]->getType(), VL.size()); - // No need to follow ExtractElements that are going to be optimized away. - if (CanReuseExtract(VL, VL.size(), VecTy)) return; - // Fall through. - } - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::FPExt: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::SIToFP: - case Instruction::UIToFP: - case Instruction::Trunc: - case Instruction::FPTrunc: - case Instruction::BitCast: - case Instruction::Select: - case Instruction::ICmp: - case Instruction::FCmp: - case Instruction::Add: - case Instruction::FAdd: - case Instruction::Sub: - case Instruction::FSub: - case Instruction::Mul: - case Instruction::FMul: - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::FDiv: - case Instruction::URem: - case Instruction::SRem: - case Instruction::FRem: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: { - for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { - ValueList Operands; - // Prepare the operand vector. - for (unsigned j = 0; j < VL.size(); ++j) - Operands.push_back(cast(VL[j])->getOperand(i)); - - getTreeUses_rec(Operands, Depth+1); - } + case Instruction::ExtractElement: { + VectorType *VecTy = VectorType::get(VL[0]->getType(), VL.size()); + // No need to follow ExtractElements that are going to be optimized away. + if (CanReuseExtract(VL, VL.size(), VecTy)) return; - } - case Instruction::Store: { + // Fall through. + } + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::SIToFP: + case Instruction::UIToFP: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::BitCast: + case Instruction::Select: + case Instruction::ICmp: + case Instruction::FCmp: + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { ValueList Operands; + // Prepare the operand vector. for (unsigned j = 0; j < VL.size(); ++j) - Operands.push_back(cast(VL[j])->getOperand(0)); - getTreeUses_rec(Operands, Depth+1); - return; + Operands.push_back(cast(VL[j])->getOperand(i)); + + getTreeUses_rec(Operands, Depth + 1); } - default: + return; + } + case Instruction::Store: { + ValueList Operands; + for (unsigned j = 0; j < VL.size(); ++j) + Operands.push_back(cast(VL[j])->getOperand(0)); + getTreeUses_rec(Operands, Depth + 1); + return; + } + default: return; } } @@ -482,10 +513,13 @@ int BoUpSLP::getTreeCost_rec(ArrayRef VL, unsigned Depth) { ScalarTy = SI->getValueOperand()->getType(); /// Don't mess with vectors. - if (ScalarTy->isVectorTy()) return max_cost; + if (ScalarTy->isVectorTy()) + return max_cost; + VectorType *VecTy = VectorType::get(ScalarTy, VL.size()); - if (Depth == RecursionMaxDepth) return getScalarizationCost(VecTy); + if (Depth == RecursionMaxDepth) + return getScalarizationCost(VecTy); // Check if all of the operands are constants. bool AllConst = true; @@ -503,7 +537,8 @@ int BoUpSLP::getTreeCost_rec(ArrayRef VL, unsigned Depth) { } // Is this a simple vector constant. - if (AllConst) return 0; + if (AllConst) + return 0; // If all of the operands are identical we can broadcast them. Instruction *VL0 = dyn_cast(VL[0]); @@ -523,14 +558,16 @@ int BoUpSLP::getTreeCost_rec(ArrayRef VL, unsigned Depth) { if (MustScalarizeFlag) return getScalarizationCost(VecTy); - if (!VL0) return getScalarizationCost(VecTy); + if (!VL0) + return getScalarizationCost(VecTy); assert(VL0->getParent() == BB && "Wrong BB"); unsigned Opcode = VL0->getOpcode(); for (unsigned i = 0, e = VL.size(); i < e; ++i) { Instruction *I = dyn_cast(VL[i]); // If not all of the instructions are identical then we have to scalarize. - if (!I || Opcode != I->getOpcode()) return getScalarizationCost(VecTy); + if (!I || Opcode != I->getOpcode()) + return getScalarizationCost(VecTy); } // Check if it is safe to sink the loads or the stores. @@ -538,12 +575,13 @@ int BoUpSLP::getTreeCost_rec(ArrayRef VL, unsigned Depth) { int MaxIdx = getLastIndex(VL, VL.size()); Instruction *Last = InstrVec[MaxIdx]; - for (unsigned i = 0, e = VL.size(); i < e; ++i ) { - if (VL[i] == Last) continue; + for (unsigned i = 0, e = VL.size(); i < e; ++i) { + if (VL[i] == Last) + continue; Value *Barrier = isUnsafeToSink(cast(VL[i]), Last); if (Barrier) { - DEBUG(dbgs() << "SLP: Can't sink " << *VL[i] << "\n down to " << - *Last << "\n because of " << *Barrier << "\n"); + DEBUG(dbgs() << "SLP: Can't sink " << *VL[i] << "\n down to " << *Last + << "\n because of " << *Barrier << "\n"); return max_cost; } } @@ -554,7 +592,7 @@ int BoUpSLP::getTreeCost_rec(ArrayRef VL, unsigned Depth) { for (unsigned i = 0, e = VL.size(); i < e; ++i) if (MustExtract.count(VL[i])) ExternalUserExtractCost += - TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i); + TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i); switch (Opcode) { case Instruction::ExtractElement: { @@ -585,8 +623,9 @@ int BoUpSLP::getTreeCost_rec(ArrayRef VL, unsigned Depth) { return getScalarizationCost(VecTy); } - Cost += getTreeCost_rec(Operands, Depth+1); - if (Cost >= max_cost) return max_cost; + Cost += getTreeCost_rec(Operands, Depth + 1); + if (Cost >= max_cost) + return max_cost; // Calculate the cost of this instruction. int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(), @@ -635,8 +674,9 @@ int BoUpSLP::getTreeCost_rec(ArrayRef VL, unsigned Depth) { for (unsigned j = 0; j < VL.size(); ++j) Operands.push_back(cast(VL[j])->getOperand(i)); - Cost += getTreeCost_rec(Operands, Depth+1); - if (Cost >= max_cost) return max_cost; + Cost += getTreeCost_rec(Operands, Depth + 1); + if (Cost >= max_cost) + return max_cost; } // Calculate the cost of this instruction. @@ -645,12 +685,13 @@ int BoUpSLP::getTreeCost_rec(ArrayRef VL, unsigned Depth) { if (Opcode == Instruction::FCmp || Opcode == Instruction::ICmp || Opcode == Instruction::Select) { VectorType *MaskTy = VectorType::get(Builder.getInt1Ty(), VL.size()); - ScalarCost = VecTy->getNumElements() * - TTI->getCmpSelInstrCost(Opcode, ScalarTy, Builder.getInt1Ty()); + ScalarCost = + VecTy->getNumElements() * + TTI->getCmpSelInstrCost(Opcode, ScalarTy, Builder.getInt1Ty()); VecCost = TTI->getCmpSelInstrCost(Opcode, VecTy, MaskTy); } else { ScalarCost = VecTy->getNumElements() * - TTI->getArithmeticInstrCost(Opcode, ScalarTy); + TTI->getArithmeticInstrCost(Opcode, ScalarTy); VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy); } Cost += (VecCost - ScalarCost); @@ -658,21 +699,21 @@ int BoUpSLP::getTreeCost_rec(ArrayRef VL, unsigned Depth) { } case Instruction::Load: { // If we are scalarize the loads, add the cost of forming the vector. - for (unsigned i = 0, e = VL.size()-1; i < e; ++i) - if (!isConsecutiveAccess(VL[i], VL[i+1])) + for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) + if (!isConsecutiveAccess(VL[i], VL[i + 1])) return getScalarizationCost(VecTy); // Cost of wide load - cost of scalar loads. int ScalarLdCost = VecTy->getNumElements() * - TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0); + TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0); int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0); return VecLdCost - ScalarLdCost + ExternalUserExtractCost; } case Instruction::Store: { // We know that we can merge the stores. Calculate the cost. int ScalarStCost = VecTy->getNumElements() * - TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0); - int VecStCost = TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1,0); + TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0); + int VecStCost = TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0); int StoreCost = VecStCost - ScalarStCost; ValueList Operands; @@ -692,7 +733,7 @@ int BoUpSLP::getTreeCost_rec(ArrayRef VL, unsigned Depth) { int BoUpSLP::getLastIndex(ArrayRef VL, unsigned VF) { int MaxIdx = InstrIdx[BB->getFirstNonPHI()]; - for (unsigned i = 0; i < VF; ++i ) + for (unsigned i = 0; i < VF; ++i) MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]); return MaxIdx; } @@ -716,7 +757,7 @@ int BoUpSLP::getFirstUserIndex(ArrayRef VL, unsigned VF) { int BoUpSLP::getLastIndex(Instruction *I, Instruction *J) { assert(I->getParent() == BB && "Invalid parent for instruction I"); assert(J->getParent() == BB && "Invalid parent for instruction J"); - return std::max(InstrIdx[I],InstrIdx[J]); + return std::max(InstrIdx[I], InstrIdx[J]); } Instruction *BoUpSLP::getInsertionPoint(unsigned Index) { @@ -725,7 +766,7 @@ Instruction *BoUpSLP::getInsertionPoint(unsigned Index) { Value *BoUpSLP::Scalarize(ArrayRef VL, VectorType *Ty) { Value *Vec = UndefValue::get(Ty); - for (unsigned i=0; i < Ty->getNumElements(); ++i) { + for (unsigned i = 0; i < Ty->getNumElements(); ++i) { // Generate the 'InsertElement' instruction. Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i)); // Remember that this instruction is used as part of a 'gather' sequence. @@ -748,8 +789,9 @@ Value *BoUpSLP::vectorizeTree(ArrayRef VL, int VF) { Value *V = vectorizeTree_rec(VL, VF); int LastInstrIdx = getLastIndex(VL, VL.size()); - for (SetVector::iterator it = MustExtract.begin(), - e = MustExtract.end(); it != e; ++it) { + for (SetVector::iterator it = MustExtract.begin(), + e = MustExtract.end(); + it != e; ++it) { Instruction *I = cast(*it); // This is a scalarized value, so we can use the original value. @@ -770,7 +812,7 @@ Value *BoUpSLP::vectorizeTree(ArrayRef VL, int VF) { ++U) { Instruction *UI = cast(*U); if (UI->getParent() != I->getParent() || InstrIdx[UI] > LastInstrIdx) - UI->replaceUsesOfWith(I ,Extract); + UI->replaceUsesOfWith(I, Extract); Replaced = true; } assert(Replaced && "Must replace at least one outside user"); @@ -814,7 +856,7 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef VL, int VF) { return Scalarize(VL, VecTy); if (VectorizedValues.count(VL0)) { - Value * Vec = VectorizedValues[VL0]; + Value *Vec = VectorizedValues[VL0]; for (int i = 0; i < VF; ++i) VectorizedValues[VL[i]] = Vec; return Vec; @@ -886,7 +928,6 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef VL, int VF) { VectorizedValues[VL[i]] = V; return V; - } case Instruction::Select: { ValueList TrueVec, FalseVec, CondVec; @@ -933,7 +974,7 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef VL, int VF) { Value *LHS = vectorizeTree_rec(LHSVL, VF); Value *RHS = vectorizeTree_rec(RHSVL, VF); BinaryOperator *BinOp = cast(VL0); - Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS,RHS); + Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS, RHS); for (int i = 0; i < VF; ++i) VectorizedValues[VL[i]] = V; @@ -946,7 +987,7 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef VL, int VF) { // Check if all of the loads are consecutive. for (unsigned i = 1, e = VF; i < e; ++i) - if (!isConsecutiveAccess(VL[i-1], VL[i])) + if (!isConsecutiveAccess(VL[i - 1], VL[i])) return Scalarize(VL, VecTy); // Loads are inserted at the head of the tree because we don't want to sink @@ -972,8 +1013,8 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef VL, int VF) { ValueOp.push_back(cast(VL[i])->getValueOperand()); Value *VecValue = vectorizeTree_rec(ValueOp, VF); - Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(), - VecTy->getPointerTo()); + Value *VecPtr = + Builder.CreateBitCast(SI->getPointerOperand(), VecTy->getPointerTo()); Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment); for (int i = 0; i < VF; ++i) diff --git a/lib/Transforms/Vectorize/VecUtils.h b/lib/Transforms/Vectorize/VecUtils.h index 28a61e3c0dd..c9fe6d23ab6 100644 --- a/lib/Transforms/Vectorize/VecUtils.h +++ b/lib/Transforms/Vectorize/VecUtils.h @@ -25,23 +25,29 @@ namespace llvm { -class BasicBlock; class Instruction; class Type; -class VectorType; class StoreInst; class Value; -class ScalarEvolution; class DataLayout; -class TargetTransformInfo; class AliasAnalysis; +class BasicBlock; +class Instruction; +class Type; +class VectorType; +class StoreInst; +class Value; +class ScalarEvolution; +class DataLayout; +class TargetTransformInfo; +class AliasAnalysis; class Loop; /// Bottom Up SLP vectorization utility class. -struct BoUpSLP { - typedef SmallVector ValueList; - typedef SmallVector InstrList; - typedef SmallPtrSet ValueSet; - typedef SmallVector StoreList; - static const int max_cost = 1<<20; +struct BoUpSLP { + typedef SmallVector ValueList; + typedef SmallVector InstrList; + typedef SmallPtrSet ValueSet; + typedef SmallVector StoreList; + static const int max_cost = 1 << 20; // \brief C'tor. BoUpSLP(BasicBlock *Bb, ScalarEvolution *Se, DataLayout *Dl, - TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp); + TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp); /// \brief Take the pointer operand from the Load/Store instruction. /// \returns NULL if this is not a valid Load/Store instruction. @@ -73,13 +79,13 @@ struct BoUpSLP { bool vectorizeStores(ArrayRef Stores, int costThreshold); /// \brief Vectorize a group of scalars into a vector tree. - /// \returns the vectorized value. + /// \returns the vectorized value. Value *vectorizeArith(ArrayRef Operands); /// \returns the list of new instructions that were added in order to collect /// scalars into vectors. This list can be used to further optimize the gather /// sequences. - InstrList &getGatherSeqInstructions() {return GatherInstructions; } + InstrList &getGatherSeqInstructions() { return GatherInstructions; } private: /// \brief This method contains the recursive part of getTreeCost. @@ -130,9 +136,9 @@ private: private: /// Maps instructions to numbers and back. - SmallDenseMap InstrIdx; + SmallDenseMap InstrIdx; /// Maps integers to Instructions. - std::vector InstrVec; + std::vector InstrVec; // -- containers that are used during getTreeCost -- // @@ -144,14 +150,14 @@ private: /// Contains values that have users outside of the vectorized graph. /// We need to generate extract instructions for these values. /// NOTICE: The vectorization methods also use this set. - SetVector MustExtract; + SetVector MustExtract; /// Contains a list of values that are used outside the current tree. This /// set must be reset between runs. - SetVector MultiUserVals; + SetVector MultiUserVals; /// Maps values in the tree to the vector lanes that uses them. This map must /// be reset between runs of getCost. - std::map LaneMap; + std::map LaneMap; /// A list of instructions to ignore while sinking /// memory instructions. This map must be reset between runs of getCost. ValueSet MemBarrierIgnoreList; @@ -159,8 +165,8 @@ private: // -- Containers that are used during vectorizeTree -- // /// Maps between the first scalar to the vector. This map must be reset - ///between runs. - DenseMap VectorizedValues; + /// between runs. + DenseMap VectorizedValues; // -- Containers that are used after vectorization by the caller -- // @@ -169,7 +175,7 @@ private: /// Iterating over this list is faster than calling LICM. /// Notice: We insert NULL ptrs to separate between the different gather /// sequences. - InstrList GatherInstructions; + InstrList GatherInstructions; /// Instruction builder to construct the vectorized tree. IRBuilder<> Builder;