SLPVectorization: Add basic support for cross-basic-block SLP vectorization.

We collect gather sequences when we vectorize basic blocks. Gather sequences are excellent
hints for vectorization of other basic blocks.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184444 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Nadav Rotem 2013-06-20 17:41:45 +00:00
parent 63b8e299e4
commit d69d9f20bc
4 changed files with 134 additions and 13 deletions

View File

@ -99,7 +99,10 @@ struct SLPVectorizer : public FunctionPass {
} }
// Try to hoist some of the scalarization code to the preheader. // Try to hoist some of the scalarization code to the preheader.
if (BBChanged) hoistGatherSequence(LI, BB, R); if (BBChanged) {
hoistGatherSequence(LI, BB, R);
Changed |= vectorizeUsingGatherHints(R.getGatherSeqInstructions());
}
Changed |= BBChanged; Changed |= BBChanged;
} }
@ -130,8 +133,10 @@ private:
/// \brief Try to vectorize a chain that starts at two arithmetic instrs. /// \brief Try to vectorize a chain that starts at two arithmetic instrs.
bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R); bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R);
/// \brief Try to vectorize a list of operands. /// \brief Try to vectorize a list of operands. If \p NeedExtracts is true
bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R); /// then we calculate the cost of extracting the scalars from the vector.
/// \returns true if a value was vectorized.
bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, bool NeedExtracts);
/// \brief Try to vectorize a chain that may start at the operands of \V; /// \brief Try to vectorize a chain that may start at the operands of \V;
bool tryToVectorize(BinaryOperator *V, BoUpSLP &R); bool tryToVectorize(BinaryOperator *V, BoUpSLP &R);
@ -143,6 +148,13 @@ private:
/// all of the sources are loop invariant. /// all of the sources are loop invariant.
void hoistGatherSequence(LoopInfo *LI, BasicBlock *BB, BoUpSLP &R); void hoistGatherSequence(LoopInfo *LI, BasicBlock *BB, BoUpSLP &R);
/// \brief Try to vectorize additional sequences in different basic blocks
/// based on values that we gathered in previous blocks. The list \p Gathers
/// holds the gather InsertElement instructions that were generated during
/// vectorization.
/// \returns True if some code was vectorized.
bool vectorizeUsingGatherHints(BoUpSLP::InstrList &Gathers);
/// \brief Scan the basic block and look for patterns that are likely to start /// \brief Scan the basic block and look for patterns that are likely to start
/// a vectorization chain. /// a vectorization chain.
bool vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R); bool vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R);
@ -179,10 +191,11 @@ unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {
bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) { bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
if (!A || !B) return false; if (!A || !B) return false;
Value *VL[] = { A, B }; Value *VL[] = { A, B };
return tryToVectorizeList(VL, R); return tryToVectorizeList(VL, R, true);
} }
bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) { bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
bool NeedExtracts) {
if (VL.size() < 2) if (VL.size() < 2)
return false; return false;
@ -204,7 +217,7 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) {
} }
int Cost = R.getTreeCost(VL); int Cost = R.getTreeCost(VL);
int ExtrCost = R.getScalarizationCost(VL); int ExtrCost = NeedExtracts ? R.getScalarizationCost(VL) : 0;
DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost << DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost <<
" Cost of extract:" << ExtrCost << ".\n"); " Cost of extract:" << ExtrCost << ".\n");
if ((Cost+ExtrCost) >= -SLPCostThreshold) return false; if ((Cost+ExtrCost) >= -SLPCostThreshold) return false;
@ -307,7 +320,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
} }
if (Incoming.size() > 1) if (Incoming.size() > 1)
Changed |= tryToVectorizeList(Incoming, R); Changed |= tryToVectorizeList(Incoming, R, true);
} }
return Changed; return Changed;
@ -329,6 +342,51 @@ bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) {
return Changed; return Changed;
} }
/// \brief Try to vectorize additional sequences using the gather
/// InsertElement instructions recorded in \p Gathers as hints.
///
/// \p Gathers is a flat list in which consecutive InsertElement instructions
/// form one gather sequence and a NULL entry marks the end of each sequence.
/// For every sequence, the scalar operands (operand 1 of each InsertElement)
/// that are instructions are collected and handed to tryToVectorizeList,
/// using a fresh BoUpSLP built for the basic block of the first collected
/// scalar.
///
/// \returns true if at least one gathered sequence was vectorized.
bool SLPVectorizer::vectorizeUsingGatherHints(BoUpSLP::InstrList &Gathers) {
  SmallVector<Value*, 4> Seq;
  bool Changed = false;
  for (int i = 0, e = Gathers.size(); i < e; ++i) {
    InsertElementInst *IEI = dyn_cast_or_null<InsertElementInst>(Gathers[i]);

    if (IEI) {
      // Still inside a gather sequence: remember the inserted scalar if it is
      // an instruction (constants and arguments are skipped).
      if (Instruction *I = dyn_cast<Instruction>(IEI->getOperand(1)))
        Seq.push_back(I);
      continue;
    }

    // A non-InsertElement (NULL) entry terminates the current sequence.
    if (!Seq.size())
      continue;

    Instruction *I = cast<Instruction>(Seq[0]);
    BasicBlock *BB = I->getParent();

    DEBUG(dbgs()<<"SLP: Inspecting a gather list of size " << Seq.size() <<
          " in " << BB->getName() << ".\n");

    // Check if the gathered values have multiple uses. If they only have one
    // user then we know that the insert/extract pair will go away.
    //
    // This scan must use its own index and bound. Reusing the outer loop's
    // bound here would overwrite it with Seq.size() and end the walk over
    // Gathers after the first sequence.
    bool HasMultipleUsers = false;
    for (unsigned Idx = 0, NumSeq = Seq.size(); Idx < NumSeq; ++Idx) {
      if (!Seq[Idx]->hasOneUse()) {
        HasMultipleUsers = true;
        break;
      }
    }

    // Extraction cost is only charged when some scalar has other users.
    BoUpSLP BO(BB, SE, DL, TTI, AA, LI->getLoopFor(BB));

    if (tryToVectorizeList(Seq, BO, HasMultipleUsers)) {
      DEBUG(dbgs()<<"SLP: Vectorized a gather list of len " << Seq.size() <<
            " in " << BB->getName() << ".\n");
      Changed = true;
    }

    Seq.clear();
  }

  return Changed;
}
void SLPVectorizer::hoistGatherSequence(LoopInfo *LI, BasicBlock *BB, void SLPVectorizer::hoistGatherSequence(LoopInfo *LI, BasicBlock *BB,
BoUpSLP &R) { BoUpSLP &R) {
// Check if this block is inside a loop. // Check if this block is inside a loop.
@ -344,12 +402,14 @@ void SLPVectorizer::hoistGatherSequence(LoopInfo *LI, BasicBlock *BB,
// Mark the insertion point for the block. // Mark the insertion point for the block.
Instruction *Location = PreHeader->getTerminator(); Instruction *Location = PreHeader->getTerminator();
BoUpSLP::ValueList &Gathers = R.getGatherSeqInstructions(); BoUpSLP::InstrList &Gathers = R.getGatherSeqInstructions();
for (BoUpSLP::ValueList::iterator it = Gathers.begin(), e = Gathers.end(); for (BoUpSLP::InstrList::iterator it = Gathers.begin(), e = Gathers.end();
it != e; ++it) { it != e; ++it) {
InsertElementInst *Insert = dyn_cast<InsertElementInst>(*it); InsertElementInst *Insert = dyn_cast_or_null<InsertElementInst>(*it);
// The InsertElement sequence can be simplified into a constant. // The InsertElement sequence can be simplified into a constant.
// Also Ignore NULL pointers because they are only here to separate
// sequences.
if (!Insert) if (!Insert)
continue; continue;

View File

@ -731,9 +731,13 @@ Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) {
// Remember that this instruction is used as part of a 'gather' sequence. // Remember that this instruction is used as part of a 'gather' sequence.
// The caller of the bottom-up slp vectorizer can try to hoist the sequence // The caller of the bottom-up slp vectorizer can try to hoist the sequence
// if the users are outside of the basic block. // if the users are outside of the basic block.
GatherInstructions.push_back(Vec); if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(Vec))
GatherInstructions.push_back(IEI);
} }
// Mark the end of the gather sequence.
GatherInstructions.push_back(0);
for (unsigned i = 0; i < Ty->getNumElements(); ++i) for (unsigned i = 0; i < Ty->getNumElements(); ++i)
VectorizedValues[VL[i]] = Vec; VectorizedValues[VL[i]] = Vec;

View File

@ -34,6 +34,7 @@ class Loop;
/// Bottom Up SLP vectorization utility class. /// Bottom Up SLP vectorization utility class.
struct BoUpSLP { struct BoUpSLP {
typedef SmallVector<Value*, 8> ValueList; typedef SmallVector<Value*, 8> ValueList;
typedef SmallVector<Instruction*, 16> InstrList;
typedef SmallPtrSet<Value*, 16> ValueSet; typedef SmallPtrSet<Value*, 16> ValueSet;
typedef SmallVector<StoreInst*, 8> StoreList; typedef SmallVector<StoreInst*, 8> StoreList;
static const int max_cost = 1<<20; static const int max_cost = 1<<20;
@ -78,7 +79,7 @@ struct BoUpSLP {
/// \returns the list of new instructions that were added in order to collect /// \returns the list of new instructions that were added in order to collect
/// scalars into vectors. This list can be used to further optimize the gather /// scalars into vectors. This list can be used to further optimize the gather
/// sequences. /// sequences.
ValueList &getGatherSeqInstructions() {return GatherInstructions; } InstrList &getGatherSeqInstructions() {return GatherInstructions; }
private: private:
/// \brief This method contains the recursive part of getTreeCost. /// \brief This method contains the recursive part of getTreeCost.
@ -166,7 +167,9 @@ private:
/// A list of instructions that are used when gathering scalars into vectors. /// A list of instructions that are used when gathering scalars into vectors.
/// In many cases these instructions can be hoisted outside of the BB. /// In many cases these instructions can be hoisted outside of the BB.
/// Iterating over this list is faster than calling LICM. /// Iterating over this list is faster than calling LICM.
ValueList GatherInstructions; /// Notice: We insert NULL ptrs to separate between the different gather
/// sequences.
InstrList GatherInstructions;
/// Instruction builder to construct the vectorized tree. /// Instruction builder to construct the vectorized tree.
IRBuilder<> Builder; IRBuilder<> Builder;

View File

@ -0,0 +1,54 @@
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
; int foo(double *A, float *B, int g) {
; float B0 = B[0];
; float B1 = B[1]; <----- BasicBlock #1
; B0 += 5;
; B1 += 8;
;
; if (g) bar();
;
; A[0] += B0; <------- BasicBlock #3
; A[1] += B1;
; }
;CHECK: @foo
;CHECK: load <2 x float>
;CHECK: fadd <2 x float>
;CHECK: call i32
;CHECK: load <2 x double>
;CHECK: fadd <2 x double>
;CHECK: store <2 x double>
;CHECK: ret
; Test input: two adjacent floats are loaded from %B and adjusted in the entry
; block, then widened and accumulated into two adjacent doubles of %A in
; if.end. A conditional call to @bar sits between the two blocks.
define i32 @foo(double* nocapture %A, float* nocapture %B, i32 %g) {
entry:
; Load B[0] and B[1] and add the constants 5.0 and 8.0 to them.
%0 = load float* %B, align 4
%arrayidx1 = getelementptr inbounds float* %B, i64 1
%1 = load float* %arrayidx1, align 4
%add = fadd float %0, 5.000000e+00
%add2 = fadd float %1, 8.000000e+00
; Skip the call to @bar when %g is zero.
%tobool = icmp eq i32 %g, 0
br i1 %tobool, label %if.end, label %if.then
if.then:
%call = tail call i32 (...)* @bar()
br label %if.end
if.end:
; Extend the two float sums from the entry block to double and add them into
; A[0] and A[1].
%conv = fpext float %add to double
%2 = load double* %A, align 8
%add4 = fadd double %conv, %2
store double %add4, double* %A, align 8
%conv5 = fpext float %add2 to double
%arrayidx6 = getelementptr inbounds double* %A, i64 1
%3 = load double* %arrayidx6, align 8
%add7 = fadd double %conv5, %3
store double %add7, double* %arrayidx6, align 8
; The return value is unspecified (undef).
ret i32 undef
}
declare i32 @bar(...)