mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-26 13:10:34 +00:00
SLPVectorization: Add a basic support for cross-basic block slp vectorization.
We collect gather sequences when we vectorize basic blocks. Gather sequences are excellent hints for vectorization of other basic blocks. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184444 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
63b8e299e4
commit
d69d9f20bc
@ -99,7 +99,10 @@ struct SLPVectorizer : public FunctionPass {
|
||||
}
|
||||
|
||||
// Try to hoist some of the scalarization code to the preheader.
|
||||
if (BBChanged) hoistGatherSequence(LI, BB, R);
|
||||
if (BBChanged) {
|
||||
hoistGatherSequence(LI, BB, R);
|
||||
Changed |= vectorizeUsingGatherHints(R.getGatherSeqInstructions());
|
||||
}
|
||||
|
||||
Changed |= BBChanged;
|
||||
}
|
||||
@ -130,8 +133,10 @@ private:
|
||||
/// \brief Try to vectorize a chain that starts at two arithmetic instrs.
|
||||
bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R);
|
||||
|
||||
/// \brief Try to vectorize a list of operands.
|
||||
bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R);
|
||||
/// \brief Try to vectorize a list of operands. If \p NeedExtracts is true
|
||||
/// then we calculate the cost of extracting the scalars from the vector.
|
||||
/// \returns true if a value was vectorized.
|
||||
bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, bool NeedExtracts);
|
||||
|
||||
/// \brief Try to vectorize a chain that may start at the operands of \V;
|
||||
bool tryToVectorize(BinaryOperator *V, BoUpSLP &R);
|
||||
@ -143,6 +148,13 @@ private:
|
||||
/// all of the sources are loop invariant.
|
||||
void hoistGatherSequence(LoopInfo *LI, BasicBlock *BB, BoUpSLP &R);
|
||||
|
||||
/// \brief Try to vectorize additional sequences in different basic blocks
|
||||
/// based on values that we gathered in previous blocks. The list \p Gathers
|
||||
/// holds the gather InsertElement instructions that were generated during
|
||||
/// vectorization.
|
||||
/// \returns True if some code was vectorized.
|
||||
bool vectorizeUsingGatherHints(BoUpSLP::InstrList &Gathers);
|
||||
|
||||
/// \brief Scan the basic block and look for patterns that are likely to start
|
||||
/// a vectorization chain.
|
||||
bool vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R);
|
||||
@ -179,10 +191,11 @@ unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {
|
||||
bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
|
||||
if (!A || !B) return false;
|
||||
Value *VL[] = { A, B };
|
||||
return tryToVectorizeList(VL, R);
|
||||
return tryToVectorizeList(VL, R, true);
|
||||
}
|
||||
|
||||
bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) {
|
||||
bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
|
||||
bool NeedExtracts) {
|
||||
if (VL.size() < 2)
|
||||
return false;
|
||||
|
||||
@ -204,7 +217,7 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) {
|
||||
}
|
||||
|
||||
int Cost = R.getTreeCost(VL);
|
||||
int ExtrCost = R.getScalarizationCost(VL);
|
||||
int ExtrCost = NeedExtracts ? R.getScalarizationCost(VL) : 0;
|
||||
DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost <<
|
||||
" Cost of extract:" << ExtrCost << ".\n");
|
||||
if ((Cost+ExtrCost) >= -SLPCostThreshold) return false;
|
||||
@ -307,7 +320,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
|
||||
}
|
||||
|
||||
if (Incoming.size() > 1)
|
||||
Changed |= tryToVectorizeList(Incoming, R);
|
||||
Changed |= tryToVectorizeList(Incoming, R, true);
|
||||
}
|
||||
|
||||
return Changed;
|
||||
@ -329,6 +342,51 @@ bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) {
|
||||
return Changed;
|
||||
}
|
||||
|
||||
bool SLPVectorizer::vectorizeUsingGatherHints(BoUpSLP::InstrList &Gathers) {
|
||||
SmallVector<Value*, 4> Seq;
|
||||
bool Changed = false;
|
||||
for (int i = 0, e = Gathers.size(); i < e; ++i) {
|
||||
InsertElementInst *IEI = dyn_cast_or_null<InsertElementInst>(Gathers[i]);
|
||||
|
||||
if (IEI) {
|
||||
if (Instruction *I = dyn_cast<Instruction>(IEI->getOperand(1)))
|
||||
Seq.push_back(I);
|
||||
} else {
|
||||
|
||||
if (!Seq.size())
|
||||
continue;
|
||||
|
||||
Instruction *I = cast<Instruction>(Seq[0]);
|
||||
BasicBlock *BB = I->getParent();
|
||||
|
||||
DEBUG(dbgs()<<"SLP: Inspecting a gather list of size " << Seq.size() <<
|
||||
" in " << BB->getName() << ".\n");
|
||||
|
||||
// Check if the gathered values have multiple uses. If they only have one
|
||||
// user then we know that the insert/extract pair will go away.
|
||||
bool HasMultipleUsers = false;
|
||||
for (int i=0; e = Seq.size(), i < e; ++i) {
|
||||
if (!Seq[i]->hasOneUse()) {
|
||||
HasMultipleUsers = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
BoUpSLP BO(BB, SE, DL, TTI, AA, LI->getLoopFor(BB));
|
||||
|
||||
if (tryToVectorizeList(Seq, BO, HasMultipleUsers)) {
|
||||
DEBUG(dbgs()<<"SLP: Vectorized a gather list of len " << Seq.size() <<
|
||||
" in " << BB->getName() << ".\n");
|
||||
Changed = true;
|
||||
}
|
||||
|
||||
Seq.clear();
|
||||
}
|
||||
}
|
||||
|
||||
return Changed;
|
||||
}
|
||||
|
||||
void SLPVectorizer::hoistGatherSequence(LoopInfo *LI, BasicBlock *BB,
|
||||
BoUpSLP &R) {
|
||||
// Check if this block is inside a loop.
|
||||
@ -344,12 +402,14 @@ void SLPVectorizer::hoistGatherSequence(LoopInfo *LI, BasicBlock *BB,
|
||||
// Mark the insertion point for the block.
|
||||
Instruction *Location = PreHeader->getTerminator();
|
||||
|
||||
BoUpSLP::ValueList &Gathers = R.getGatherSeqInstructions();
|
||||
for (BoUpSLP::ValueList::iterator it = Gathers.begin(), e = Gathers.end();
|
||||
BoUpSLP::InstrList &Gathers = R.getGatherSeqInstructions();
|
||||
for (BoUpSLP::InstrList::iterator it = Gathers.begin(), e = Gathers.end();
|
||||
it != e; ++it) {
|
||||
InsertElementInst *Insert = dyn_cast<InsertElementInst>(*it);
|
||||
InsertElementInst *Insert = dyn_cast_or_null<InsertElementInst>(*it);
|
||||
|
||||
// The InsertElement sequence can be simplified into a constant.
|
||||
// Also Ignore NULL pointers because they are only here to separate
|
||||
// sequences.
|
||||
if (!Insert)
|
||||
continue;
|
||||
|
||||
|
@ -731,9 +731,13 @@ Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) {
|
||||
// Remember that this instruction is used as part of a 'gather' sequence.
|
||||
// The caller of the bottom-up slp vectorizer can try to hoist the sequence
|
||||
// if the users are outside of the basic block.
|
||||
GatherInstructions.push_back(Vec);
|
||||
if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(Vec))
|
||||
GatherInstructions.push_back(IEI);
|
||||
}
|
||||
|
||||
// Mark the end of the gather sequence.
|
||||
GatherInstructions.push_back(0);
|
||||
|
||||
for (unsigned i = 0; i < Ty->getNumElements(); ++i)
|
||||
VectorizedValues[VL[i]] = Vec;
|
||||
|
||||
|
@ -34,6 +34,7 @@ class Loop;
|
||||
/// Bottom Up SLP vectorization utility class.
|
||||
struct BoUpSLP {
|
||||
typedef SmallVector<Value*, 8> ValueList;
|
||||
typedef SmallVector<Instruction*, 16> InstrList;
|
||||
typedef SmallPtrSet<Value*, 16> ValueSet;
|
||||
typedef SmallVector<StoreInst*, 8> StoreList;
|
||||
static const int max_cost = 1<<20;
|
||||
@ -78,7 +79,7 @@ struct BoUpSLP {
|
||||
/// \returns the list of new instructions that were added in order to collect
|
||||
/// scalars into vectors. This list can be used to further optimize the gather
|
||||
/// sequences.
|
||||
ValueList &getGatherSeqInstructions() {return GatherInstructions; }
|
||||
InstrList &getGatherSeqInstructions() {return GatherInstructions; }
|
||||
|
||||
private:
|
||||
/// \brief This method contains the recursive part of getTreeCost.
|
||||
@ -166,7 +167,9 @@ private:
|
||||
/// A list of instructions that are used when gathering scalars into vectors.
|
||||
/// In many cases these instructions can be hoisted outside of the BB.
|
||||
/// Iterating over this list is faster than calling LICM.
|
||||
ValueList GatherInstructions;
|
||||
/// Notice: We insert NULL ptrs to separate between the different gather
|
||||
/// sequences.
|
||||
InstrList GatherInstructions;
|
||||
|
||||
/// Instruction builder to construct the vectorized tree.
|
||||
IRBuilder<> Builder;
|
||||
|
54
test/Transforms/SLPVectorizer/X86/cross_block_slp.ll
Normal file
54
test/Transforms/SLPVectorizer/X86/cross_block_slp.ll
Normal file
@ -0,0 +1,54 @@
|
||||
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.8.0"
|
||||
|
||||
; int foo(double *A, float *B, int g) {
|
||||
; float B0 = B[0];
|
||||
; float B1 = B[1]; <----- BasicBlock #1
|
||||
; B0 += 5;
|
||||
; B1 += 8;
|
||||
;
|
||||
; if (g) bar();
|
||||
;
|
||||
; A[0] += B0; <------- BasicBlock #3
|
||||
; A[1] += B1;
|
||||
; }
|
||||
|
||||
|
||||
;CHECK: @foo
|
||||
;CHECK: load <2 x float>
|
||||
;CHECK: fadd <2 x float>
|
||||
;CHECK: call i32
|
||||
;CHECK: load <2 x double>
|
||||
;CHECK: fadd <2 x double>
|
||||
;CHECK: store <2 x double>
|
||||
;CHECK: ret
|
||||
define i32 @foo(double* nocapture %A, float* nocapture %B, i32 %g) {
|
||||
entry:
|
||||
%0 = load float* %B, align 4
|
||||
%arrayidx1 = getelementptr inbounds float* %B, i64 1
|
||||
%1 = load float* %arrayidx1, align 4
|
||||
%add = fadd float %0, 5.000000e+00
|
||||
%add2 = fadd float %1, 8.000000e+00
|
||||
%tobool = icmp eq i32 %g, 0
|
||||
br i1 %tobool, label %if.end, label %if.then
|
||||
|
||||
if.then:
|
||||
%call = tail call i32 (...)* @bar()
|
||||
br label %if.end
|
||||
|
||||
if.end:
|
||||
%conv = fpext float %add to double
|
||||
%2 = load double* %A, align 8
|
||||
%add4 = fadd double %conv, %2
|
||||
store double %add4, double* %A, align 8
|
||||
%conv5 = fpext float %add2 to double
|
||||
%arrayidx6 = getelementptr inbounds double* %A, i64 1
|
||||
%3 = load double* %arrayidx6, align 8
|
||||
%add7 = fadd double %conv5, %3
|
||||
store double %add7, double* %arrayidx6, align 8
|
||||
ret i32 undef
|
||||
}
|
||||
|
||||
declare i32 @bar(...)
|
Loading…
Reference in New Issue
Block a user