mirror of
https://github.com/RPCS3/llvm.git
synced 2025-04-03 05:41:42 +00:00
SLPVectorizer: Make it a function pass and add code for hoisting the vector-gather sequence out of loops.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179562 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
c9363ef67a
commit
e9a4411db4
@ -213,10 +213,8 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
|
|||||||
|
|
||||||
addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
|
addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
|
||||||
|
|
||||||
if (SLPVectorize) {
|
if (SLPVectorize)
|
||||||
MPM.add(createSLPVectorizerPass());
|
MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
|
||||||
MPM.add(createEarlyCSEPass());
|
|
||||||
}
|
|
||||||
|
|
||||||
if (BBVectorize) {
|
if (BBVectorize) {
|
||||||
MPM.add(createBBVectorizePass());
|
MPM.add(createBBVectorizePass());
|
||||||
|
@ -24,6 +24,7 @@
|
|||||||
#include "llvm/Analysis/ScalarEvolution.h"
|
#include "llvm/Analysis/ScalarEvolution.h"
|
||||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||||
#include "llvm/Analysis/Verifier.h"
|
#include "llvm/Analysis/Verifier.h"
|
||||||
|
#include "llvm/Analysis/LoopInfo.h"
|
||||||
#include "llvm/IR/DataLayout.h"
|
#include "llvm/IR/DataLayout.h"
|
||||||
#include "llvm/IR/Instructions.h"
|
#include "llvm/IR/Instructions.h"
|
||||||
#include "llvm/IR/IntrinsicInst.h"
|
#include "llvm/IR/IntrinsicInst.h"
|
||||||
@ -45,13 +46,13 @@ SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden,
|
|||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
/// The SLPVectorizer Pass.
|
/// The SLPVectorizer Pass.
|
||||||
struct SLPVectorizer : public BasicBlockPass {
|
struct SLPVectorizer : public FunctionPass {
|
||||||
typedef std::map<Value*, BoUpSLP::StoreList> StoreListMap;
|
typedef std::map<Value*, BoUpSLP::StoreList> StoreListMap;
|
||||||
|
|
||||||
/// Pass identification, replacement for typeid
|
/// Pass identification, replacement for typeid
|
||||||
static char ID;
|
static char ID;
|
||||||
|
|
||||||
explicit SLPVectorizer() : BasicBlockPass(ID) {
|
explicit SLPVectorizer() : FunctionPass(ID) {
|
||||||
initializeSLPVectorizerPass(*PassRegistry::getPassRegistry());
|
initializeSLPVectorizerPass(*PassRegistry::getPassRegistry());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -59,183 +60,257 @@ struct SLPVectorizer : public BasicBlockPass {
|
|||||||
DataLayout *DL;
|
DataLayout *DL;
|
||||||
TargetTransformInfo *TTI;
|
TargetTransformInfo *TTI;
|
||||||
AliasAnalysis *AA;
|
AliasAnalysis *AA;
|
||||||
|
LoopInfo *LI;
|
||||||
|
|
||||||
/// \brief Collect memory references and sort them according to their base
|
virtual bool runOnFunction(Function &F) {
|
||||||
/// object. We sort the stores to their base objects to reduce the cost of the
|
|
||||||
/// quadratic search on the stores. TODO: We can further reduce this cost
|
|
||||||
/// if we flush the chain creation every time we run into a memory barrier.
|
|
||||||
bool collectStores(BasicBlock *BB, BoUpSLP &R) {
|
|
||||||
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
|
|
||||||
StoreInst *SI = dyn_cast<StoreInst>(it);
|
|
||||||
if (!SI)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
// Check that the pointer points to scalars.
|
|
||||||
if (SI->getValueOperand()->getType()->isAggregateType())
|
|
||||||
return false;
|
|
||||||
|
|
||||||
// Find the base of the GEP.
|
|
||||||
Value *Ptr = SI->getPointerOperand();
|
|
||||||
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
|
|
||||||
Ptr = GEP->getPointerOperand();
|
|
||||||
|
|
||||||
// Save the store locations.
|
|
||||||
StoreRefs[Ptr].push_back(SI);
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
|
|
||||||
if (!A || !B) return false;
|
|
||||||
BoUpSLP::ValueList VL;
|
|
||||||
VL.push_back(A);
|
|
||||||
VL.push_back(B);
|
|
||||||
int Cost = R.getTreeCost(VL);
|
|
||||||
int ExtrCost = R.getScalarizationCost(VL);
|
|
||||||
DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost <<
|
|
||||||
" Cost of extract:" << ExtrCost << ".\n");
|
|
||||||
if ((Cost+ExtrCost) >= -SLPCostThreshold) return false;
|
|
||||||
DEBUG(dbgs()<<"SLP: Vectorizing pair.\n");
|
|
||||||
R.vectorizeArith(VL);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
|
|
||||||
if (!V) return false;
|
|
||||||
// Try to vectorize V.
|
|
||||||
if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R))
|
|
||||||
return true;
|
|
||||||
|
|
||||||
BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0));
|
|
||||||
BinaryOperator *B = dyn_cast<BinaryOperator>(V->getOperand(1));
|
|
||||||
// Try to skip B.
|
|
||||||
if (B && B->hasOneUse()) {
|
|
||||||
BinaryOperator *B0 = dyn_cast<BinaryOperator>(B->getOperand(0));
|
|
||||||
BinaryOperator *B1 = dyn_cast<BinaryOperator>(B->getOperand(1));
|
|
||||||
if (tryToVectorizePair(A, B0, R)) {
|
|
||||||
B->moveBefore(V);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
if (tryToVectorizePair(A, B1, R)) {
|
|
||||||
B->moveBefore(V);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Try to slip A.
|
|
||||||
if (A && A->hasOneUse()) {
|
|
||||||
BinaryOperator *A0 = dyn_cast<BinaryOperator>(A->getOperand(0));
|
|
||||||
BinaryOperator *A1 = dyn_cast<BinaryOperator>(A->getOperand(1));
|
|
||||||
if (tryToVectorizePair(A0, B, R)) {
|
|
||||||
A->moveBefore(V);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
if (tryToVectorizePair(A1, B, R)) {
|
|
||||||
A->moveBefore(V);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool vectorizeReductions(BasicBlock *BB, BoUpSLP &R) {
|
|
||||||
bool Changed = false;
|
|
||||||
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
|
|
||||||
if (isa<DbgInfoIntrinsic>(it)) continue;
|
|
||||||
|
|
||||||
// Try to vectorize reductions that use PHINodes.
|
|
||||||
if (PHINode *P = dyn_cast<PHINode>(it)) {
|
|
||||||
// Check that the PHI is a reduction PHI.
|
|
||||||
if (P->getNumIncomingValues() != 2) return Changed;
|
|
||||||
Value *Rdx = (P->getIncomingBlock(0) == BB ? P->getIncomingValue(0) :
|
|
||||||
(P->getIncomingBlock(1) == BB ? P->getIncomingValue(1) :
|
|
||||||
0));
|
|
||||||
// Check if this is a Binary Operator.
|
|
||||||
BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx);
|
|
||||||
if (!BI)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Value *Inst = BI->getOperand(0);
|
|
||||||
if (Inst == P) Inst = BI->getOperand(1);
|
|
||||||
Changed |= tryToVectorize(dyn_cast<BinaryOperator>(Inst), R);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Try to vectorize trees that start at compare instructions.
|
|
||||||
if (CmpInst *CI = dyn_cast<CmpInst>(it)) {
|
|
||||||
if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) {
|
|
||||||
Changed |= true;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
for (int i = 0; i < 2; ++i)
|
|
||||||
if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i)))
|
|
||||||
Changed |= tryToVectorize(BI, R);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return Changed;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool vectorizeStoreChains(BoUpSLP &R) {
|
|
||||||
bool Changed = false;
|
|
||||||
// Attempt to sort and vectorize each of the store-groups.
|
|
||||||
for (StoreListMap::iterator it = StoreRefs.begin(), e = StoreRefs.end();
|
|
||||||
it != e; ++it) {
|
|
||||||
if (it->second.size() < 2)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
DEBUG(dbgs()<<"SLP: Analyzing a store chain of length " <<
|
|
||||||
it->second.size() << ".\n");
|
|
||||||
|
|
||||||
Changed |= R.vectorizeStores(it->second, -SLPCostThreshold);
|
|
||||||
}
|
|
||||||
return Changed;
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual bool runOnBasicBlock(BasicBlock &BB) {
|
|
||||||
SE = &getAnalysis<ScalarEvolution>();
|
SE = &getAnalysis<ScalarEvolution>();
|
||||||
DL = getAnalysisIfAvailable<DataLayout>();
|
DL = getAnalysisIfAvailable<DataLayout>();
|
||||||
TTI = &getAnalysis<TargetTransformInfo>();
|
TTI = &getAnalysis<TargetTransformInfo>();
|
||||||
AA = &getAnalysis<AliasAnalysis>();
|
AA = &getAnalysis<AliasAnalysis>();
|
||||||
|
LI = &getAnalysis<LoopInfo>();
|
||||||
|
|
||||||
StoreRefs.clear();
|
StoreRefs.clear();
|
||||||
|
bool Changed = false;
|
||||||
|
|
||||||
// Must have DataLayout. We can't require it because some tests run w/o
|
// Must have DataLayout. We can't require it because some tests run w/o
|
||||||
// triple.
|
// triple.
|
||||||
if (!DL)
|
if (!DL)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// Use the bollom up slp vectorizer to construct chains that start with
|
for (Function::iterator it = F.begin(), e = F.end(); it != e; ++it) {
|
||||||
// he store instructions.
|
BasicBlock *BB = it;
|
||||||
BoUpSLP R(&BB, SE, DL, TTI, AA);
|
bool BBChanged = false;
|
||||||
|
|
||||||
// Vectorize trees that end at reductions.
|
// Use the bollom up slp vectorizer to construct chains that start with
|
||||||
bool Changed = vectorizeReductions(&BB, R);
|
// he store instructions.
|
||||||
|
BoUpSLP R(BB, SE, DL, TTI, AA);
|
||||||
|
|
||||||
// Vectorize trees that end at stores.
|
// Vectorize trees that end at reductions.
|
||||||
if (collectStores(&BB, R)) {
|
BBChanged |= vectorizeReductions(BB, R);
|
||||||
DEBUG(dbgs()<<"SLP: Found stores to vectorize.\n");
|
|
||||||
Changed |= vectorizeStoreChains(R);
|
// Vectorize trees that end at stores.
|
||||||
|
if (collectStores(BB, R)) {
|
||||||
|
DEBUG(dbgs()<<"SLP: Found stores to vectorize.\n");
|
||||||
|
BBChanged |= vectorizeStoreChains(R);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try to hoist some of the scalarization code to the preheader.
|
||||||
|
if (BBChanged) hoistGatherSequence(LI, BB, R);
|
||||||
|
|
||||||
|
Changed |= BBChanged;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Changed) {
|
if (Changed) {
|
||||||
DEBUG(dbgs()<<"SLP: vectorized \""<<BB.getParent()->getName()<<"\"\n");
|
DEBUG(dbgs()<<"SLP: vectorized \""<<F.getName()<<"\"\n");
|
||||||
DEBUG(verifyFunction(*BB.getParent()));
|
DEBUG(verifyFunction(F));
|
||||||
}
|
}
|
||||||
return Changed;
|
return Changed;
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
|
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
|
||||||
BasicBlockPass::getAnalysisUsage(AU);
|
FunctionPass::getAnalysisUsage(AU);
|
||||||
AU.addRequired<ScalarEvolution>();
|
AU.addRequired<ScalarEvolution>();
|
||||||
AU.addRequired<AliasAnalysis>();
|
AU.addRequired<AliasAnalysis>();
|
||||||
AU.addRequired<TargetTransformInfo>();
|
AU.addRequired<TargetTransformInfo>();
|
||||||
|
AU.addRequired<LoopInfo>();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
/// \brief Collect memory references and sort them according to their base
|
||||||
|
/// object. We sort the stores to their base objects to reduce the cost of the
|
||||||
|
/// quadratic search on the stores. TODO: We can further reduce this cost
|
||||||
|
/// if we flush the chain creation every time we run into a memory barrier.
|
||||||
|
bool collectStores(BasicBlock *BB, BoUpSLP &R);
|
||||||
|
|
||||||
|
/// \brief Try to vectorize a chain that starts at two arithmetic instrs.
|
||||||
|
bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R);
|
||||||
|
|
||||||
|
/// \brief Try to vectorize a chain that may start at the operands of \V;
|
||||||
|
bool tryToVectorize(BinaryOperator *V, BoUpSLP &R);
|
||||||
|
|
||||||
|
/// \brief Vectorize the stores that were collected in StoreRefs.
|
||||||
|
bool vectorizeStoreChains(BoUpSLP &R);
|
||||||
|
|
||||||
|
/// \brief Try to hoist gather sequences outside of the loop in cases where
|
||||||
|
/// all of the sources are loop invariant.
|
||||||
|
void hoistGatherSequence(LoopInfo *LI, BasicBlock *BB, BoUpSLP &R);
|
||||||
|
|
||||||
|
/// \brief Scan the basic block and look for reductions that may start a
|
||||||
|
/// vectorization chain.
|
||||||
|
bool vectorizeReductions(BasicBlock *BB, BoUpSLP &R);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
StoreListMap StoreRefs;
|
StoreListMap StoreRefs;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
bool SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {
|
||||||
|
StoreRefs.clear();
|
||||||
|
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
|
||||||
|
StoreInst *SI = dyn_cast<StoreInst>(it);
|
||||||
|
if (!SI)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// Check that the pointer points to scalars.
|
||||||
|
if (SI->getValueOperand()->getType()->isAggregateType())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Find the base of the GEP.
|
||||||
|
Value *Ptr = SI->getPointerOperand();
|
||||||
|
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
|
||||||
|
Ptr = GEP->getPointerOperand();
|
||||||
|
|
||||||
|
// Save the store locations.
|
||||||
|
StoreRefs[Ptr].push_back(SI);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
|
||||||
|
if (!A || !B) return false;
|
||||||
|
BoUpSLP::ValueList VL;
|
||||||
|
VL.push_back(A);
|
||||||
|
VL.push_back(B);
|
||||||
|
int Cost = R.getTreeCost(VL);
|
||||||
|
int ExtrCost = R.getScalarizationCost(VL);
|
||||||
|
DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost <<
|
||||||
|
" Cost of extract:" << ExtrCost << ".\n");
|
||||||
|
if ((Cost+ExtrCost) >= -SLPCostThreshold) return false;
|
||||||
|
DEBUG(dbgs()<<"SLP: Vectorizing pair.\n");
|
||||||
|
R.vectorizeArith(VL);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
|
||||||
|
if (!V) return false;
|
||||||
|
// Try to vectorize V.
|
||||||
|
if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0));
|
||||||
|
BinaryOperator *B = dyn_cast<BinaryOperator>(V->getOperand(1));
|
||||||
|
// Try to skip B.
|
||||||
|
if (B && B->hasOneUse()) {
|
||||||
|
BinaryOperator *B0 = dyn_cast<BinaryOperator>(B->getOperand(0));
|
||||||
|
BinaryOperator *B1 = dyn_cast<BinaryOperator>(B->getOperand(1));
|
||||||
|
if (tryToVectorizePair(A, B0, R)) {
|
||||||
|
B->moveBefore(V);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (tryToVectorizePair(A, B1, R)) {
|
||||||
|
B->moveBefore(V);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try to slip A.
|
||||||
|
if (A && A->hasOneUse()) {
|
||||||
|
BinaryOperator *A0 = dyn_cast<BinaryOperator>(A->getOperand(0));
|
||||||
|
BinaryOperator *A1 = dyn_cast<BinaryOperator>(A->getOperand(1));
|
||||||
|
if (tryToVectorizePair(A0, B, R)) {
|
||||||
|
A->moveBefore(V);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (tryToVectorizePair(A1, B, R)) {
|
||||||
|
A->moveBefore(V);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool SLPVectorizer::vectorizeReductions(BasicBlock *BB, BoUpSLP &R) {
|
||||||
|
bool Changed = false;
|
||||||
|
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
|
||||||
|
if (isa<DbgInfoIntrinsic>(it)) continue;
|
||||||
|
|
||||||
|
// Try to vectorize reductions that use PHINodes.
|
||||||
|
if (PHINode *P = dyn_cast<PHINode>(it)) {
|
||||||
|
// Check that the PHI is a reduction PHI.
|
||||||
|
if (P->getNumIncomingValues() != 2) return Changed;
|
||||||
|
Value *Rdx = (P->getIncomingBlock(0) == BB ? P->getIncomingValue(0) :
|
||||||
|
(P->getIncomingBlock(1) == BB ? P->getIncomingValue(1) :
|
||||||
|
0));
|
||||||
|
// Check if this is a Binary Operator.
|
||||||
|
BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx);
|
||||||
|
if (!BI)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
Value *Inst = BI->getOperand(0);
|
||||||
|
if (Inst == P) Inst = BI->getOperand(1);
|
||||||
|
Changed |= tryToVectorize(dyn_cast<BinaryOperator>(Inst), R);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try to vectorize trees that start at compare instructions.
|
||||||
|
if (CmpInst *CI = dyn_cast<CmpInst>(it)) {
|
||||||
|
if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) {
|
||||||
|
Changed |= true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
for (int i = 0; i < 2; ++i)
|
||||||
|
if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i)))
|
||||||
|
Changed |= tryToVectorize(BI, R);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return Changed;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) {
|
||||||
|
bool Changed = false;
|
||||||
|
// Attempt to sort and vectorize each of the store-groups.
|
||||||
|
for (StoreListMap::iterator it = StoreRefs.begin(), e = StoreRefs.end();
|
||||||
|
it != e; ++it) {
|
||||||
|
if (it->second.size() < 2)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
DEBUG(dbgs()<<"SLP: Analyzing a store chain of length " <<
|
||||||
|
it->second.size() << ".\n");
|
||||||
|
|
||||||
|
Changed |= R.vectorizeStores(it->second, -SLPCostThreshold);
|
||||||
|
}
|
||||||
|
return Changed;
|
||||||
|
}
|
||||||
|
|
||||||
|
void SLPVectorizer::hoistGatherSequence(LoopInfo *LI, BasicBlock *BB,
|
||||||
|
BoUpSLP &R) {
|
||||||
|
// Check if this block is inside a loop.
|
||||||
|
Loop *L = LI->getLoopFor(BB);
|
||||||
|
if (!L)
|
||||||
|
return;
|
||||||
|
|
||||||
|
// Check if it has a preheader.
|
||||||
|
BasicBlock *PreHeader = L->getLoopPreheader();
|
||||||
|
if (!PreHeader)
|
||||||
|
return;
|
||||||
|
|
||||||
|
// Mark the insertion point for the block.
|
||||||
|
Instruction *Location = PreHeader->getTerminator();
|
||||||
|
|
||||||
|
BoUpSLP::ValueList &Gathers = R.getGatherSeqInstructions();
|
||||||
|
for (BoUpSLP::ValueList::iterator it = Gathers.begin(), e = Gathers.end();
|
||||||
|
it != e; ++it) {
|
||||||
|
InsertElementInst *Insert = dyn_cast<InsertElementInst>(*it);
|
||||||
|
|
||||||
|
// The InsertElement sequence can be simplified into a constant.
|
||||||
|
if (!Insert)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// If the vector or the element that we insert into it are
|
||||||
|
// instructions that are defined in this basic block then we can't
|
||||||
|
// hoist this instruction.
|
||||||
|
Instruction *CurrVec = dyn_cast<Instruction>(Insert->getOperand(0));
|
||||||
|
Instruction *NewElem = dyn_cast<Instruction>(Insert->getOperand(1));
|
||||||
|
if (CurrVec && L->contains(CurrVec)) continue;
|
||||||
|
if (NewElem && L->contains(NewElem)) continue;
|
||||||
|
|
||||||
|
// We can hoist this instruction. Move it to the pre-header.
|
||||||
|
Insert->moveBefore(Location);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // end anonymous namespace
|
} // end anonymous namespace
|
||||||
|
|
||||||
char SLPVectorizer::ID = 0;
|
char SLPVectorizer::ID = 0;
|
||||||
|
@ -511,8 +511,15 @@ Instruction *BoUpSLP::GetLastInstr(ValueList &VL, unsigned VF) {
|
|||||||
Value *BoUpSLP::Scalarize(ValueList &VL, VectorType *Ty) {
|
Value *BoUpSLP::Scalarize(ValueList &VL, VectorType *Ty) {
|
||||||
IRBuilder<> Builder(GetLastInstr(VL, Ty->getNumElements()));
|
IRBuilder<> Builder(GetLastInstr(VL, Ty->getNumElements()));
|
||||||
Value *Vec = UndefValue::get(Ty);
|
Value *Vec = UndefValue::get(Ty);
|
||||||
for (unsigned i=0; i < Ty->getNumElements(); ++i)
|
for (unsigned i=0; i < Ty->getNumElements(); ++i) {
|
||||||
|
// Generate the 'InsertElement' instruction.
|
||||||
Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
|
Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
|
||||||
|
// Remember that this instruction is used as part of a 'gather' sequence.
|
||||||
|
// The caller of the bottom-up slp vectorizer can try to hoist the sequence
|
||||||
|
// if the users are outside of the basic block.
|
||||||
|
GatherInstructions.push_back(Vec);
|
||||||
|
}
|
||||||
|
|
||||||
return Vec;
|
return Vec;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -71,6 +71,11 @@ struct BoUpSLP {
|
|||||||
/// \brief Vectorize a group of scalars into a vector tree.
|
/// \brief Vectorize a group of scalars into a vector tree.
|
||||||
void vectorizeArith(ValueList &Operands);
|
void vectorizeArith(ValueList &Operands);
|
||||||
|
|
||||||
|
/// \returns the list of new instructions that were added in order to collect
|
||||||
|
/// scalars into vectors. This list can be used to further optimize the gather
|
||||||
|
/// sequences.
|
||||||
|
ValueList &getGatherSeqInstructions() {return GatherInstructions; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/// \brief This method contains the recursive part of getTreeCost.
|
/// \brief This method contains the recursive part of getTreeCost.
|
||||||
int getTreeCost_rec(ValueList &VL, unsigned Depth);
|
int getTreeCost_rec(ValueList &VL, unsigned Depth);
|
||||||
@ -107,11 +112,11 @@ private:
|
|||||||
|
|
||||||
/// \returns a vector from a collection of scalars in \p VL.
|
/// \returns a vector from a collection of scalars in \p VL.
|
||||||
Value *Scalarize(ValueList &VL, VectorType *Ty);
|
Value *Scalarize(ValueList &VL, VectorType *Ty);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Maps instructions to numbers and back.
|
/// Maps instructions to numbers and back.
|
||||||
SmallDenseMap<Value*, int> InstrIdx;
|
SmallDenseMap<Value*, int> InstrIdx;
|
||||||
// Maps integers to Instructions.
|
/// Maps integers to Instructions.
|
||||||
std::vector<Instruction*> InstrVec;
|
std::vector<Instruction*> InstrVec;
|
||||||
|
|
||||||
// -- containers that are used during getTreeCost -- //
|
// -- containers that are used during getTreeCost -- //
|
||||||
@ -121,21 +126,29 @@ private:
|
|||||||
/// NOTICE: The vectorization methods also use this set.
|
/// NOTICE: The vectorization methods also use this set.
|
||||||
ValueSet MustScalarize;
|
ValueSet MustScalarize;
|
||||||
|
|
||||||
// Contains a list of values that are used outside the current tree. This
|
/// Contains a list of values that are used outside the current tree. This
|
||||||
// set must be reset between runs.
|
/// set must be reset between runs.
|
||||||
ValueSet MultiUserVals;
|
ValueSet MultiUserVals;
|
||||||
// Maps values in the tree to the vector lanes that uses them. This map must
|
/// Maps values in the tree to the vector lanes that uses them. This map must
|
||||||
// be reset between runs of getCost.
|
/// be reset between runs of getCost.
|
||||||
std::map<Value*, int> LaneMap;
|
std::map<Value*, int> LaneMap;
|
||||||
// A list of instructions to ignore while sinking
|
/// A list of instructions to ignore while sinking
|
||||||
// memory instructions. This map must be reset between runs of getCost.
|
/// memory instructions. This map must be reset between runs of getCost.
|
||||||
SmallPtrSet<Value *, 8> MemBarrierIgnoreList;
|
SmallPtrSet<Value *, 8> MemBarrierIgnoreList;
|
||||||
|
|
||||||
// -- containers that are used during vectorizeTree -- //
|
// -- Containers that are used during vectorizeTree -- //
|
||||||
// Maps between the first scalar to the vector. This map must be reset between
|
|
||||||
// runs.
|
/// Maps between the first scalar to the vector. This map must be reset
|
||||||
|
///between runs.
|
||||||
DenseMap<Value*, Value*> VectorizedValues;
|
DenseMap<Value*, Value*> VectorizedValues;
|
||||||
|
|
||||||
|
// -- Containers that are used after vectorization by the caller -- //
|
||||||
|
|
||||||
|
/// A list of instructions that are used when gathering scalars into vectors.
|
||||||
|
/// In many cases these instructions can be hoisted outside of the BB.
|
||||||
|
/// Iterating over this list is faster than calling LICM.
|
||||||
|
ValueList GatherInstructions;
|
||||||
|
|
||||||
// Analysis and block reference.
|
// Analysis and block reference.
|
||||||
BasicBlock *BB;
|
BasicBlock *BB;
|
||||||
ScalarEvolution *SE;
|
ScalarEvolution *SE;
|
||||||
|
59
test/Transforms/SLPVectorizer/X86/hoist.ll
Normal file
59
test/Transforms/SLPVectorizer/X86/hoist.ll
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
|
||||||
|
|
||||||
|
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
|
||||||
|
target triple = "i386-apple-macosx10.9.0"
|
||||||
|
|
||||||
|
;int foo(int *A, int n, int k) {
|
||||||
|
; for (int i=0; i < 10000; i+=4) {
|
||||||
|
; A[i] += n;
|
||||||
|
; A[i+1] += k;
|
||||||
|
; A[i+2] += n;
|
||||||
|
; A[i+3] += k;
|
||||||
|
; }
|
||||||
|
;}
|
||||||
|
|
||||||
|
; preheader:
|
||||||
|
;CHECK: entry
|
||||||
|
;CHECK-NEXT: insertelement
|
||||||
|
;CHECK-NEXT: insertelement
|
||||||
|
;CHECK-NEXT: insertelement
|
||||||
|
;CHECK-NEXT: insertelement
|
||||||
|
; loop body:
|
||||||
|
;CHECK: phi
|
||||||
|
;CHECK: load <4 x i32>
|
||||||
|
;CHECK: add <4 x i32>
|
||||||
|
;CHECK: store <4 x i32>
|
||||||
|
;CHECK: ret
|
||||||
|
define i32 @foo(i32* nocapture %A, i32 %n, i32 %k) {
|
||||||
|
entry:
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.body: ; preds = %entry, %for.body
|
||||||
|
%i.024 = phi i32 [ 0, %entry ], [ %add10, %for.body ]
|
||||||
|
%arrayidx = getelementptr inbounds i32* %A, i32 %i.024
|
||||||
|
%0 = load i32* %arrayidx, align 4
|
||||||
|
%add = add nsw i32 %0, %n
|
||||||
|
store i32 %add, i32* %arrayidx, align 4
|
||||||
|
%add121 = or i32 %i.024, 1
|
||||||
|
%arrayidx2 = getelementptr inbounds i32* %A, i32 %add121
|
||||||
|
%1 = load i32* %arrayidx2, align 4
|
||||||
|
%add3 = add nsw i32 %1, %k
|
||||||
|
store i32 %add3, i32* %arrayidx2, align 4
|
||||||
|
%add422 = or i32 %i.024, 2
|
||||||
|
%arrayidx5 = getelementptr inbounds i32* %A, i32 %add422
|
||||||
|
%2 = load i32* %arrayidx5, align 4
|
||||||
|
%add6 = add nsw i32 %2, %n
|
||||||
|
store i32 %add6, i32* %arrayidx5, align 4
|
||||||
|
%add723 = or i32 %i.024, 3
|
||||||
|
%arrayidx8 = getelementptr inbounds i32* %A, i32 %add723
|
||||||
|
%3 = load i32* %arrayidx8, align 4
|
||||||
|
%add9 = add nsw i32 %3, %k
|
||||||
|
store i32 %add9, i32* %arrayidx8, align 4
|
||||||
|
%add10 = add nsw i32 %i.024, 4
|
||||||
|
%cmp = icmp slt i32 %add10, 10000
|
||||||
|
br i1 %cmp, label %for.body, label %for.end
|
||||||
|
|
||||||
|
for.end: ; preds = %for.body
|
||||||
|
ret i32 undef
|
||||||
|
}
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user