Extended LoadStoreVectorizer to vectorize subchains.

Summary:
LSV used to abort vectorizing a chain for interleaved load/store accesses that alias.
Allow a valid prefix of the chain to be vectorized, mark just the prefix and retry vectorizing the remaining chain.

Reviewers: llvm-commits, jlebar, arsenm

Subscribers: mzolotukhin

Differential Revision: http://reviews.llvm.org/D22119

llvm-svn: 275317
This commit is contained in:
Alina Sbirlea 2016-07-13 21:20:01 +00:00
parent c2773e97d2
commit 640a61cd8b
2 changed files with 121 additions and 71 deletions

View File

@ -106,11 +106,13 @@ private:
std::pair<ArrayRef<Value *>, ArrayRef<Value *>>
splitOddVectorElts(ArrayRef<Value *> Chain, unsigned ElementSizeBits);
/// Checks if there are any instructions which may affect the memory accessed
/// in the chain between \p From and \p To. The elements of \p Chain should be
/// all loads or all stores.
bool isVectorizable(ArrayRef<Value *> Chain, BasicBlock::iterator From,
BasicBlock::iterator To);
/// Checks for instructions which may affect the memory accessed
/// in the chain between \p From and \p To. Returns Index, where
/// \p Chain[0, Index) is the largest vectorizable chain prefix.
/// The elements of \p Chain should be all loads or all stores.
unsigned getVectorizablePrefixEndIdx(ArrayRef<Value *> Chain,
BasicBlock::iterator From,
BasicBlock::iterator To);
/// Collects load and store instructions to vectorize.
void collectInstructions(BasicBlock *BB);
@ -123,10 +125,12 @@ private:
bool vectorizeInstructions(ArrayRef<Value *> Instrs);
/// Vectorizes the load instructions in Chain.
bool vectorizeLoadChain(ArrayRef<Value *> Chain);
bool vectorizeLoadChain(ArrayRef<Value *> Chain,
SmallPtrSet<Value *, 16> *InstructionsProcessed);
/// Vectorizes the store instructions in Chain.
bool vectorizeStoreChain(ArrayRef<Value *> Chain);
bool vectorizeStoreChain(ArrayRef<Value *> Chain,
SmallPtrSet<Value *, 16> *InstructionsProcessed);
/// Check if this load/store access is misaligned accesses
bool accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
@ -421,50 +425,53 @@ Vectorizer::splitOddVectorElts(ArrayRef<Value *> Chain,
return std::make_pair(Chain.slice(0, NumLeft), Chain.slice(NumLeft));
}
bool Vectorizer::isVectorizable(ArrayRef<Value *> Chain,
BasicBlock::iterator From,
BasicBlock::iterator To) {
unsigned Vectorizer::getVectorizablePrefixEndIdx(ArrayRef<Value *> Chain,
BasicBlock::iterator From,
BasicBlock::iterator To) {
SmallVector<std::pair<Value *, unsigned>, 16> MemoryInstrs;
SmallVector<std::pair<Value *, unsigned>, 16> ChainInstrs;
unsigned Idx = 0;
for (auto I = From, E = To; I != E; ++I, ++Idx) {
unsigned InstrIdx = 0;
for (auto I = From; I != To; ++I, ++InstrIdx) {
if (isa<LoadInst>(I) || isa<StoreInst>(I)) {
if (!is_contained(Chain, &*I))
MemoryInstrs.push_back({&*I, Idx});
MemoryInstrs.push_back({&*I, InstrIdx});
else
ChainInstrs.push_back({&*I, Idx});
ChainInstrs.push_back({&*I, InstrIdx});
} else if (I->mayHaveSideEffects()) {
DEBUG(dbgs() << "LSV: Found side-effecting operation: " << *I << '\n');
return false;
return 0;
}
}
assert(Chain.size() == ChainInstrs.size() &&
"All instructions in the Chain must exist in [From, To).");
for (auto EntryMem : MemoryInstrs) {
Value *V = EntryMem.first;
unsigned VIdx = EntryMem.second;
for (auto EntryChain : ChainInstrs) {
Value *VV = EntryChain.first;
unsigned VVIdx = EntryChain.second;
if (isa<LoadInst>(V) && isa<LoadInst>(VV))
unsigned ChainIdx = 0;
for (auto EntryChain : ChainInstrs) {
Value *ChainInstrValue = EntryChain.first;
unsigned ChainInstrIdx = EntryChain.second;
for (auto EntryMem : MemoryInstrs) {
Value *MemInstrValue = EntryMem.first;
unsigned MemInstrIdx = EntryMem.second;
if (isa<LoadInst>(MemInstrValue) && isa<LoadInst>(ChainInstrValue))
continue;
// We can ignore the alias as long as the load comes before the store,
// because that means we won't be moving the load past the store to
// vectorize it (the vectorized load is inserted at the location of the
// first load in the chain).
if (isa<StoreInst>(V) && isa<LoadInst>(VV) && VVIdx < VIdx)
if (isa<StoreInst>(MemInstrValue) && isa<LoadInst>(ChainInstrValue) &&
ChainInstrIdx < MemInstrIdx)
continue;
// Same case, but in reverse.
if (isa<LoadInst>(V) && isa<StoreInst>(VV) && VVIdx > VIdx)
if (isa<LoadInst>(MemInstrValue) && isa<StoreInst>(ChainInstrValue) &&
ChainInstrIdx > MemInstrIdx)
continue;
Instruction *M0 = cast<Instruction>(V);
Instruction *M1 = cast<Instruction>(VV);
Instruction *M0 = cast<Instruction>(MemInstrValue);
Instruction *M1 = cast<Instruction>(ChainInstrValue);
if (!AA.isNoAlias(MemoryLocation::get(M0), MemoryLocation::get(M1))) {
DEBUG({
@ -473,17 +480,17 @@ bool Vectorizer::isVectorizable(ArrayRef<Value *> Chain,
dbgs() << "LSV: Found alias.\n"
" Aliasing instruction and pointer:\n"
<< *V << " aliases " << *Ptr0 << '\n'
<< *MemInstrValue << " aliases " << *Ptr0 << '\n'
<< " Aliased instruction and pointer:\n"
<< *VV << " aliases " << *Ptr1 << '\n';
<< *ChainInstrValue << " aliases " << *Ptr1 << '\n';
});
return false;
return ChainIdx;
}
}
ChainIdx++;
}
return true;
return Chain.size();
}
void Vectorizer::collectInstructions(BasicBlock *BB) {
@ -614,10 +621,19 @@ bool Vectorizer::vectorizeInstructions(ArrayRef<Value *> Instrs) {
}
bool Changed = false;
SmallPtrSet<Value *, 16> VectorizedValues;
SmallPtrSet<Value *, 16> InstructionsProcessed;
for (int Head : Heads) {
if (Tails.count(Head))
if (InstructionsProcessed.count(Instrs[Head]))
continue;
bool longerChainExists = false;
for (unsigned TIt = 0; TIt < Tails.size(); TIt++)
if (Head == Tails[TIt] &&
!InstructionsProcessed.count(Instrs[Heads[TIt]])) {
longerChainExists = true;
break;
}
if (longerChainExists)
continue;
// We found an instr that starts a chain. Now follow the chain and try to
@ -625,7 +641,7 @@ bool Vectorizer::vectorizeInstructions(ArrayRef<Value *> Instrs) {
SmallVector<Value *, 16> Operands;
int I = Head;
while (I != -1 && (Tails.count(I) || Heads.count(I))) {
if (VectorizedValues.count(Instrs[I]))
if (InstructionsProcessed.count(Instrs[I]))
break;
Operands.push_back(Instrs[I]);
@ -634,20 +650,18 @@ bool Vectorizer::vectorizeInstructions(ArrayRef<Value *> Instrs) {
bool Vectorized = false;
if (isa<LoadInst>(*Operands.begin()))
Vectorized = vectorizeLoadChain(Operands);
Vectorized = vectorizeLoadChain(Operands, &InstructionsProcessed);
else
Vectorized = vectorizeStoreChain(Operands);
Vectorized = vectorizeStoreChain(Operands, &InstructionsProcessed);
// Mark the vectorized instructions so that we don't vectorize them again.
if (Vectorized)
VectorizedValues.insert(Operands.begin(), Operands.end());
Changed |= Vectorized;
}
return Changed;
}
bool Vectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain) {
bool Vectorizer::vectorizeStoreChain(
ArrayRef<Value *> Chain, SmallPtrSet<Value *, 16> *InstructionsProcessed) {
StoreInst *S0 = cast<StoreInst>(Chain[0]);
// If the vector has an int element, default to int for the whole load.
@ -670,8 +684,28 @@ bool Vectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain) {
unsigned VF = VecRegSize / Sz;
unsigned ChainSize = Chain.size();
if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2)
if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) {
InstructionsProcessed->insert(Chain.begin(), Chain.end());
return false;
}
BasicBlock::iterator First, Last;
std::tie(First, Last) = getBoundaryInstrs(Chain);
unsigned StopChain = getVectorizablePrefixEndIdx(Chain, First, Last);
if (StopChain == 0) {
// There exists a side effect instruction, no vectorization possible.
InstructionsProcessed->insert(Chain.begin(), Chain.end());
return false;
}
if (StopChain == 1) {
// Failed after the first instruction. Discard it and try the smaller chain.
InstructionsProcessed->insert(Chain.front());
return false;
}
// Update Chain to the valid vectorizable subchain.
Chain = Chain.slice(0, StopChain);
ChainSize = Chain.size();
// Store size should be 1B, 2B or multiple of 4B.
// TODO: Target hook for size constraint?
@ -680,11 +714,12 @@ bool Vectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain) {
DEBUG(dbgs() << "LSV: Size should be 1B, 2B "
"or multiple of 4B. Splitting.\n");
if (SzInBytes == 3)
return vectorizeStoreChain(Chain.slice(0, ChainSize - 1));
return vectorizeStoreChain(Chain.slice(0, ChainSize - 1),
InstructionsProcessed);
auto Chains = splitOddVectorElts(Chain, Sz);
return vectorizeStoreChain(Chains.first) |
vectorizeStoreChain(Chains.second);
return vectorizeStoreChain(Chains.first, InstructionsProcessed) |
vectorizeStoreChain(Chains.second, InstructionsProcessed);
}
VectorType *VecTy;
@ -700,8 +735,8 @@ bool Vectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain) {
if (ChainSize > VF) {
DEBUG(dbgs() << "LSV: Vector factor is too big."
" Creating two separate arrays.\n");
return vectorizeStoreChain(Chain.slice(0, VF)) |
vectorizeStoreChain(Chain.slice(VF));
return vectorizeStoreChain(Chain.slice(0, VF), InstructionsProcessed) |
vectorizeStoreChain(Chain.slice(VF), InstructionsProcessed);
}
DEBUG({
@ -710,6 +745,10 @@ bool Vectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain) {
V->dump();
});
// We won't try again to vectorize the elements of the chain, regardless of
// whether we succeed below.
InstructionsProcessed->insert(Chain.begin(), Chain.end());
// Check alignment restrictions.
unsigned Alignment = getAlignment(S0);
@ -729,12 +768,6 @@ bool Vectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain) {
}
}
BasicBlock::iterator First, Last;
std::tie(First, Last) = getBoundaryInstrs(Chain);
if (!isVectorizable(Chain, First, Last))
return false;
// Set insert point.
Builder.SetInsertPoint(&*Last);
@ -782,7 +815,8 @@ bool Vectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain) {
return true;
}
bool Vectorizer::vectorizeLoadChain(ArrayRef<Value *> Chain) {
bool Vectorizer::vectorizeLoadChain(
ArrayRef<Value *> Chain, SmallPtrSet<Value *, 16> *InstructionsProcessed) {
LoadInst *L0 = cast<LoadInst>(Chain[0]);
// If the vector has an int element, default to int for the whole load.
@ -805,8 +839,28 @@ bool Vectorizer::vectorizeLoadChain(ArrayRef<Value *> Chain) {
unsigned VF = VecRegSize / Sz;
unsigned ChainSize = Chain.size();
if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2)
if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) {
InstructionsProcessed->insert(Chain.begin(), Chain.end());
return false;
}
BasicBlock::iterator First, Last;
std::tie(First, Last) = getBoundaryInstrs(Chain);
unsigned StopChain = getVectorizablePrefixEndIdx(Chain, First, Last);
if (StopChain == 0) {
// There exists a side effect instruction, no vectorization possible.
InstructionsProcessed->insert(Chain.begin(), Chain.end());
return false;
}
if (StopChain == 1) {
// Failed after the first instruction. Discard it and try the smaller chain.
InstructionsProcessed->insert(Chain.front());
return false;
}
// Update Chain to the valid vectorizable subchain.
Chain = Chain.slice(0, StopChain);
ChainSize = Chain.size();
// Load size should be 1B, 2B or multiple of 4B.
// TODO: Should size constraint be a target hook?
@ -815,9 +869,11 @@ bool Vectorizer::vectorizeLoadChain(ArrayRef<Value *> Chain) {
DEBUG(dbgs() << "LSV: Size should be 1B, 2B "
"or multiple of 4B. Splitting.\n");
if (SzInBytes == 3)
return vectorizeLoadChain(Chain.slice(0, ChainSize - 1));
return vectorizeLoadChain(Chain.slice(0, ChainSize - 1),
InstructionsProcessed);
auto Chains = splitOddVectorElts(Chain, Sz);
return vectorizeLoadChain(Chains.first) | vectorizeLoadChain(Chains.second);
return vectorizeLoadChain(Chains.first, InstructionsProcessed) |
vectorizeLoadChain(Chains.second, InstructionsProcessed);
}
VectorType *VecTy;
@ -833,10 +889,14 @@ bool Vectorizer::vectorizeLoadChain(ArrayRef<Value *> Chain) {
if (ChainSize > VF) {
DEBUG(dbgs() << "LSV: Vector factor is too big. "
"Creating two separate arrays.\n");
return vectorizeLoadChain(Chain.slice(0, VF)) |
vectorizeLoadChain(Chain.slice(VF));
return vectorizeLoadChain(Chain.slice(0, VF), InstructionsProcessed) |
vectorizeLoadChain(Chain.slice(VF), InstructionsProcessed);
}
// We won't try again to vectorize the elements of the chain, regardless of
// whether we succeed below.
InstructionsProcessed->insert(Chain.begin(), Chain.end());
// Check alignment restrictions.
unsigned Alignment = getAlignment(L0);
@ -862,12 +922,6 @@ bool Vectorizer::vectorizeLoadChain(ArrayRef<Value *> Chain) {
V->dump();
});
BasicBlock::iterator First, Last;
std::tie(First, Last) = getBoundaryInstrs(Chain);
if (!isVectorizable(Chain, First, Last))
return false;
// Set insert point.
Builder.SetInsertPoint(&*First);

View File

@ -48,8 +48,7 @@ define void @interleave_3L_2S_1L(i32* noalias %ptr) {
; CHECK-LABEL: @chain_suffix(
; CHECK: load i32
; CHECK: store <2 x i32>
; CHECK: load i32
; CHECK: load i32
; CHECK: load <2 x i32>
define void @chain_suffix(i32* noalias %ptr) {
%next.gep = getelementptr i32, i32* %ptr, i64 0
%next.gep1 = getelementptr i32, i32* %ptr, i64 1
@ -66,12 +65,9 @@ define void @chain_suffix(i32* noalias %ptr) {
; CHECK-LABEL: @chain_prefix_suffix(
; CHECK: load i32
; CHECK: load i32
; CHECK: load <2 x i32>
; CHECK: store <2 x i32>
; CHECK: load i32
; CHECK: load i32
; CHECK: load i32
; CHECK: load <3 x i32>
define void @chain_prefix_suffix(i32* noalias %ptr) {
%next.gep = getelementptr i32, i32* %ptr, i64 0
%next.gep1 = getelementptr i32, i32* %ptr, i64 1