mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-01-27 11:55:49 +00:00
Extended LoadStoreVectorizer to vectorize subchains.
Summary: LSV used to abort vectorizing a chain for interleaved load/store accesses that alias. Allow a valid prefix of the chain to be vectorized, mark just the prefix and retry vectorizing the remaining chain. Reviewers: llvm-commits, jlebar, arsenm Subscribers: mzolotukhin Differential Revision: http://reviews.llvm.org/D22119 llvm-svn: 275317
This commit is contained in:
parent
c2773e97d2
commit
640a61cd8b
@ -106,11 +106,13 @@ private:
|
||||
std::pair<ArrayRef<Value *>, ArrayRef<Value *>>
|
||||
splitOddVectorElts(ArrayRef<Value *> Chain, unsigned ElementSizeBits);
|
||||
|
||||
/// Checks if there are any instructions which may affect the memory accessed
|
||||
/// in the chain between \p From and \p To. The elements of \p Chain should be
|
||||
/// all loads or all stores.
|
||||
bool isVectorizable(ArrayRef<Value *> Chain, BasicBlock::iterator From,
|
||||
BasicBlock::iterator To);
|
||||
/// Checks for instructions which may affect the memory accessed
|
||||
/// in the chain between \p From and \p To. Returns Index, where
|
||||
/// \p Chain[0, Index) is the largest vectorizable chain prefix.
|
||||
/// The elements of \p Chain should be all loads or all stores.
|
||||
unsigned getVectorizablePrefixEndIdx(ArrayRef<Value *> Chain,
|
||||
BasicBlock::iterator From,
|
||||
BasicBlock::iterator To);
|
||||
|
||||
/// Collects load and store instructions to vectorize.
|
||||
void collectInstructions(BasicBlock *BB);
|
||||
@ -123,10 +125,12 @@ private:
|
||||
bool vectorizeInstructions(ArrayRef<Value *> Instrs);
|
||||
|
||||
/// Vectorizes the load instructions in Chain.
|
||||
bool vectorizeLoadChain(ArrayRef<Value *> Chain);
|
||||
bool vectorizeLoadChain(ArrayRef<Value *> Chain,
|
||||
SmallPtrSet<Value *, 16> *InstructionsProcessed);
|
||||
|
||||
/// Vectorizes the store instructions in Chain.
|
||||
bool vectorizeStoreChain(ArrayRef<Value *> Chain);
|
||||
bool vectorizeStoreChain(ArrayRef<Value *> Chain,
|
||||
SmallPtrSet<Value *, 16> *InstructionsProcessed);
|
||||
|
||||
/// Check if this load/store access is misaligned accesses
|
||||
bool accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
|
||||
@ -421,50 +425,53 @@ Vectorizer::splitOddVectorElts(ArrayRef<Value *> Chain,
|
||||
return std::make_pair(Chain.slice(0, NumLeft), Chain.slice(NumLeft));
|
||||
}
|
||||
|
||||
bool Vectorizer::isVectorizable(ArrayRef<Value *> Chain,
|
||||
BasicBlock::iterator From,
|
||||
BasicBlock::iterator To) {
|
||||
unsigned Vectorizer::getVectorizablePrefixEndIdx(ArrayRef<Value *> Chain,
|
||||
BasicBlock::iterator From,
|
||||
BasicBlock::iterator To) {
|
||||
SmallVector<std::pair<Value *, unsigned>, 16> MemoryInstrs;
|
||||
SmallVector<std::pair<Value *, unsigned>, 16> ChainInstrs;
|
||||
|
||||
unsigned Idx = 0;
|
||||
for (auto I = From, E = To; I != E; ++I, ++Idx) {
|
||||
unsigned InstrIdx = 0;
|
||||
for (auto I = From; I != To; ++I, ++InstrIdx) {
|
||||
if (isa<LoadInst>(I) || isa<StoreInst>(I)) {
|
||||
if (!is_contained(Chain, &*I))
|
||||
MemoryInstrs.push_back({&*I, Idx});
|
||||
MemoryInstrs.push_back({&*I, InstrIdx});
|
||||
else
|
||||
ChainInstrs.push_back({&*I, Idx});
|
||||
ChainInstrs.push_back({&*I, InstrIdx});
|
||||
} else if (I->mayHaveSideEffects()) {
|
||||
DEBUG(dbgs() << "LSV: Found side-effecting operation: " << *I << '\n');
|
||||
return false;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
assert(Chain.size() == ChainInstrs.size() &&
|
||||
"All instructions in the Chain must exist in [From, To).");
|
||||
|
||||
for (auto EntryMem : MemoryInstrs) {
|
||||
Value *V = EntryMem.first;
|
||||
unsigned VIdx = EntryMem.second;
|
||||
for (auto EntryChain : ChainInstrs) {
|
||||
Value *VV = EntryChain.first;
|
||||
unsigned VVIdx = EntryChain.second;
|
||||
if (isa<LoadInst>(V) && isa<LoadInst>(VV))
|
||||
unsigned ChainIdx = 0;
|
||||
for (auto EntryChain : ChainInstrs) {
|
||||
Value *ChainInstrValue = EntryChain.first;
|
||||
unsigned ChainInstrIdx = EntryChain.second;
|
||||
for (auto EntryMem : MemoryInstrs) {
|
||||
Value *MemInstrValue = EntryMem.first;
|
||||
unsigned MemInstrIdx = EntryMem.second;
|
||||
if (isa<LoadInst>(MemInstrValue) && isa<LoadInst>(ChainInstrValue))
|
||||
continue;
|
||||
|
||||
// We can ignore the alias as long as the load comes before the store,
|
||||
// because that means we won't be moving the load past the store to
|
||||
// vectorize it (the vectorized load is inserted at the location of the
|
||||
// first load in the chain).
|
||||
if (isa<StoreInst>(V) && isa<LoadInst>(VV) && VVIdx < VIdx)
|
||||
if (isa<StoreInst>(MemInstrValue) && isa<LoadInst>(ChainInstrValue) &&
|
||||
ChainInstrIdx < MemInstrIdx)
|
||||
continue;
|
||||
|
||||
// Same case, but in reverse.
|
||||
if (isa<LoadInst>(V) && isa<StoreInst>(VV) && VVIdx > VIdx)
|
||||
if (isa<LoadInst>(MemInstrValue) && isa<StoreInst>(ChainInstrValue) &&
|
||||
ChainInstrIdx > MemInstrIdx)
|
||||
continue;
|
||||
|
||||
Instruction *M0 = cast<Instruction>(V);
|
||||
Instruction *M1 = cast<Instruction>(VV);
|
||||
Instruction *M0 = cast<Instruction>(MemInstrValue);
|
||||
Instruction *M1 = cast<Instruction>(ChainInstrValue);
|
||||
|
||||
if (!AA.isNoAlias(MemoryLocation::get(M0), MemoryLocation::get(M1))) {
|
||||
DEBUG({
|
||||
@ -473,17 +480,17 @@ bool Vectorizer::isVectorizable(ArrayRef<Value *> Chain,
|
||||
|
||||
dbgs() << "LSV: Found alias.\n"
|
||||
" Aliasing instruction and pointer:\n"
|
||||
<< *V << " aliases " << *Ptr0 << '\n'
|
||||
<< *MemInstrValue << " aliases " << *Ptr0 << '\n'
|
||||
<< " Aliased instruction and pointer:\n"
|
||||
<< *VV << " aliases " << *Ptr1 << '\n';
|
||||
<< *ChainInstrValue << " aliases " << *Ptr1 << '\n';
|
||||
});
|
||||
|
||||
return false;
|
||||
return ChainIdx;
|
||||
}
|
||||
}
|
||||
ChainIdx++;
|
||||
}
|
||||
|
||||
return true;
|
||||
return Chain.size();
|
||||
}
|
||||
|
||||
void Vectorizer::collectInstructions(BasicBlock *BB) {
|
||||
@ -614,10 +621,19 @@ bool Vectorizer::vectorizeInstructions(ArrayRef<Value *> Instrs) {
|
||||
}
|
||||
|
||||
bool Changed = false;
|
||||
SmallPtrSet<Value *, 16> VectorizedValues;
|
||||
SmallPtrSet<Value *, 16> InstructionsProcessed;
|
||||
|
||||
for (int Head : Heads) {
|
||||
if (Tails.count(Head))
|
||||
if (InstructionsProcessed.count(Instrs[Head]))
|
||||
continue;
|
||||
bool longerChainExists = false;
|
||||
for (unsigned TIt = 0; TIt < Tails.size(); TIt++)
|
||||
if (Head == Tails[TIt] &&
|
||||
!InstructionsProcessed.count(Instrs[Heads[TIt]])) {
|
||||
longerChainExists = true;
|
||||
break;
|
||||
}
|
||||
if (longerChainExists)
|
||||
continue;
|
||||
|
||||
// We found an instr that starts a chain. Now follow the chain and try to
|
||||
@ -625,7 +641,7 @@ bool Vectorizer::vectorizeInstructions(ArrayRef<Value *> Instrs) {
|
||||
SmallVector<Value *, 16> Operands;
|
||||
int I = Head;
|
||||
while (I != -1 && (Tails.count(I) || Heads.count(I))) {
|
||||
if (VectorizedValues.count(Instrs[I]))
|
||||
if (InstructionsProcessed.count(Instrs[I]))
|
||||
break;
|
||||
|
||||
Operands.push_back(Instrs[I]);
|
||||
@ -634,20 +650,18 @@ bool Vectorizer::vectorizeInstructions(ArrayRef<Value *> Instrs) {
|
||||
|
||||
bool Vectorized = false;
|
||||
if (isa<LoadInst>(*Operands.begin()))
|
||||
Vectorized = vectorizeLoadChain(Operands);
|
||||
Vectorized = vectorizeLoadChain(Operands, &InstructionsProcessed);
|
||||
else
|
||||
Vectorized = vectorizeStoreChain(Operands);
|
||||
Vectorized = vectorizeStoreChain(Operands, &InstructionsProcessed);
|
||||
|
||||
// Mark the vectorized instructions so that we don't vectorize them again.
|
||||
if (Vectorized)
|
||||
VectorizedValues.insert(Operands.begin(), Operands.end());
|
||||
Changed |= Vectorized;
|
||||
}
|
||||
|
||||
return Changed;
|
||||
}
|
||||
|
||||
bool Vectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain) {
|
||||
bool Vectorizer::vectorizeStoreChain(
|
||||
ArrayRef<Value *> Chain, SmallPtrSet<Value *, 16> *InstructionsProcessed) {
|
||||
StoreInst *S0 = cast<StoreInst>(Chain[0]);
|
||||
|
||||
// If the vector has an int element, default to int for the whole load.
|
||||
@ -670,8 +684,28 @@ bool Vectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain) {
|
||||
unsigned VF = VecRegSize / Sz;
|
||||
unsigned ChainSize = Chain.size();
|
||||
|
||||
if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2)
|
||||
if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) {
|
||||
InstructionsProcessed->insert(Chain.begin(), Chain.end());
|
||||
return false;
|
||||
}
|
||||
|
||||
BasicBlock::iterator First, Last;
|
||||
std::tie(First, Last) = getBoundaryInstrs(Chain);
|
||||
unsigned StopChain = getVectorizablePrefixEndIdx(Chain, First, Last);
|
||||
if (StopChain == 0) {
|
||||
// There exists a side effect instruction, no vectorization possible.
|
||||
InstructionsProcessed->insert(Chain.begin(), Chain.end());
|
||||
return false;
|
||||
}
|
||||
if (StopChain == 1) {
|
||||
// Failed after the first instruction. Discard it and try the smaller chain.
|
||||
InstructionsProcessed->insert(Chain.front());
|
||||
return false;
|
||||
}
|
||||
|
||||
// Update Chain to the valid vectorizable subchain.
|
||||
Chain = Chain.slice(0, StopChain);
|
||||
ChainSize = Chain.size();
|
||||
|
||||
// Store size should be 1B, 2B or multiple of 4B.
|
||||
// TODO: Target hook for size constraint?
|
||||
@ -680,11 +714,12 @@ bool Vectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain) {
|
||||
DEBUG(dbgs() << "LSV: Size should be 1B, 2B "
|
||||
"or multiple of 4B. Splitting.\n");
|
||||
if (SzInBytes == 3)
|
||||
return vectorizeStoreChain(Chain.slice(0, ChainSize - 1));
|
||||
return vectorizeStoreChain(Chain.slice(0, ChainSize - 1),
|
||||
InstructionsProcessed);
|
||||
|
||||
auto Chains = splitOddVectorElts(Chain, Sz);
|
||||
return vectorizeStoreChain(Chains.first) |
|
||||
vectorizeStoreChain(Chains.second);
|
||||
return vectorizeStoreChain(Chains.first, InstructionsProcessed) |
|
||||
vectorizeStoreChain(Chains.second, InstructionsProcessed);
|
||||
}
|
||||
|
||||
VectorType *VecTy;
|
||||
@ -700,8 +735,8 @@ bool Vectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain) {
|
||||
if (ChainSize > VF) {
|
||||
DEBUG(dbgs() << "LSV: Vector factor is too big."
|
||||
" Creating two separate arrays.\n");
|
||||
return vectorizeStoreChain(Chain.slice(0, VF)) |
|
||||
vectorizeStoreChain(Chain.slice(VF));
|
||||
return vectorizeStoreChain(Chain.slice(0, VF), InstructionsProcessed) |
|
||||
vectorizeStoreChain(Chain.slice(VF), InstructionsProcessed);
|
||||
}
|
||||
|
||||
DEBUG({
|
||||
@ -710,6 +745,10 @@ bool Vectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain) {
|
||||
V->dump();
|
||||
});
|
||||
|
||||
// We won't try again to vectorize the elements of the chain, regardless of
|
||||
// whether we succeed below.
|
||||
InstructionsProcessed->insert(Chain.begin(), Chain.end());
|
||||
|
||||
// Check alignment restrictions.
|
||||
unsigned Alignment = getAlignment(S0);
|
||||
|
||||
@ -729,12 +768,6 @@ bool Vectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain) {
|
||||
}
|
||||
}
|
||||
|
||||
BasicBlock::iterator First, Last;
|
||||
std::tie(First, Last) = getBoundaryInstrs(Chain);
|
||||
|
||||
if (!isVectorizable(Chain, First, Last))
|
||||
return false;
|
||||
|
||||
// Set insert point.
|
||||
Builder.SetInsertPoint(&*Last);
|
||||
|
||||
@ -782,7 +815,8 @@ bool Vectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Vectorizer::vectorizeLoadChain(ArrayRef<Value *> Chain) {
|
||||
bool Vectorizer::vectorizeLoadChain(
|
||||
ArrayRef<Value *> Chain, SmallPtrSet<Value *, 16> *InstructionsProcessed) {
|
||||
LoadInst *L0 = cast<LoadInst>(Chain[0]);
|
||||
|
||||
// If the vector has an int element, default to int for the whole load.
|
||||
@ -805,8 +839,28 @@ bool Vectorizer::vectorizeLoadChain(ArrayRef<Value *> Chain) {
|
||||
unsigned VF = VecRegSize / Sz;
|
||||
unsigned ChainSize = Chain.size();
|
||||
|
||||
if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2)
|
||||
if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) {
|
||||
InstructionsProcessed->insert(Chain.begin(), Chain.end());
|
||||
return false;
|
||||
}
|
||||
|
||||
BasicBlock::iterator First, Last;
|
||||
std::tie(First, Last) = getBoundaryInstrs(Chain);
|
||||
unsigned StopChain = getVectorizablePrefixEndIdx(Chain, First, Last);
|
||||
if (StopChain == 0) {
|
||||
// There exists a side effect instruction, no vectorization possible.
|
||||
InstructionsProcessed->insert(Chain.begin(), Chain.end());
|
||||
return false;
|
||||
}
|
||||
if (StopChain == 1) {
|
||||
// Failed after the first instruction. Discard it and try the smaller chain.
|
||||
InstructionsProcessed->insert(Chain.front());
|
||||
return false;
|
||||
}
|
||||
|
||||
// Update Chain to the valid vectorizable subchain.
|
||||
Chain = Chain.slice(0, StopChain);
|
||||
ChainSize = Chain.size();
|
||||
|
||||
// Load size should be 1B, 2B or multiple of 4B.
|
||||
// TODO: Should size constraint be a target hook?
|
||||
@ -815,9 +869,11 @@ bool Vectorizer::vectorizeLoadChain(ArrayRef<Value *> Chain) {
|
||||
DEBUG(dbgs() << "LSV: Size should be 1B, 2B "
|
||||
"or multiple of 4B. Splitting.\n");
|
||||
if (SzInBytes == 3)
|
||||
return vectorizeLoadChain(Chain.slice(0, ChainSize - 1));
|
||||
return vectorizeLoadChain(Chain.slice(0, ChainSize - 1),
|
||||
InstructionsProcessed);
|
||||
auto Chains = splitOddVectorElts(Chain, Sz);
|
||||
return vectorizeLoadChain(Chains.first) | vectorizeLoadChain(Chains.second);
|
||||
return vectorizeLoadChain(Chains.first, InstructionsProcessed) |
|
||||
vectorizeLoadChain(Chains.second, InstructionsProcessed);
|
||||
}
|
||||
|
||||
VectorType *VecTy;
|
||||
@ -833,10 +889,14 @@ bool Vectorizer::vectorizeLoadChain(ArrayRef<Value *> Chain) {
|
||||
if (ChainSize > VF) {
|
||||
DEBUG(dbgs() << "LSV: Vector factor is too big. "
|
||||
"Creating two separate arrays.\n");
|
||||
return vectorizeLoadChain(Chain.slice(0, VF)) |
|
||||
vectorizeLoadChain(Chain.slice(VF));
|
||||
return vectorizeLoadChain(Chain.slice(0, VF), InstructionsProcessed) |
|
||||
vectorizeLoadChain(Chain.slice(VF), InstructionsProcessed);
|
||||
}
|
||||
|
||||
// We won't try again to vectorize the elements of the chain, regardless of
|
||||
// whether we succeed below.
|
||||
InstructionsProcessed->insert(Chain.begin(), Chain.end());
|
||||
|
||||
// Check alignment restrictions.
|
||||
unsigned Alignment = getAlignment(L0);
|
||||
|
||||
@ -862,12 +922,6 @@ bool Vectorizer::vectorizeLoadChain(ArrayRef<Value *> Chain) {
|
||||
V->dump();
|
||||
});
|
||||
|
||||
BasicBlock::iterator First, Last;
|
||||
std::tie(First, Last) = getBoundaryInstrs(Chain);
|
||||
|
||||
if (!isVectorizable(Chain, First, Last))
|
||||
return false;
|
||||
|
||||
// Set insert point.
|
||||
Builder.SetInsertPoint(&*First);
|
||||
|
||||
|
@ -48,8 +48,7 @@ define void @interleave_3L_2S_1L(i32* noalias %ptr) {
|
||||
; CHECK-LABEL: @chain_suffix(
|
||||
; CHECK: load i32
|
||||
; CHECK: store <2 x i32>
|
||||
; CHECK: load i32
|
||||
; CHECK: load i32
|
||||
; CHECK: load <2 x i32>
|
||||
define void @chain_suffix(i32* noalias %ptr) {
|
||||
%next.gep = getelementptr i32, i32* %ptr, i64 0
|
||||
%next.gep1 = getelementptr i32, i32* %ptr, i64 1
|
||||
@ -66,12 +65,9 @@ define void @chain_suffix(i32* noalias %ptr) {
|
||||
|
||||
|
||||
; CHECK-LABEL: @chain_prefix_suffix(
|
||||
; CHECK: load i32
|
||||
; CHECK: load i32
|
||||
; CHECK: load <2 x i32>
|
||||
; CHECK: store <2 x i32>
|
||||
; CHECK: load i32
|
||||
; CHECK: load i32
|
||||
; CHECK: load i32
|
||||
; CHECK: load <3 x i32>
|
||||
define void @chain_prefix_suffix(i32* noalias %ptr) {
|
||||
%next.gep = getelementptr i32, i32* %ptr, i64 0
|
||||
%next.gep1 = getelementptr i32, i32* %ptr, i64 1
|
||||
|
Loading…
x
Reference in New Issue
Block a user