[LoadStoreVectorizer] Enable vectorization of stores in the presence of an aliasing load
Summary:
The "getVectorizablePrefix" method would give up if it found an aliasing load
for a store chain. In practice, the aliasing load can be treated as a memory
barrier, and all stores that precede it are a valid vectorizable prefix.

Issue found by volkan in D26962. Testcase is a pruned version of the one in
the original patch.

Reviewers: jlebar, arsenm, tstellarAMD

Subscribers: mzolotukhin, wdng, nhaehnle, anna, volkan, llvm-commits

Differential Revision: https://reviews.llvm.org/D27008

llvm-svn: 287781
parent 7277c2fa1f
commit 40a0781572
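For illustration only: below is a minimal standalone sketch of the barrier-based prefix scan this patch introduces. It is not the pass code; MemOp, mayAlias, and vectorizableStorePrefix are hypothetical stand-ins for LLVM's Instruction, AliasAnalysis, and getVectorizablePrefix.

    #include <cstddef>
    #include <vector>

    // Toy stand-in for a load or store, kept in basic-block order.
    struct MemOp {
      bool IsLoad;
      size_t BBIdx; // position within the basic block
      size_t Addr;  // start of the accessed byte range (toy address space)
      size_t Size;  // number of bytes accessed
    };

    // Toy alias oracle standing in for AliasAnalysis: two accesses may
    // alias iff their byte ranges overlap.
    static bool mayAlias(const MemOp &A, const MemOp &B) {
      return A.Addr < B.Addr + B.Size && B.Addr < A.Addr + A.Size;
    }

    // New behavior for a store chain: an aliasing load acts as a memory
    // barrier, and the stores preceding it remain a vectorizable prefix.
    size_t vectorizableStorePrefix(const std::vector<MemOp> &Chain,
                                   const std::vector<MemOp> &MemOps) {
      const MemOp *Barrier = nullptr;
      size_t PrefixLen = 0;
      for (const MemOp &ChainOp : Chain) {
        // Chain ops that follow the barrier cannot join the prefix.
        if (Barrier && Barrier->BBIdx < ChainOp.BBIdx)
          break;
        for (const MemOp &Op : MemOps) {
          // Do not check past the barrier.
          if (Barrier && Barrier->BBIdx < Op.BBIdx)
            break;
          // Two loads never conflict with each other.
          if (Op.IsLoad && ChainOp.IsLoad)
            continue;
          if (mayAlias(Op, ChainOp)) {
            Barrier = &Op; // remember the first conflict as a barrier ...
            break;         // ... but keep the stores accepted so far
          }
        }
        ++PrefixLen;
      }
      return PrefixLen;
    }

For a load chain the scan stops at the barrier itself rather than continuing past it; the second hunk below spells out why.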
@@ -477,10 +477,23 @@ Vectorizer::getVectorizablePrefix(ArrayRef<Instruction *> Chain) {
   // Loop until we find an instruction in ChainInstrs that we can't vectorize.
   unsigned ChainInstrIdx = 0;
+  Instruction *BarrierMemoryInstr = nullptr;
+
   for (unsigned E = ChainInstrs.size(); ChainInstrIdx < E; ++ChainInstrIdx) {
     Instruction *ChainInstr = ChainInstrs[ChainInstrIdx];
-    bool AliasFound = false;
+
+    // If a barrier memory instruction was found, chain instructions that follow
+    // will not be added to the valid prefix.
+    if (BarrierMemoryInstr && OBB.dominates(BarrierMemoryInstr, ChainInstr))
+      break;
+
+    // Check (in BB order) if any instruction prevents ChainInstr from being
+    // vectorized. Find and store the first such "conflicting" instruction.
     for (Instruction *MemInstr : MemoryInstrs) {
+      // If a barrier memory instruction was found, do not check past it.
+      if (BarrierMemoryInstr && OBB.dominates(BarrierMemoryInstr, MemInstr))
+        break;
+
       if (isa<LoadInst>(MemInstr) && isa<LoadInst>(ChainInstr))
         continue;
 
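Both new checks lean on OrderedBasicBlock::dominates, which for two instructions in the same block amounts to "A precedes B" in instruction order. A toy equivalent of the barrier test, with BBIdx as a hypothetical stand-in for the position OrderedBasicBlock caches per instruction:

    #include <cstddef>

    struct InstrPos { size_t BBIdx; }; // toy per-instruction position

    // Within one basic block, dominates(A, B) reduces to "A comes first".
    static bool dominatesInBlock(InstrPos A, InstrPos B) {
      return A.BBIdx < B.BBIdx;
    }

    // Both checks above then read: stop once the instruction being
    // examined lies after the recorded barrier.
    static bool pastBarrier(const InstrPos *Barrier, InstrPos I) {
      return Barrier && dominatesInBlock(*Barrier, I);
    }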
@@ -508,12 +521,21 @@ Vectorizer::getVectorizablePrefix(ArrayRef<Instruction *> Chain) {
                << "  " << *ChainInstr << '\n'
                << "  " << *getPointerOperand(ChainInstr) << '\n';
         });
-        AliasFound = true;
+        // Save this aliasing memory instruction as a barrier, but allow other
+        // instructions that precede the barrier to be vectorized with this one.
+        BarrierMemoryInstr = MemInstr;
         break;
       }
     }
-    if (AliasFound)
+    // Continue the search only for store chains, since vectorizing stores that
+    // precede an aliasing load is valid. Conversely, vectorizing loads is valid
+    // up to an aliasing store, but should not pull loads from further down in
+    // the basic block.
+    if (IsLoadChain && BarrierMemoryInstr) {
+      // The BarrierMemoryInstr is a store that precedes ChainInstr.
+      assert(OBB.dominates(BarrierMemoryInstr, ChainInstr));
       break;
+    }
   }
 
   // Find the largest prefix of Chain whose elements are all in
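The load/store asymmetry in the new comment is the heart of the patch; a toy summary (names mirror the pass variables, but this is not the pass code):

    // Store chain:                    Load chain:
    //   store a[0]  <- chain            load b[0]  <- chain
    //   store a[1]  <- chain            load b[1]  <- chain
    //   load  a[1]  <- barrier          store b[1] <- barrier, scan stops
    //   store a[2]  <- cut off          load b[2]  <- never reached
    //
    // Stores before the aliasing load can still be combined, because the
    // widened store is inserted before the load. A widened load must not
    // cover elements loaded after an aliasing store, so a load chain's
    // scan stops at the barrier.
    static bool stopScanAtBarrier(bool IsLoadChain, bool BarrierFound) {
      return IsLoadChain && BarrierFound;
    }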
@@ -61,13 +61,11 @@ entry:
 }
 
 ; Here we have four stores, with an aliasing load before the last one. We can
-; vectorize the first two stores as <2 x float>, but this vectorized store must
-; be inserted at the location of the second scalar store, not the fourth one.
+; vectorize the first three stores as <3 x float>, but this vectorized store must
+; be inserted at the location of the third scalar store, not the fourth one.
 ;
 ; CHECK-LABEL: @insert_store_point_alias
-; CHECK: store <2 x float>
-; CHECK: store float
-; CHECK-SAME: %a.idx.2
+; CHECK: store <3 x float>
 ; CHECK: load float, float addrspace(1)* %a.idx.2
 ; CHECK: store float
 ; CHECK-SAME: %a.idx.3
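A sketch of the insertion-point rule the updated CHECK lines pin down: the widened store takes the place of the last scalar store of the prefix, so it still executes before the aliasing load (toy positions, hypothetical helper, not the pass code):

    #include <cstddef>
    #include <vector>

    // The <3 x float> store replaces the third scalar store, never the
    // fourth one, so it stays ahead of the aliasing load in block order.
    static size_t vectorStoreInsertPos(const std::vector<size_t> &StorePos,
                                       size_t PrefixLen) {
      return StorePos[PrefixLen - 1]; // position of the last prefix store
    }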
@@ -0,0 +1,58 @@
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -load-store-vectorizer -S -o - %s | FileCheck %s
+
+; Check that, in the presence of an aliasing load, the stores preceding the
+; aliasing load are safe to vectorize.
+
+; CHECK-LABEL: store_vectorize_with_alias
+; CHECK: store <4 x float>
+; CHECK: load <4 x float>
+; CHECK: store <4 x float>
+
+; Function Attrs: nounwind
+define void @store_vectorize_with_alias(i8 addrspace(1)* %a, i8 addrspace(1)* %b) #0 {
+bb:
+  %tmp = bitcast i8 addrspace(1)* %b to float addrspace(1)*
+  %tmp1 = load float, float addrspace(1)* %tmp, align 4
+
+  %tmp2 = bitcast i8 addrspace(1)* %a to float addrspace(1)*
+  store float %tmp1, float addrspace(1)* %tmp2, align 4
+  %tmp3 = getelementptr i8, i8 addrspace(1)* %a, i64 4
+  %tmp4 = bitcast i8 addrspace(1)* %tmp3 to float addrspace(1)*
+  store float %tmp1, float addrspace(1)* %tmp4, align 4
+  %tmp5 = getelementptr i8, i8 addrspace(1)* %a, i64 8
+  %tmp6 = bitcast i8 addrspace(1)* %tmp5 to float addrspace(1)*
+  store float %tmp1, float addrspace(1)* %tmp6, align 4
+  %tmp7 = getelementptr i8, i8 addrspace(1)* %a, i64 12
+  %tmp8 = bitcast i8 addrspace(1)* %tmp7 to float addrspace(1)*
+  store float %tmp1, float addrspace(1)* %tmp8, align 4
+
+  %tmp9 = getelementptr i8, i8 addrspace(1)* %b, i64 16
+  %tmp10 = bitcast i8 addrspace(1)* %tmp9 to float addrspace(1)*
+  %tmp11 = load float, float addrspace(1)* %tmp10, align 4
+  %tmp12 = getelementptr i8, i8 addrspace(1)* %b, i64 20
+  %tmp13 = bitcast i8 addrspace(1)* %tmp12 to float addrspace(1)*
+  %tmp14 = load float, float addrspace(1)* %tmp13, align 4
+  %tmp15 = getelementptr i8, i8 addrspace(1)* %b, i64 24
+  %tmp16 = bitcast i8 addrspace(1)* %tmp15 to float addrspace(1)*
+  %tmp17 = load float, float addrspace(1)* %tmp16, align 4
+  %tmp18 = getelementptr i8, i8 addrspace(1)* %b, i64 28
+  %tmp19 = bitcast i8 addrspace(1)* %tmp18 to float addrspace(1)*
+  %tmp20 = load float, float addrspace(1)* %tmp19, align 4
+
+  %tmp21 = getelementptr i8, i8 addrspace(1)* %a, i64 16
+  %tmp22 = bitcast i8 addrspace(1)* %tmp21 to float addrspace(1)*
+  store float %tmp11, float addrspace(1)* %tmp22, align 4
+  %tmp23 = getelementptr i8, i8 addrspace(1)* %a, i64 20
+  %tmp24 = bitcast i8 addrspace(1)* %tmp23 to float addrspace(1)*
+  store float %tmp14, float addrspace(1)* %tmp24, align 4
+  %tmp25 = getelementptr i8, i8 addrspace(1)* %a, i64 24
+  %tmp26 = bitcast i8 addrspace(1)* %tmp25 to float addrspace(1)*
+  store float %tmp17, float addrspace(1)* %tmp26, align 4
+  %tmp27 = getelementptr i8, i8 addrspace(1)* %a, i64 28
+  %tmp28 = bitcast i8 addrspace(1)* %tmp27 to float addrspace(1)*
+  store float %tmp20, float addrspace(1)* %tmp28, align 4
+
+  ret void
+}
+
+attributes #0 = { argmemonly nounwind }