mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-04 01:11:44 +00:00
Revert "Revert "Fix merges of non-zero vector stores""
Reapply r239539. Don't assume that the number of collected stores is the same as the vector size; just take the first N stores to fill the vector. (llvm-svn: 239825)
This commit is contained in:
parent
134c99480b
commit
fbfa66ae3c
@ -388,6 +388,13 @@ namespace {
|
||||
unsigned SequenceNum;
|
||||
};
|
||||
|
||||
/// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a
|
||||
/// constant build_vector of the stored constant values in Stores.
|
||||
SDValue getMergedConstantVectorStore(SelectionDAG &DAG,
|
||||
SDLoc SL,
|
||||
ArrayRef<MemOpLink> Stores,
|
||||
EVT Ty) const;
|
||||
|
||||
/// This is a helper function for MergeConsecutiveStores. When the source
|
||||
/// elements of the consecutive stores are all constants or all extracted
|
||||
/// vector elements, try to merge them into one larger store.
|
||||
@ -10591,6 +10598,18 @@ struct BaseIndexOffset {
|
||||
};
|
||||
} // namespace
|
||||
|
||||
/// Assemble a BUILD_VECTOR node of type \p Ty out of the values written by the
/// leading entries of \p Stores.
///
/// Exactly Ty.getVectorNumElements() entries are read, starting at index 0;
/// any entries beyond that count are left untouched. Each entry's MemNode is
/// cast to a StoreSDNode and its stored value becomes one vector operand, in
/// the same order as \p Stores.
///
/// \param DAG    DAG used to create the new node.
/// \param SL     Location attached to the new node.
/// \param Stores Candidate stores; only the first N are consumed.
/// \param Ty     Vector type of the node to build; also determines N.
/// \returns the BUILD_VECTOR node produced by \p DAG.
SDValue DAGCombiner::getMergedConstantVectorStore(SelectionDAG &DAG,
                                                  SDLoc SL,
                                                  ArrayRef<MemOpLink> Stores,
                                                  EVT Ty) const {
  const unsigned NumLanes = Ty.getVectorNumElements();
  SmallVector<SDValue, 8> Lanes;

  unsigned Lane = 0;
  while (Lane != NumLanes) {
    auto *St = cast<StoreSDNode>(Stores[Lane].MemNode);
    Lanes.push_back(St->getValue());
    ++Lane;
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SL, Ty, Lanes);
}
|
||||
|
||||
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
|
||||
SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
|
||||
unsigned NumElem, bool IsConstantSrc, bool UseVector) {
|
||||
@ -10621,12 +10640,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
|
||||
EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
|
||||
assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
|
||||
if (IsConstantSrc) {
|
||||
// A vector store with a constant source implies that the constant is
|
||||
// zero; we only handle merging stores of constant zeros because the zero
|
||||
// can be materialized without a load.
|
||||
// It may be beneficial to loosen this restriction to allow non-zero
|
||||
// store merging.
|
||||
StoredVal = DAG.getConstant(0, DL, Ty);
|
||||
StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Ty);
|
||||
} else {
|
||||
SmallVector<SDValue, 8> Ops;
|
||||
for (unsigned i = 0; i < NumElem ; ++i) {
|
||||
|
@ -89,7 +89,11 @@ define void @merge_global_store_2_constants_i32_f32(i32 addrspace(1)* %out) #0 {
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}merge_global_store_2_constants_f32_i32:
|
||||
; GCN: buffer_store_dwordx2
|
||||
; SI-DAG: s_mov_b32 [[SLO:s[0-9]+]], 4.0
|
||||
; SI-DAG: s_movk_i32 [[SHI:s[0-9]+]], 0x7b{{$}}
|
||||
; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], [[SLO]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], [[SHI]]
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
|
||||
define void @merge_global_store_2_constants_f32_i32(float addrspace(1)* %out) #0 {
|
||||
%out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
|
||||
%out.gep.1.bc = bitcast float addrspace(1)* %out.gep.1 to i32 addrspace(1)*
|
||||
@ -99,7 +103,11 @@ define void @merge_global_store_2_constants_f32_i32(float addrspace(1)* %out) #0
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}merge_global_store_4_constants_i32:
|
||||
; GCN: buffer_store_dwordx4
|
||||
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x14d{{$}}
|
||||
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x1c8{{$}}
|
||||
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x7b{{$}}
|
||||
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0x4d2{{$}}
|
||||
; GCN: buffer_store_dwordx4 v{{\[}}[[LO]]:[[HI]]{{\]}}
|
||||
define void @merge_global_store_4_constants_i32(i32 addrspace(1)* %out) #0 {
|
||||
%out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
|
||||
%out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
|
||||
@ -530,6 +538,95 @@ define void @merge_local_store_4_constants_i32(i32 addrspace(3)* %out) #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; Five i32 constant stores to consecutive elements 0..4 of %out.
; The expectations below want one 4-dword store whose register range starts at
; the register holding 9 (element 0) and ends at the one holding -12
; (element 3), followed by a separate single-dword store of 11 (element 4).
; GCN-LABEL: {{^}}merge_global_store_5_constants_i32:
|
||||
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 9{{$}}
|
||||
; GCN-DAG: v_mov_b32_e32 v[[HI4:[0-9]+]], -12{{$}}
|
||||
; GCN: buffer_store_dwordx4 v{{\[}}[[LO]]:[[HI4]]{{\]}}
|
||||
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 11{{$}}
|
||||
; GCN: buffer_store_dword v[[HI]]
|
||||
define void @merge_global_store_5_constants_i32(i32 addrspace(1)* %out) {
|
||||
store i32 9, i32 addrspace(1)* %out, align 4
|
||||
%idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
|
||||
store i32 12, i32 addrspace(1)* %idx1, align 4
|
||||
%idx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2
|
||||
store i32 16, i32 addrspace(1)* %idx2, align 4
|
||||
%idx3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3
|
||||
store i32 -12, i32 addrspace(1)* %idx3, align 4
|
||||
%idx4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 4
|
||||
store i32 11, i32 addrspace(1)* %idx4, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Six i32 constant stores to consecutive elements 0..5 of %out.
; The expectations below want a 4-dword store followed by a 2-dword store;
; the register contents are not checked here.
; GCN-LABEL: {{^}}merge_global_store_6_constants_i32:
|
||||
; GCN: buffer_store_dwordx4
|
||||
; GCN: buffer_store_dwordx2
|
||||
define void @merge_global_store_6_constants_i32(i32 addrspace(1)* %out) {
|
||||
store i32 13, i32 addrspace(1)* %out, align 4
|
||||
%idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
|
||||
store i32 15, i32 addrspace(1)* %idx1, align 4
|
||||
%idx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2
|
||||
store i32 62, i32 addrspace(1)* %idx2, align 4
|
||||
%idx3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3
|
||||
store i32 63, i32 addrspace(1)* %idx3, align 4
|
||||
%idx4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 4
|
||||
store i32 11, i32 addrspace(1)* %idx4, align 4
|
||||
%idx5 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 5
|
||||
store i32 123, i32 addrspace(1)* %idx5, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Seven i32 constant stores to consecutive elements 0..6 of %out.
; The expectations below want, in order, a 4-dword store, a 2-dword store and
; a single-dword store; the register contents are not checked here.
; GCN-LABEL: {{^}}merge_global_store_7_constants_i32:
|
||||
; GCN: buffer_store_dwordx4
|
||||
; GCN: buffer_store_dwordx2
|
||||
; GCN: buffer_store_dword v
|
||||
define void @merge_global_store_7_constants_i32(i32 addrspace(1)* %out) {
|
||||
store i32 34, i32 addrspace(1)* %out, align 4
|
||||
%idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
|
||||
store i32 999, i32 addrspace(1)* %idx1, align 4
|
||||
%idx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2
|
||||
store i32 65, i32 addrspace(1)* %idx2, align 4
|
||||
%idx3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3
|
||||
store i32 33, i32 addrspace(1)* %idx3, align 4
|
||||
%idx4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 4
|
||||
store i32 98, i32 addrspace(1)* %idx4, align 4
|
||||
%idx5 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 5
|
||||
store i32 91, i32 addrspace(1)* %idx5, align 4
|
||||
%idx6 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 6
|
||||
store i32 212, i32 addrspace(1)* %idx6, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Eight i32 constant stores to consecutive elements 0..7 of %out.
; The expectations under the same prefix as the label want eight individual
; single-dword stores. The two 4-dword lines use a different, X-prefixed
; spelling; NOTE(review): this looks like a deliberately disabled expectation
; for two merged 4-dword stores - confirm against the RUN lines, which are not
; visible in this chunk.
; GCN-LABEL: {{^}}merge_global_store_8_constants_i32:
|
||||
; XGCN: buffer_store_dwordx4
|
||||
; XGCN: buffer_store_dwordx4
|
||||
|
||||
; GCN: buffer_store_dword v
|
||||
; GCN: buffer_store_dword v
|
||||
; GCN: buffer_store_dword v
|
||||
; GCN: buffer_store_dword v
|
||||
; GCN: buffer_store_dword v
|
||||
; GCN: buffer_store_dword v
|
||||
; GCN: buffer_store_dword v
|
||||
; GCN: buffer_store_dword v
|
||||
define void @merge_global_store_8_constants_i32(i32 addrspace(1)* %out) {
|
||||
store i32 34, i32 addrspace(1)* %out, align 4
|
||||
%idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
|
||||
store i32 999, i32 addrspace(1)* %idx1, align 4
|
||||
%idx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2
|
||||
store i32 65, i32 addrspace(1)* %idx2, align 4
|
||||
%idx3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3
|
||||
store i32 33, i32 addrspace(1)* %idx3, align 4
|
||||
%idx4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 4
|
||||
store i32 98, i32 addrspace(1)* %idx4, align 4
|
||||
%idx5 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 5
|
||||
store i32 91, i32 addrspace(1)* %idx5, align 4
|
||||
%idx6 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 6
|
||||
store i32 212, i32 addrspace(1)* %idx6, align 4
|
||||
%idx7 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 7
|
||||
store i32 999, i32 addrspace(1)* %idx7, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.AMDGPU.barrier.local() #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
@ -3,6 +3,7 @@
|
||||
; CHECK: merge_stores_can
|
||||
; CHECK: callq foo
|
||||
; CHECK: xorps %xmm0, %xmm0
|
||||
; CHECK-NEXT: movl 36(%rsp), %ebp
|
||||
; CHECK-NEXT: movups %xmm0
|
||||
; CHECK: callq foo
|
||||
; CHECK: ret
|
||||
|
Loading…
Reference in New Issue
Block a user