mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-27 07:12:06 +00:00
AMDGPU/SI: Set INDEX_STRIDE for scratch coalescing
Summary:
Mesa and other users must set this to enable coalescing:
- STRIDE = 0
- SWIZZLE_ENABLE = 1

This makes one particular compute shader 8x faster.

Reviewers: tstellarAMD, arsenm
Subscribers: arsenm, kzhuravl
Differential Revision: http://reviews.llvm.org/D21136

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@272556 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
4133ef3efb
commit
760c36c5ae
@ -3095,7 +3095,9 @@ uint64_t SIInstrInfo::getScratchRsrcWords23() const {
|
||||
|
||||
uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1;
|
||||
|
||||
Rsrc23 |= (EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT);
|
||||
Rsrc23 |= (EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT) |
|
||||
// IndexStride = 64
|
||||
(UINT64_C(3) << AMDGPU::RSRC_INDEX_STRIDE_SHIFT);
|
||||
|
||||
// If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
|
||||
// Clear them unless we want a huge stride.
|
||||
|
@ -547,8 +547,9 @@ namespace AMDGPU {
|
||||
int getAtomicNoRetOp(uint16_t Opcode);
|
||||
|
||||
const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
|
||||
const uint64_t RSRC_TID_ENABLE = 1LL << 55;
|
||||
const uint64_t RSRC_ELEMENT_SIZE_SHIFT = 51;
|
||||
const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
|
||||
const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
|
||||
const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23);
|
||||
} // End namespace AMDGPU
|
||||
|
||||
namespace SI {
|
||||
|
@ -12,8 +12,8 @@
|
||||
; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
|
||||
; GCN-DAG: ; fixup A - offset: 4, value: SCRATCH_RSRC_DWORD1, kind: FK_Data_4
|
||||
; GCN-DAG: s_mov_b32 s{{[0-9]+}}, -1
|
||||
; CI-DAG: s_mov_b32 s{{[0-9]+}}, 0x88f000
|
||||
; VI-DAG: s_mov_b32 s{{[0-9]+}}, 0x880000
|
||||
; CI-DAG: s_mov_b32 s{{[0-9]+}}, 0xe8f000
|
||||
; VI-DAG: s_mov_b32 s{{[0-9]+}}, 0xe80000
|
||||
|
||||
|
||||
; GCNHSA: .amd_kernel_code_t
|
||||
|
@ -5,8 +5,8 @@
|
||||
; GCN-DAG: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
|
||||
; GCN-DAG: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
|
||||
; GCN-DAG: s_mov_b32 s10, -1
|
||||
; CI-DAG: s_mov_b32 s11, 0x88f000
|
||||
; VI-DAG: s_mov_b32 s11, 0x880000
|
||||
; CI-DAG: s_mov_b32 s11, 0xe8f000
|
||||
; VI-DAG: s_mov_b32 s11, 0xe80000
|
||||
|
||||
; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s0 offen
|
||||
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s0 offen
|
||||
@ -26,8 +26,8 @@ define amdgpu_ps void @large_alloca_pixel_shader(i32 %x, i32 %y) #0 {
|
||||
; GCN-DAG: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
|
||||
; GCN-DAG: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
|
||||
; GCN-DAG: s_mov_b32 s10, -1
|
||||
; CI-DAG: s_mov_b32 s11, 0x88f000
|
||||
; VI-DAG: s_mov_b32 s11, 0x880000
|
||||
; CI-DAG: s_mov_b32 s11, 0xe8f000
|
||||
; VI-DAG: s_mov_b32 s11, 0xe80000
|
||||
|
||||
; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s2 offen
|
||||
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s2 offen
|
||||
|
@ -23,8 +23,8 @@
|
||||
; GCNMESA-DAG: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GCNMESA-DAG: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
|
||||
; GCNMESA-DAG: s_mov_b32 s14, -1
|
||||
; SIMESA-DAG: s_mov_b32 s15, 0x88f000
|
||||
; VIMESA-DAG: s_mov_b32 s15, 0x880000
|
||||
; SIMESA-DAG: s_mov_b32 s15, 0xe8f000
|
||||
; VIMESA-DAG: s_mov_b32 s15, 0xe80000
|
||||
|
||||
|
||||
; GCN: buffer_store_dword {{v[0-9]+}}, off, s[12:15], s16 offset:{{[0-9]+}} ; 4-byte Folded Spill
|
||||
|
@ -16,8 +16,8 @@
|
||||
; GCN-DAG: s_mov_b32 s16, SCRATCH_RSRC_DWORD0
|
||||
; GCN-DAG: s_mov_b32 s17, SCRATCH_RSRC_DWORD1
|
||||
; GCN-DAG: s_mov_b32 s18, -1
|
||||
; SI-DAG: s_mov_b32 s19, 0x88f000
|
||||
; VI-DAG: s_mov_b32 s19, 0x880000
|
||||
; SI-DAG: s_mov_b32 s19, 0xe8f000
|
||||
; VI-DAG: s_mov_b32 s19, 0xe80000
|
||||
|
||||
; s13 is offset system SGPR
|
||||
; GCN: buffer_store_dword {{v[0-9]+}}, off, s[16:19], s13 offset:{{[0-9]+}} ; 16-byte Folded Spill
|
||||
|
Loading…
x
Reference in New Issue
Block a user