AMDGPU: Buffer descriptor changes for GFX9

Reviewers: arsenm

Subscribers: qcolombet, kzhuravl, wdng, nhaehnle, yaxunl, tony-tye, dstuttard, tpr

Differential Revision: https://reviews.llvm.org/D31158

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@298397 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Marek Olsak 2017-03-21 17:00:39 +00:00
parent fe4c8daa0f
commit 1f6c4f9203
5 changed files with 23 additions and 8 deletions

View File

@ -3693,10 +3693,13 @@ MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
if (ST.isAmdHsaOS()) {
RsrcDataFormat |= (1ULL << 56);
// Set ATC = 1. GFX9 doesn't have this bit.
if (ST.getGeneration() <= SISubtarget::VOLCANIC_ISLANDS)
RsrcDataFormat |= (1ULL << 56);
if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
// Set MTYPE = 2
// Set MTYPE = 2 (MTYPE_UC = uncached). GFX9 doesn't have this.
// BTW, it disables TC L2 and therefore decreases performance.
if (ST.getGeneration() == SISubtarget::VOLCANIC_ISLANDS)
RsrcDataFormat |= (2ULL << 59);
}
@ -3708,11 +3711,14 @@ uint64_t SIInstrInfo::getScratchRsrcWords23() const {
AMDGPU::RSRC_TID_ENABLE |
0xffffffff; // Size;
uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1;
// GFX9 doesn't have ELEMENT_SIZE.
if (ST.getGeneration() <= SISubtarget::VOLCANIC_ISLANDS) {
uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1;
Rsrc23 |= EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT;
}
Rsrc23 |= (EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT) |
// IndexStride = 64
(UINT64_C(3) << AMDGPU::RSRC_INDEX_STRIDE_SHIFT);
// IndexStride = 64.
Rsrc23 |= UINT64_C(3) << AMDGPU::RSRC_INDEX_STRIDE_SHIFT;
// If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
// Clear them unless we want a huge stride.

View File

@ -1,5 +1,6 @@
; RUN: llc -march=amdgcn -mcpu=bonaire -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=ALL %s
; RUN: llc -march=amdgcn -mcpu=carrizo --show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=ALL %s
; RUN: llc -march=amdgcn -mcpu=gfx900 --show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=ALL %s
; RUN: llc -march=amdgcn -mcpu=bonaire -mtriple=amdgcn-unknown-amdhsa < %s -mattr=-flat-for-global | FileCheck -check-prefix=GCNHSA -check-prefix=CIHSA -check-prefix=ALL %s
; RUN: llc -march=amdgcn -mcpu=carrizo -mtriple=amdgcn-unknown-amdhsa -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCNHSA -check-prefix=VIHSA -check-prefix=ALL %s
@ -14,6 +15,7 @@
; GCN-DAG: s_mov_b32 s{{[0-9]+}}, -1
; CI-DAG: s_mov_b32 s{{[0-9]+}}, 0xe8f000
; VI-DAG: s_mov_b32 s{{[0-9]+}}, 0xe80000
; GFX9-DAG: s_mov_b32 s{{[0-9]+}}, 0xe00000
; GCNHSA: .amd_kernel_code_t

View File

@ -1,5 +1,6 @@
; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=ALL %s
; RUN: llc -march=amdgcn -mcpu=carrizo -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=ALL %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=ALL %s
; ALL-LABEL: {{^}}large_alloca_pixel_shader:
; GCN-DAG: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
@ -7,6 +8,7 @@
; GCN-DAG: s_mov_b32 s10, -1
; CI-DAG: s_mov_b32 s11, 0xe8f000
; VI-DAG: s_mov_b32 s11, 0xe80000
; GFX9-DAG: s_mov_b32 s11, 0xe00000
; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s0 offen
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s0 offen
@ -28,6 +30,7 @@ define amdgpu_ps void @large_alloca_pixel_shader(i32 %x, i32 %y) #0 {
; GCN-DAG: s_mov_b32 s10, -1
; CI-DAG: s_mov_b32 s11, 0xe8f000
; VI-DAG: s_mov_b32 s11, 0xe80000
; GFX9-DAG: s_mov_b32 s11, 0xe00000
; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s2 offen
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s2 offen

View File

@ -1,5 +1,6 @@
; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=SIMESA %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+vgpr-spilling,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=VIMESA %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+vgpr-spilling,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=GFX9MESA %s
; RUN: llc -march=amdgcn -mcpu=hawaii -mtriple=amdgcn-unknown-amdhsa -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CIHSA -check-prefix=HSA %s
; RUN: llc -march=amdgcn -mcpu=fiji -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VIHSA -check-prefix=HSA %s
@ -21,10 +22,11 @@
; GCNMESA-DAG: s_mov_b32 s16, s3
; GCNMESA-DAG: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GCNMESA--DAG: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GCNMESA-DAG: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GCNMESA-DAG: s_mov_b32 s14, -1
; SIMESA-DAG: s_mov_b32 s15, 0xe8f000
; VIMESA-DAG: s_mov_b32 s15, 0xe80000
; GFX9MESA-DAG: s_mov_b32 s15, 0xe00000
; GCN: buffer_store_dword {{v[0-9]+}}, off, s[12:15], s16 offset:{{[0-9]+}} ; 4-byte Folded Spill

View File

@ -1,5 +1,6 @@
; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
; This ends up using all 255 registers and requires register
; scavenging which will fail to find an unused register.
@ -18,6 +19,7 @@
; GCN-DAG: s_mov_b32 s{{[0-9]+}}, -1
; SI-DAG: s_mov_b32 s[[DESC3:[0-9]+]], 0xe8f000
; VI-DAG: s_mov_b32 s[[DESC3:[0-9]+]], 0xe80000
; GFX9-DAG: s_mov_b32 s[[DESC3:[0-9]+]], 0xe00000
; OFFREG is offset system SGPR
; GCN: buffer_store_dword {{v[0-9]+}}, off, s{{\[}}[[DESC0]]:[[DESC3]]], s[[OFFREG]] offset:{{[0-9]+}} ; 4-byte Folded Spill