mirror of
https://github.com/RPCSX/llvm.git
synced 2025-03-01 09:26:22 +00:00
AMDGPU/SI: Don't reserve XNACK when it's disabled
Summary: This frees 2 additional scalar registers when XNACK is disabled. These are the combined results of all 3 of my patches: Polaris: Spilled SGPRs: 2231 -> 1517 (-32.00 %) Tonga: Spilled SGPRs: 3829 -> 2608 (-31.89 %) Spilled VGPRs: 100 -> 84 (-16.00 %) Tonga even spills SGPRs via VGPRs to scratch. That's a compute shader limited to 64 VGPRs. Reviewers: tstellarAMD Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, tony-tye Differential Revision: https://reviews.llvm.org/D27151 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@289262 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
36d5f19e1d
commit
29e2dd8cf4
@ -73,6 +73,13 @@ def FeatureUnalignedScratchAccess : SubtargetFeature<"unaligned-scratch-access",
|
||||
"Support unaligned scratch loads and stores"
|
||||
>;
|
||||
|
||||
// XNACK is disabled if SH_MEM_CONFIG.ADDRESS_MODE = GPUVM on chips that support
|
||||
// XNACK. The current default kernel driver setting is:
|
||||
// - graphics ring: XNACK disabled
|
||||
// - compute ring: XNACK enabled
|
||||
//
|
||||
// If XNACK is enabled, the VMEM latency can be worse.
|
||||
// If XNACK is disabled, the 2 SGPRs can be used for general purposes.
|
||||
def FeatureXNACK : SubtargetFeature<"xnack",
|
||||
"EnableXNACK",
|
||||
"true",
|
||||
|
@ -1188,7 +1188,7 @@ unsigned SIRegisterInfo::getNumReservedSGPRs(const SISubtarget &ST,
|
||||
return 4; // FLAT_SCRATCH, VCC (in that order)
|
||||
}
|
||||
|
||||
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
|
||||
if (ST.isXNACKEnabled())
|
||||
return 4; // XNACK, VCC (in that order)
|
||||
|
||||
return 2; // VCC.
|
||||
|
@ -9,10 +9,10 @@
|
||||
; FIXME: Should be able to skip this copying of the private segment
|
||||
; buffer because all the SGPR spills are to VGPRs.
|
||||
|
||||
; ALL: s_mov_b64 s[6:7], s[2:3]
|
||||
; ALL: s_mov_b64 s[4:5], s[0:1]
|
||||
; ALL: s_mov_b64 s[10:11], s[2:3]
|
||||
; ALL: s_mov_b64 s[8:9], s[0:1]
|
||||
; ALL: SGPRBlocks: 1
|
||||
; ALL: NumSGPRsForWavesPerEU: 12
|
||||
; ALL: NumSGPRsForWavesPerEU: 14
|
||||
define void @max_12_sgprs(i32 addrspace(1)* %out1,
|
||||
|
||||
i32 addrspace(1)* %out2,
|
||||
@ -46,9 +46,9 @@ define void @max_12_sgprs(i32 addrspace(1)* %out1,
|
||||
; TOSGPR: SGPRBlocks: 1
|
||||
; TOSGPR: NumSGPRsForWavesPerEU: 16
|
||||
|
||||
; TOSMEM: s_mov_b64 s[6:7], s[2:3]
|
||||
; TOSMEM: s_mov_b32 s9, s13
|
||||
; TOSMEM: s_mov_b64 s[4:5], s[0:1]
|
||||
; TOSMEM: s_mov_b64 s[10:11], s[2:3]
|
||||
; TOSMEM: s_mov_b64 s[8:9], s[0:1]
|
||||
; TOSMEM: s_mov_b32 s7, s13
|
||||
|
||||
; TOSMEM: SGPRBlocks: 1
|
||||
; TOSMEM: NumSGPRsForWavesPerEU: 16
|
||||
|
@ -1,5 +1,9 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=VI-NOXNACK -check-prefix=GCN %s
|
||||
|
||||
; RUN: llc -march=amdgcn -mcpu=carrizo -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefix=VI-NOXNACK -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=stoney -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefix=VI-NOXNACK -check-prefix=GCN %s
|
||||
|
||||
; RUN: llc -march=amdgcn -mcpu=carrizo -verify-machineinstrs < %s | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=stoney -verify-machineinstrs < %s | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user