AMDGPU/SI: Don't reserve XNACK when it's disabled

Summary:
This frees 2 additional scalar registers.

These are results from all of my 3 patches combined:

  Polaris:
    Spilled SGPRs: 2231 -> 1517 (-32.00 %)

  Tonga:
    Spilled SGPRs: 3829 -> 2608 (-31.89 %)
    Spilled VGPRs: 100 -> 84 (-16.00 %)

  Tonga even spills SGPRs via VGPRs to scratch. That's a compute shader
  limited to 64 VGPRs.

Reviewers: tstellarAMD

Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, tony-tye

Differential Revision: https://reviews.llvm.org/D27151

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@289262 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Marek Olsak 2016-12-09 19:49:54 +00:00
parent 36d5f19e1d
commit 29e2dd8cf4
4 changed files with 18 additions and 7 deletions

View File

@ -73,6 +73,13 @@ def FeatureUnalignedScratchAccess : SubtargetFeature<"unaligned-scratch-access",
"Support unaligned scratch loads and stores"
>;
// XNACK is disabled if SH_MEM_CONFIG.ADDRESS_MODE = GPUVM on chips that support
// XNACK. The current default kernel driver setting is:
// - graphics ring: XNACK disabled
// - compute ring: XNACK enabled
//
// If XNACK is enabled, the VMEM latency can be worse.
// If XNACK is disabled, the 2 SGPRs can be used for general purposes.
def FeatureXNACK : SubtargetFeature<"xnack",
"EnableXNACK",
"true",

View File

@ -1188,7 +1188,7 @@ unsigned SIRegisterInfo::getNumReservedSGPRs(const SISubtarget &ST,
return 4; // FLAT_SCRATCH, VCC (in that order)
}
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
if (ST.isXNACKEnabled())
return 4; // XNACK, VCC (in that order)
return 2; // VCC.

View File

@ -9,10 +9,10 @@
; FIXME: Should be ablo to skip this copying of the private segment
; buffer because all the SGPR spills are to VGPRs.
; ALL: s_mov_b64 s[6:7], s[2:3]
; ALL: s_mov_b64 s[4:5], s[0:1]
; ALL: s_mov_b64 s[10:11], s[2:3]
; ALL: s_mov_b64 s[8:9], s[0:1]
; ALL: SGPRBlocks: 1
; ALL: NumSGPRsForWavesPerEU: 12
; ALL: NumSGPRsForWavesPerEU: 14
define void @max_12_sgprs(i32 addrspace(1)* %out1,
i32 addrspace(1)* %out2,
@ -46,9 +46,9 @@ define void @max_12_sgprs(i32 addrspace(1)* %out1,
; TOSGPR: SGPRBlocks: 1
; TOSGPR: NumSGPRsForWavesPerEU: 16
; TOSMEM: s_mov_b64 s[6:7], s[2:3]
; TOSMEM: s_mov_b32 s9, s13
; TOSMEM: s_mov_b64 s[4:5], s[0:1]
; TOSMEM: s_mov_b64 s[10:11], s[2:3]
; TOSMEM: s_mov_b64 s[8:9], s[0:1]
; TOSMEM: s_mov_b32 s7, s13
; TOSMEM: SGPRBlocks: 1
; TOSMEM: NumSGPRsForWavesPerEU: 16

View File

@ -1,5 +1,9 @@
; RUN: llc -march=amdgcn -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=VI-NOXNACK -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=carrizo -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefix=VI-NOXNACK -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=stoney -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefix=VI-NOXNACK -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=carrizo -verify-machineinstrs < %s | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=stoney -verify-machineinstrs < %s | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s