mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-02-16 07:49:43 +00:00
Update *_TMPRING_SIZE.WAVESIZE for GFX11
The encoding of COMPUTE_TMPRING_SIZE.WAVESIZE and SPI_TMPRING_SIZE.WAVESIZE has changed in GFX11: it is now in units of 64 dwords instead of 256 dwords, and the field has been widened from 13 bits to 15 bits. Depends on D126989. Reviewed By: rampitec, arsenm, #amdgpu. Differential Revision: https://reviews.llvm.org/D127248
This commit is contained in:
parent
ed0288f7c4
commit
ff85d61a6e
@ -695,7 +695,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
|
||||
ProgInfo.DynamicCallStack = Info.HasDynamicallySizedStack || Info.HasRecursion;
|
||||
|
||||
const uint64_t MaxScratchPerWorkitem =
|
||||
GCNSubtarget::MaxWaveScratchSize / STM.getWavefrontSize();
|
||||
STM.getMaxWaveScratchSize() / STM.getWavefrontSize();
|
||||
if (ProgInfo.ScratchSize > MaxScratchPerWorkitem) {
|
||||
DiagnosticInfoStackSize DiagStackSize(MF.getFunction(),
|
||||
ProgInfo.ScratchSize,
|
||||
@ -879,15 +879,14 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
|
||||
ProgInfo.LDSBlocks =
|
||||
alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
|
||||
|
||||
// Scratch is allocated in 256 dword blocks.
|
||||
unsigned ScratchAlignShift = 10;
|
||||
// Scratch is allocated in 64-dword or 256-dword blocks.
|
||||
unsigned ScratchAlignShift =
|
||||
STM.getGeneration() >= AMDGPUSubtarget::GFX11 ? 8 : 10;
|
||||
// We need to program the hardware with the amount of scratch memory that
|
||||
// is used by the entire wave. ProgInfo.ScratchSize is the amount of
|
||||
// scratch memory used per thread.
|
||||
ProgInfo.ScratchBlocks =
|
||||
alignTo(ProgInfo.ScratchSize * STM.getWavefrontSize(),
|
||||
1ULL << ScratchAlignShift) >>
|
||||
ScratchAlignShift;
|
||||
ProgInfo.ScratchBlocks = divideCeil(
|
||||
ProgInfo.ScratchSize * STM.getWavefrontSize(), 1ULL << ScratchAlignShift);
|
||||
|
||||
if (getIsaVersion(getGlobalSTI()->getCPU()).Major >= 10) {
|
||||
ProgInfo.WgpMode = STM.isCuModeEnabled() ? 0 : 1;
|
||||
@ -946,6 +945,7 @@ static unsigned getRsrcReg(CallingConv::ID CallConv) {
|
||||
void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
|
||||
const SIProgramInfo &CurrentProgramInfo) {
|
||||
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
|
||||
unsigned RsrcReg = getRsrcReg(MF.getFunction().getCallingConv());
|
||||
|
||||
if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
|
||||
@ -957,7 +957,10 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
|
||||
OutStreamer->emitInt32(CurrentProgramInfo.ComputePGMRSrc2);
|
||||
|
||||
OutStreamer->emitInt32(R_00B860_COMPUTE_TMPRING_SIZE);
|
||||
OutStreamer->emitInt32(S_00B860_WAVESIZE(CurrentProgramInfo.ScratchBlocks));
|
||||
OutStreamer->emitInt32(
|
||||
STM.getGeneration() >= AMDGPUSubtarget::GFX11
|
||||
? S_00B860_WAVESIZE_GFX11Plus(CurrentProgramInfo.ScratchBlocks)
|
||||
: S_00B860_WAVESIZE_PreGFX11(CurrentProgramInfo.ScratchBlocks));
|
||||
|
||||
// TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =
|
||||
// 0" comment but I don't see a corresponding field in the register spec.
|
||||
@ -966,8 +969,10 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
|
||||
OutStreamer->emitIntValue(S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) |
|
||||
S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks), 4);
|
||||
OutStreamer->emitInt32(R_0286E8_SPI_TMPRING_SIZE);
|
||||
OutStreamer->emitIntValue(
|
||||
S_0286E8_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4);
|
||||
OutStreamer->emitInt32(
|
||||
STM.getGeneration() >= AMDGPUSubtarget::GFX11
|
||||
? S_0286E8_WAVESIZE_GFX11Plus(CurrentProgramInfo.ScratchBlocks)
|
||||
: S_0286E8_WAVESIZE_PreGFX11(CurrentProgramInfo.ScratchBlocks));
|
||||
}
|
||||
|
||||
if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) {
|
||||
|
@ -201,9 +201,6 @@ private:
|
||||
SIFrameLowering FrameLowering;
|
||||
|
||||
public:
|
||||
// See COMPUTE_TMPRING_SIZE.WAVESIZE, 13-bit field in units of 256-dword.
|
||||
static const unsigned MaxWaveScratchSize = (256 * 4) * ((1 << 13) - 1);
|
||||
|
||||
GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||
const GCNTargetMachine &TM);
|
||||
~GCNSubtarget() override;
|
||||
@ -266,9 +263,19 @@ public:
|
||||
return (Generation)Gen;
|
||||
}
|
||||
|
||||
unsigned getMaxWaveScratchSize() const {
|
||||
// See COMPUTE_TMPRING_SIZE.WAVESIZE.
|
||||
if (getGeneration() < GFX11) {
|
||||
// 13-bit field in units of 256-dword.
|
||||
return (256 * 4) * ((1 << 13) - 1);
|
||||
}
|
||||
// 15-bit field in units of 64-dword.
|
||||
return (64 * 4) * ((1 << 15) - 1);
|
||||
}
|
||||
|
||||
/// Return the number of high bits known to be zero for a frame index.
|
||||
unsigned getKnownHighZeroBitsForFrameIndex() const {
|
||||
return countLeadingZeros(MaxWaveScratchSize) + getWavefrontSizeLog2();
|
||||
return countLeadingZeros(getMaxWaveScratchSize()) + getWavefrontSizeLog2();
|
||||
}
|
||||
|
||||
int getLDSBankCount() const {
|
||||
|
@ -1036,10 +1036,12 @@ enum Offset_COV5 : unsigned {
|
||||
#define FP_DENORM_MODE_DP(x) (((x) & 0x3) << 6)
|
||||
|
||||
#define R_00B860_COMPUTE_TMPRING_SIZE 0x00B860
|
||||
#define S_00B860_WAVESIZE(x) (((x) & 0x1FFF) << 12)
|
||||
#define S_00B860_WAVESIZE_PreGFX11(x) (((x) & 0x1FFF) << 12)
|
||||
#define S_00B860_WAVESIZE_GFX11Plus(x) (((x) & 0x7FFF) << 12)
|
||||
|
||||
#define R_0286E8_SPI_TMPRING_SIZE 0x0286E8
|
||||
#define S_0286E8_WAVESIZE(x) (((x) & 0x1FFF) << 12)
|
||||
#define S_0286E8_WAVESIZE_PreGFX11(x) (((x) & 0x1FFF) << 12)
|
||||
#define S_0286E8_WAVESIZE_GFX11Plus(x) (((x) & 0x7FFF) << 12)
|
||||
|
||||
#define R_028B54_VGT_SHADER_STAGES_EN 0x028B54
|
||||
#define S_028B54_HS_W32_EN(x) (((x) & 0x1) << 21)
|
||||
|
@ -1,10 +1,15 @@
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX10 %s
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX11 %s
|
||||
|
||||
; Check SPI_TMPRING_SIZE.WAVESIZE = 5
|
||||
; SPI_TMPRING_SIZE.WAVESIZE = 5
|
||||
; GFX10: .long 165608
|
||||
; GFX10-NEXT: .long 20480
|
||||
|
||||
; SPI_TMPRING_SIZE.WAVESIZE = 17
|
||||
; GFX11: .long 165608
|
||||
; GFX11-NEXT: .long 69632
|
||||
|
||||
; GCN-LABEL: {{^}}scratch_ps:
|
||||
; GCN: s_load_dwordx2 s[4:5], s[0:1], 0x0{{$}}
|
||||
; GCN-DAG: s_mov_b32 s6, -1{{$}}
|
||||
|
Loading…
x
Reference in New Issue
Block a user