mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-08 04:51:23 +00:00
R600/SI: Add subtarget feature to enable VGPR spilling for all shader types
This is disabled by default, but can be enabled with the subtarget feature 'vgpr-spilling'.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@226597 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
5d96beaab5
commit
46846844ee
@ -92,6 +92,11 @@ def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
|
|||||||
"true",
|
"true",
|
||||||
"Support flat address space">;
|
"Support flat address space">;
|
||||||
|
|
||||||
|
// Subtarget feature selectable by the name "vgpr-spilling". Going by the
// argument order of the neighbouring feature definitions, requesting it sets
// the EnableVGPRSpilling subtarget field to "true"; the final string is the
// feature's help description.
def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
|
||||||
|
"EnableVGPRSpilling",
|
||||||
|
"true",
|
||||||
|
"Enable spilling of VGPRs to scratch memory">;
|
||||||
|
|
||||||
class SubtargetFeatureFetchLimit <string Value> :
|
class SubtargetFeatureFetchLimit <string Value> :
|
||||||
SubtargetFeature <"fetch"#Value,
|
SubtargetFeature <"fetch"#Value,
|
||||||
"TexVTXClauseSize",
|
"TexVTXClauseSize",
|
||||||
|
@ -423,6 +423,7 @@ static unsigned getRsrcReg(unsigned ShaderType) {
|
|||||||
|
|
||||||
void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
|
void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
|
||||||
const SIProgramInfo &KernelInfo) {
|
const SIProgramInfo &KernelInfo) {
|
||||||
|
const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
|
||||||
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||||
unsigned RsrcReg = getRsrcReg(MFI->getShaderType());
|
unsigned RsrcReg = getRsrcReg(MFI->getShaderType());
|
||||||
|
|
||||||
@ -443,6 +444,10 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
|
|||||||
OutStreamer.EmitIntValue(RsrcReg, 4);
|
OutStreamer.EmitIntValue(RsrcReg, 4);
|
||||||
OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.VGPRBlocks) |
|
OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.VGPRBlocks) |
|
||||||
S_00B028_SGPRS(KernelInfo.SGPRBlocks), 4);
|
S_00B028_SGPRS(KernelInfo.SGPRBlocks), 4);
|
||||||
|
if (STM.isVGPRSpillingEnabled(MFI)) {
|
||||||
|
OutStreamer.EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4);
|
||||||
|
OutStreamer.EmitIntValue(S_0286E8_WAVESIZE(KernelInfo.ScratchBlocks), 4);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (MFI->getShaderType() == ShaderType::PIXEL) {
|
if (MFI->getShaderType() == ShaderType::PIXEL) {
|
||||||
|
@ -18,6 +18,7 @@
|
|||||||
#include "R600MachineScheduler.h"
|
#include "R600MachineScheduler.h"
|
||||||
#include "SIISelLowering.h"
|
#include "SIISelLowering.h"
|
||||||
#include "SIInstrInfo.h"
|
#include "SIInstrInfo.h"
|
||||||
|
#include "SIMachineFunctionInfo.h"
|
||||||
#include "llvm/ADT/SmallString.h"
|
#include "llvm/ADT/SmallString.h"
|
||||||
|
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
@ -78,6 +79,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS,
|
|||||||
FlatAddressSpace(false), EnableIRStructurizer(true),
|
FlatAddressSpace(false), EnableIRStructurizer(true),
|
||||||
EnablePromoteAlloca(false), EnableIfCvt(true),
|
EnablePromoteAlloca(false), EnableIfCvt(true),
|
||||||
EnableLoadStoreOpt(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
|
EnableLoadStoreOpt(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
|
||||||
|
EnableVGPRSpilling(false),
|
||||||
DL(computeDataLayout(initializeSubtargetDependencies(GPU, FS))),
|
DL(computeDataLayout(initializeSubtargetDependencies(GPU, FS))),
|
||||||
FrameLowering(TargetFrameLowering::StackGrowsUp,
|
FrameLowering(TargetFrameLowering::StackGrowsUp,
|
||||||
64 * 16, // Maximum stack alignment (long16)
|
64 * 16, // Maximum stack alignment (long16)
|
||||||
@ -113,3 +115,8 @@ unsigned AMDGPUSubtarget::getAmdKernelCodeChipID() const {
|
|||||||
case SEA_ISLANDS: return 12;
|
case SEA_ISLANDS: return 12;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Tells whether VGPR spilling may be used for the function described by
/// \p MFI. Compute shaders always qualify; every other shader type qualifies
/// only when the EnableVGPRSpilling subtarget flag is set.
bool AMDGPUSubtarget::isVGPRSpillingEnabled(
|
||||||
|
const SIMachineFunctionInfo *MFI) const {
|
||||||
|
return MFI->getShaderType() == ShaderType::COMPUTE || EnableVGPRSpilling;
|
||||||
|
}
|
||||||
|
@ -30,6 +30,8 @@
|
|||||||
|
|
||||||
namespace llvm {
|
namespace llvm {
|
||||||
|
|
||||||
|
class SIMachineFunctionInfo;
|
||||||
|
|
||||||
class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
|
class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
|
||||||
|
|
||||||
public:
|
public:
|
||||||
@ -63,6 +65,7 @@ private:
|
|||||||
unsigned WavefrontSize;
|
unsigned WavefrontSize;
|
||||||
bool CFALUBug;
|
bool CFALUBug;
|
||||||
int LocalMemorySize;
|
int LocalMemorySize;
|
||||||
|
bool EnableVGPRSpilling;
|
||||||
|
|
||||||
const DataLayout DL;
|
const DataLayout DL;
|
||||||
AMDGPUFrameLowering FrameLowering;
|
AMDGPUFrameLowering FrameLowering;
|
||||||
@ -224,6 +227,7 @@ public:
|
|||||||
bool isAmdHsaOS() const {
|
bool isAmdHsaOS() const {
|
||||||
return TargetTriple.getOS() == Triple::AMDHSA;
|
return TargetTriple.getOS() == Triple::AMDHSA;
|
||||||
}
|
}
|
||||||
|
bool isVGPRSpillingEnabled(const SIMachineFunctionInfo *MFI) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // End namespace llvm
|
} // End namespace llvm
|
||||||
|
@ -163,5 +163,8 @@ namespace SIOutMods {
|
|||||||
#define R_00B860_COMPUTE_TMPRING_SIZE 0x00B860
|
#define R_00B860_COMPUTE_TMPRING_SIZE 0x00B860
|
||||||
#define S_00B860_WAVESIZE(x) (((x) & 0x1FFF) << 12)
|
#define S_00B860_WAVESIZE(x) (((x) & 0x1FFF) << 12)
|
||||||
|
|
||||||
|
// Numeric ID of the SPI_TMPRING_SIZE register (presumably its hardware
// register offset -- confirm against the SI register documentation).
#define R_0286E8_SPI_TMPRING_SIZE 0x0286E8
|
||||||
|
// Packs x into the WAVESIZE field of that register: keeps the low 13 bits of
// x (mask 0x1FFF) and shifts them left by 12.
#define S_0286E8_WAVESIZE(x) (((x) & 0x1FFF) << 12)
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -588,6 +588,12 @@ SDValue SITargetLowering::LowerFormalArguments(
|
|||||||
|
|
||||||
InVals.push_back(Val);
|
InVals.push_back(Val);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (Info->getShaderType() != ShaderType::COMPUTE) {
|
||||||
|
unsigned ScratchIdx = CCInfo.getFirstUnallocated(
|
||||||
|
AMDGPU::SGPR_32RegClass.begin(), AMDGPU::SGPR_32RegClass.getNumRegs());
|
||||||
|
Info->ScratchOffsetReg = AMDGPU::SGPR_32RegClass.getRegister(ScratchIdx);
|
||||||
|
}
|
||||||
return Chain;
|
return Chain;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -430,15 +430,6 @@ unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
|
|||||||
return AMDGPU::COPY;
|
return AMDGPU::COPY;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Decides whether VGPR spills should be attempted for MF: true only when the
// function's shader type is COMPUTE.
static bool shouldTryToSpillVGPRs(MachineFunction *MF) {
|
|
||||||
|
|
||||||
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
|
|
||||||
|
|
||||||
// FIXME: Implement spilling for other shader types.
|
|
||||||
return MFI->getShaderType() == ShaderType::COMPUTE;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
|
void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
|
||||||
MachineBasicBlock::iterator MI,
|
MachineBasicBlock::iterator MI,
|
||||||
unsigned SrcReg, bool isKill,
|
unsigned SrcReg, bool isKill,
|
||||||
@ -462,7 +453,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
|
|||||||
case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break;
|
case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break;
|
||||||
case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break;
|
case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break;
|
||||||
}
|
}
|
||||||
} else if(shouldTryToSpillVGPRs(MF) && RI.hasVGPRs(RC)) {
|
} else if(RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) {
|
||||||
MFI->setHasSpilledVGPRs();
|
MFI->setHasSpilledVGPRs();
|
||||||
|
|
||||||
switch(RC->getSize() * 8) {
|
switch(RC->getSize() * 8) {
|
||||||
@ -499,6 +490,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
|
|||||||
const TargetRegisterClass *RC,
|
const TargetRegisterClass *RC,
|
||||||
const TargetRegisterInfo *TRI) const {
|
const TargetRegisterInfo *TRI) const {
|
||||||
MachineFunction *MF = MBB.getParent();
|
MachineFunction *MF = MBB.getParent();
|
||||||
|
const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
|
||||||
MachineFrameInfo *FrameInfo = MF->getFrameInfo();
|
MachineFrameInfo *FrameInfo = MF->getFrameInfo();
|
||||||
DebugLoc DL = MBB.findDebugLoc(MI);
|
DebugLoc DL = MBB.findDebugLoc(MI);
|
||||||
int Opcode = -1;
|
int Opcode = -1;
|
||||||
@ -511,7 +503,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
|
|||||||
case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break;
|
case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break;
|
||||||
case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break;
|
case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break;
|
||||||
}
|
}
|
||||||
} else if(shouldTryToSpillVGPRs(MF) && RI.hasVGPRs(RC)) {
|
} else if(RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) {
|
||||||
switch(RC->getSize() * 8) {
|
switch(RC->getSize() * 8) {
|
||||||
case 32: Opcode = AMDGPU::SI_SPILL_V32_RESTORE; break;
|
case 32: Opcode = AMDGPU::SI_SPILL_V32_RESTORE; break;
|
||||||
case 64: Opcode = AMDGPU::SI_SPILL_V64_RESTORE; break;
|
case 64: Opcode = AMDGPU::SI_SPILL_V64_RESTORE; break;
|
||||||
|
@ -50,6 +50,7 @@ public:
|
|||||||
unsigned NumUserSGPRs;
|
unsigned NumUserSGPRs;
|
||||||
std::map<unsigned, unsigned> LaneVGPRs;
|
std::map<unsigned, unsigned> LaneVGPRs;
|
||||||
unsigned LDSWaveSpillSize;
|
unsigned LDSWaveSpillSize;
|
||||||
|
unsigned ScratchOffsetReg;
|
||||||
bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
|
bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
|
||||||
unsigned getTIDReg() const { return TIDReg; };
|
unsigned getTIDReg() const { return TIDReg; };
|
||||||
void setTIDReg(unsigned Reg) { TIDReg = Reg; }
|
void setTIDReg(unsigned Reg) { TIDReg = Reg; }
|
||||||
|
@ -424,6 +424,8 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
|
|||||||
case SIRegisterInfo::TGID_Z:
|
case SIRegisterInfo::TGID_Z:
|
||||||
return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 2);
|
return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 2);
|
||||||
case SIRegisterInfo::SCRATCH_WAVE_OFFSET:
|
case SIRegisterInfo::SCRATCH_WAVE_OFFSET:
|
||||||
|
if (MFI->getShaderType() != ShaderType::COMPUTE)
|
||||||
|
return MFI->ScratchOffsetReg;
|
||||||
return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 4);
|
return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 4);
|
||||||
case SIRegisterInfo::SCRATCH_PTR:
|
case SIRegisterInfo::SCRATCH_PTR:
|
||||||
return AMDGPU::SGPR2_SGPR3;
|
return AMDGPU::SGPR2_SGPR3;
|
||||||
|
Loading…
Reference in New Issue
Block a user