AMDGPU/SI: Add new target attribute InitialPSInputAddr

Summary:
This allows Mesa to pass initial SPI_PS_INPUT_ADDR to LLVM.
The register assigns VGPR locations to PS inputs, while the ENA register
determines whether or not they are loaded.

Mesa needs to set some inputs as not-movable, so that a pixel shader prolog
binary appended at the beginning can assume where some inputs are.

v2: Make PSInputAddr private, because there are never enough silly getters
    and setters for people to read.

Reviewers: tstellarAMD, arsenm

Subscribers: arsenm

Differential Revision: http://reviews.llvm.org/D16030

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@257591 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Marek Olsak 2016-01-13 11:45:36 +00:00
parent bc61f352af
commit d2b0d84d0f
7 changed files with 55 additions and 15 deletions

View File

@ -587,7 +587,9 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
OutStreamer->EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(KernelInfo.LDSBlocks), 4);
OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
OutStreamer->EmitIntValue(MFI->PSInputAddr, 4);
OutStreamer->EmitIntValue(MFI->PSInputEna, 4);
OutStreamer->EmitIntValue(R_0286D0_SPI_PS_INPUT_ADDR, 4);
OutStreamer->EmitIntValue(MFI->getPSInputAddr(), 4);
}
}

View File

@ -137,7 +137,7 @@ namespace SIOutMods {
#define C_00B84C_EXCP_EN
#define R_0286CC_SPI_PS_INPUT_ENA 0x0286CC
#define R_0286D0_SPI_PS_INPUT_ADDR 0x0286D0
#define R_00B848_COMPUTE_PGM_RSRC1 0x00B848
#define S_00B848_VGPRS(x) (((x) & 0x3F) << 0)

View File

@ -601,14 +601,18 @@ SDValue SITargetLowering::LowerFormalArguments(
assert((PSInputNum <= 15) && "Too many PS inputs!");
if (!Arg.Used) {
if (!Arg.Used && !Info->isPSInputAllocated(PSInputNum)) {
// We can safely skip PS inputs
Skipped.set(i);
++PSInputNum;
continue;
}
Info->PSInputAddr |= 1 << PSInputNum++;
Info->markPSInputAllocated(PSInputNum);
if (Arg.Used)
Info->PSInputEna |= 1 << PSInputNum;
++PSInputNum;
}
// Second split vertices into their elements
@ -638,11 +642,18 @@ SDValue SITargetLowering::LowerFormalArguments(
*DAG.getContext());
// At least one interpolation mode must be enabled or else the GPU will hang.
//
// Check PSInputAddr instead of PSInputEna. The idea is that if the user set
// PSInputAddr, the user wants to enable some bits after the compilation
// based on run-time states. Since we can't know what the final PSInputEna
// will look like, we shouldn't do anything here; the user is responsible
// for programming it correctly.
if (Info->getShaderType() == ShaderType::PIXEL &&
(Info->PSInputAddr & 0x7F) == 0) {
Info->PSInputAddr |= 1;
(Info->getPSInputAddr() & 0x7F) == 0) {
CCInfo.AllocateReg(AMDGPU::VGPR0);
CCInfo.AllocateReg(AMDGPU::VGPR1);
Info->markPSInputAllocated(0);
Info->PSInputEna |= 1;
}
if (Info->getShaderType() == ShaderType::COMPUTE) {

View File

@ -46,8 +46,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
LDSWaveSpillSize(0),
PSInputAddr(0),
LDSWaveSpillSize(0),
PSInputEna(0),
NumUserSGPRs(0),
NumSystemSGPRs(0),
HasSpilledSGPRs(false),
@ -72,6 +73,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
const Function *F = MF.getFunction();
PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
if (getShaderType() == ShaderType::COMPUTE)

View File

@ -57,10 +57,13 @@ class SIMachineFunctionInfo : public AMDGPUMachineFunction {
unsigned WorkGroupInfoSystemSGPR;
unsigned PrivateSegmentWaveByteOffsetSystemSGPR;
// Graphics info.
unsigned PSInputAddr;
public:
// FIXME: Make private
unsigned LDSWaveSpillSize;
unsigned PSInputAddr;
unsigned PSInputEna;
std::map<unsigned, unsigned> LaneVGPRs;
unsigned ScratchOffsetReg;
unsigned NumUserSGPRs;
@ -273,6 +276,18 @@ public:
HasSpilledVGPRs = Spill;
}
// Read-only access to the PSInputAddr mask (the value emitted for
// SPI_PS_INPUT_ADDR).
unsigned getPSInputAddr() const { return PSInputAddr; }
// True when bit Index of PSInputAddr is set, i.e. that PS input has already
// been marked as allocated.
bool isPSInputAllocated(unsigned Index) const {
  const unsigned Mask = 1 << Index;
  return (PSInputAddr & Mask) != 0;
}
// Sets bit Index of PSInputAddr, recording that PS input as allocated.
void markPSInputAllocated(unsigned Index) {
  const unsigned Mask = 1 << Index;
  PSInputAddr = PSInputAddr | Mask;
}
unsigned getMaximumWorkGroupSize(const MachineFunction &MF) const;
};

View File

@ -106,20 +106,27 @@ bool isReadOnlySegment(const GlobalValue *GV) {
return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
}
static const char ShaderTypeAttribute[] = "ShaderType";
unsigned getShaderType(const Function &F) {
Attribute A = F.getFnAttribute(ShaderTypeAttribute);
unsigned ShaderType = ShaderType::COMPUTE;
// Reads the function attribute Name on F and parses its string value as an
// unsigned integer. Returns Default when the attribute is not a string
// attribute; when parsing fails, an error is emitted on the function's
// LLVMContext.
// NOTE(review): the error text below still says "shader type" although this
// helper is also used for "InitialPSInputAddr" — consider a message that names
// the attribute being parsed.
static unsigned getIntegerAttribute(const Function &F, const char *Name,
unsigned Default) {
Attribute A = F.getFnAttribute(Name);
unsigned Result = Default;
if (A.isStringAttribute()) {
StringRef Str = A.getValueAsString();
if (Str.getAsInteger(0, ShaderType)) {
if (Str.getAsInteger(0, Result)) {
LLVMContext &Ctx = F.getContext();
Ctx.emitError("can't parse shader type");
}
}
return ShaderType;
return Result;
}
// Looks up the "ShaderType" function attribute; ShaderType::COMPUTE is the
// default handed to getIntegerAttribute.
unsigned getShaderType(const Function &F) {
  const unsigned Fallback = ShaderType::COMPUTE;
  return getIntegerAttribute(F, "ShaderType", Fallback);
}
// Looks up the "InitialPSInputAddr" function attribute; 0 is the default
// handed to getIntegerAttribute.
unsigned getInitialPSInputAddr(const Function &F) {
  const unsigned Fallback = 0;
  return getIntegerAttribute(F, "InitialPSInputAddr", Fallback);
}
bool isSI(const MCSubtargetInfo &STI) {

View File

@ -45,6 +45,8 @@ bool isGlobalSegment(const GlobalValue *GV);
bool isReadOnlySegment(const GlobalValue *GV);
// Returns the integer value of the function's "ShaderType" attribute.
unsigned getShaderType(const Function &F);
// Returns the integer value of the function's "InitialPSInputAddr" attribute.
unsigned getInitialPSInputAddr(const Function &F);
bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);