mirror of
https://github.com/RPCSX/llvm.git
synced 2024-12-04 10:04:33 +00:00
AMDGPU: Make AMDGPUMachineFunction fields private
ABIArgOffset is a problem because properly setting the KernArgSize requires that the reserved area before the real kernel arguments be correctly aligned, which requires fixing clover. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@276766 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
cc67a0a36a
commit
d506595769
@ -311,7 +311,7 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
|
||||
|
||||
if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) {
|
||||
OutStreamer->EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4);
|
||||
OutStreamer->EmitIntValue(alignTo(MFI->LDSSize, 4) >> 2, 4);
|
||||
OutStreamer->EmitIntValue(alignTo(MFI->getLDSSize(), 4) >> 2, 4);
|
||||
}
|
||||
}
|
||||
|
||||
@ -494,10 +494,10 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
|
||||
Ctx.diagnose(Diag);
|
||||
}
|
||||
|
||||
if (MFI->LDSSize > static_cast<unsigned>(STM.getLocalMemorySize())) {
|
||||
if (MFI->getLDSSize() > static_cast<unsigned>(STM.getLocalMemorySize())) {
|
||||
LLVMContext &Ctx = MF.getFunction()->getContext();
|
||||
DiagnosticInfoResourceLimit Diag(*MF.getFunction(), "local memory",
|
||||
MFI->LDSSize, DS_Error);
|
||||
MFI->getLDSSize(), DS_Error);
|
||||
Ctx.diagnose(Diag);
|
||||
}
|
||||
|
||||
@ -531,7 +531,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
|
||||
unsigned LDSSpillSize = MFI->LDSWaveSpillSize *
|
||||
MFI->getMaximumWorkGroupSize(MF);
|
||||
|
||||
ProgInfo.LDSSize = MFI->LDSSize + LDSSpillSize;
|
||||
ProgInfo.LDSSize = MFI->getLDSSize() + LDSSpillSize;
|
||||
ProgInfo.LDSBlocks =
|
||||
alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
|
||||
|
||||
@ -707,7 +707,8 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
|
||||
if (STM.isXNACKEnabled())
|
||||
header.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
|
||||
|
||||
header.kernarg_segment_byte_size = MFI->ABIArgOffset;
|
||||
// FIXME: Should use getKernArgSize
|
||||
header.kernarg_segment_byte_size = MFI->getABIArgOffset();
|
||||
header.wavefront_sgpr_count = KernelInfo.NumSGPR;
|
||||
header.workitem_vgpr_count = KernelInfo.NumVGPR;
|
||||
header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
|
||||
|
@ -763,24 +763,8 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
|
||||
if (hasDefinedInitializer(GV))
|
||||
break;
|
||||
|
||||
unsigned Offset;
|
||||
if (MFI->LocalMemoryObjects.count(GV) == 0) {
|
||||
unsigned Align = GV->getAlignment();
|
||||
if (Align == 0)
|
||||
Align = DL.getABITypeAlignment(GV->getValueType());
|
||||
|
||||
/// TODO: We should sort these to minimize wasted space due to alignment
|
||||
/// padding. Currently the padding is decided by the first encountered use
|
||||
/// during lowering.
|
||||
Offset = MFI->LDSSize = alignTo(MFI->LDSSize, Align);
|
||||
MFI->LocalMemoryObjects[GV] = Offset;
|
||||
MFI->LDSSize += DL.getTypeAllocSize(GV->getValueType());
|
||||
} else {
|
||||
Offset = MFI->LocalMemoryObjects[GV];
|
||||
}
|
||||
|
||||
return DAG.getConstant(Offset, SDLoc(Op),
|
||||
getPointerTy(DL, AMDGPUAS::LOCAL_ADDRESS));
|
||||
unsigned Offset = MFI->allocateLDSGlobal(DL, *GV);
|
||||
return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType());
|
||||
}
|
||||
}
|
||||
|
||||
@ -2653,7 +2637,7 @@ SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
|
||||
|
||||
uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
|
||||
const AMDGPUMachineFunction *MFI, const ImplicitParameter Param) const {
|
||||
uint64_t ArgOffset = MFI->ABIArgOffset;
|
||||
uint64_t ArgOffset = MFI->getABIArgOffset();
|
||||
switch (Param) {
|
||||
case GRID_DIM:
|
||||
return ArgOffset;
|
||||
|
@ -1,23 +1,47 @@
|
||||
//===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPUMachineFunction.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// Pin the vtable to this file.
|
||||
void AMDGPUMachineFunction::anchor() {}
|
||||
|
||||
AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
|
||||
MachineFunctionInfo(),
|
||||
LocalMemoryObjects(),
|
||||
KernArgSize(0),
|
||||
MaxKernArgAlign(0),
|
||||
LDSSize(0),
|
||||
ABIArgOffset(0),
|
||||
ScratchSize(0),
|
||||
IsKernel(MF.getFunction()->getCallingConv() == llvm::CallingConv::AMDGPU_KERNEL ||
|
||||
MF.getFunction()->getCallingConv() == llvm::CallingConv::SPIR_KERNEL)
|
||||
{
|
||||
IsKernel(MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_KERNEL ||
|
||||
MF.getFunction()->getCallingConv() == CallingConv::SPIR_KERNEL) {
|
||||
// FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
|
||||
// except reserved size is not correctly aligned.
|
||||
}
|
||||
|
||||
bool AMDGPUMachineFunction::isKernel() const
|
||||
{
|
||||
return IsKernel;
|
||||
unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
|
||||
const GlobalValue &GV) {
|
||||
auto Entry = LocalMemoryObjects.insert(std::make_pair(&GV, 0));
|
||||
if (!Entry.second)
|
||||
return Entry.first->second;
|
||||
|
||||
unsigned Align = GV.getAlignment();
|
||||
if (Align == 0)
|
||||
Align = DL.getABITypeAlignment(GV.getValueType());
|
||||
|
||||
/// TODO: We should sort these to minimize wasted space due to alignment
|
||||
/// padding. Currently the padding is decided by the first encountered use
|
||||
/// during lowering.
|
||||
unsigned Offset = LDSSize = alignTo(LDSSize, Align);
|
||||
|
||||
Entry.first->second = Offset;
|
||||
LDSSize += DL.getTypeAllocSize(GV.getValueType());
|
||||
|
||||
return Offset;
|
||||
}
|
||||
|
@ -11,15 +11,26 @@
|
||||
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H
|
||||
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include <map>
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class AMDGPUMachineFunction : public MachineFunctionInfo {
|
||||
/// A map to keep track of local memory objects and their offsets within the
|
||||
/// local memory space.
|
||||
SmallDenseMap<const GlobalValue *, unsigned, 4> LocalMemoryObjects;
|
||||
|
||||
uint64_t KernArgSize;
|
||||
unsigned MaxKernArgAlign;
|
||||
|
||||
virtual void anchor();
|
||||
/// Number of bytes in the LDS that are being used.
|
||||
unsigned LDSSize;
|
||||
|
||||
// FIXME: This should probably be removed.
|
||||
/// Start of implicit kernel args
|
||||
unsigned ABIArgOffset;
|
||||
|
||||
bool IsKernel;
|
||||
|
||||
public:
|
||||
AMDGPUMachineFunction(const MachineFunction &MF);
|
||||
@ -35,19 +46,27 @@ public:
|
||||
return Result;
|
||||
}
|
||||
|
||||
/// A map to keep track of local memory objects and their offsets within
|
||||
/// the local memory space.
|
||||
std::map<const GlobalValue *, unsigned> LocalMemoryObjects;
|
||||
/// Number of bytes in the LDS that are being used.
|
||||
unsigned LDSSize;
|
||||
uint64_t getKernArgSize() const {
|
||||
return KernArgSize;
|
||||
}
|
||||
|
||||
/// Start of implicit kernel args
|
||||
unsigned ABIArgOffset;
|
||||
void setABIArgOffset(unsigned NewOffset) {
|
||||
ABIArgOffset = NewOffset;
|
||||
}
|
||||
|
||||
bool isKernel() const;
|
||||
unsigned getABIArgOffset() const {
|
||||
return ABIArgOffset;
|
||||
}
|
||||
|
||||
unsigned ScratchSize;
|
||||
bool IsKernel;
|
||||
unsigned getLDSSize() const {
|
||||
return LDSSize;
|
||||
}
|
||||
|
||||
bool isKernel() const {
|
||||
return IsKernel;
|
||||
}
|
||||
|
||||
unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalValue &GV);
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1756,7 +1756,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
|
||||
|
||||
unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
|
||||
unsigned PartOffset = VA.getLocMemOffset();
|
||||
unsigned Offset = 36 + VA.getLocMemOffset();
|
||||
unsigned Offset = Subtarget->getExplicitKernelArgOffset() + VA.getLocMemOffset();
|
||||
|
||||
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
|
||||
SDValue Arg = DAG.getLoad(
|
||||
@ -1767,7 +1767,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
|
||||
|
||||
// 4 is the preferred alignment for the CONSTANT memory space.
|
||||
InVals.push_back(Arg);
|
||||
MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
|
||||
MFI->setABIArgOffset(Offset + MemVT.getStoreSize());
|
||||
}
|
||||
return Chain;
|
||||
}
|
||||
|
@ -770,7 +770,7 @@ SDValue SITargetLowering::LowerFormalArguments(
|
||||
}
|
||||
|
||||
InVals.push_back(Arg);
|
||||
Info->ABIArgOffset = Offset + MemVT.getStoreSize();
|
||||
Info->setABIArgOffset(Offset + MemVT.getStoreSize());
|
||||
continue;
|
||||
}
|
||||
assert(VA.isRegLoc() && "Parameter must be in a register!");
|
||||
@ -1435,7 +1435,7 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
|
||||
DebugLoc DL = MI.getDebugLoc();
|
||||
BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_MOV_B32))
|
||||
.addOperand(MI.getOperand(0))
|
||||
.addImm(MFI->LDSSize);
|
||||
.addImm(MFI->getLDSSize());
|
||||
MI.eraseFromParent();
|
||||
return BB;
|
||||
}
|
||||
|
@ -807,7 +807,7 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(
|
||||
}
|
||||
|
||||
// Add FrameIndex to LDS offset
|
||||
unsigned LDSOffset = MFI->LDSSize + (FrameOffset * WorkGroupSize);
|
||||
unsigned LDSOffset = MFI->getLDSSize() + (FrameOffset * WorkGroupSize);
|
||||
BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), TmpReg)
|
||||
.addImm(LDSOffset)
|
||||
.addReg(TIDReg);
|
||||
|
@ -516,7 +516,7 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
|
||||
}
|
||||
}
|
||||
|
||||
if (NeedFlat && MFI->IsKernel) {
|
||||
if (NeedFlat && MFI->isKernel()) {
|
||||
// TODO: What to use with function calls?
|
||||
// We will need to Initialize the flat scratch register pair.
|
||||
if (NeedFlat)
|
||||
|
@ -26,9 +26,6 @@ static cl::opt<bool> EnableSpillSGPRToVGPR(
|
||||
cl::ReallyHidden,
|
||||
cl::init(true));
|
||||
|
||||
// Pin the vtable to this file.
|
||||
void SIMachineFunctionInfo::anchor() {}
|
||||
|
||||
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
|
||||
: AMDGPUMachineFunction(MF),
|
||||
TIDReg(AMDGPU::NoRegister),
|
||||
|
@ -28,7 +28,6 @@ class MachineRegisterInfo;
|
||||
class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
|
||||
// FIXME: This should be removed and getPreloadedValue moved here.
|
||||
friend struct SIRegisterInfo;
|
||||
void anchor() override;
|
||||
|
||||
unsigned TIDReg;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user