AMDGPU: Make AMDGPUMachineFunction fields private

ABIArgOffset is a problem because properly setting the
KernArgSize requires that the reserved area before the
real kernel arguments be correctly aligned, which requires
fixing clover.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@276766 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Matt Arsenault 2016-07-26 16:45:58 +00:00
parent cc67a0a36a
commit d506595769
10 changed files with 80 additions and 56 deletions

View File

@ -311,7 +311,7 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) {
OutStreamer->EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4);
OutStreamer->EmitIntValue(alignTo(MFI->LDSSize, 4) >> 2, 4);
OutStreamer->EmitIntValue(alignTo(MFI->getLDSSize(), 4) >> 2, 4);
}
}
@ -494,10 +494,10 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
Ctx.diagnose(Diag);
}
if (MFI->LDSSize > static_cast<unsigned>(STM.getLocalMemorySize())) {
if (MFI->getLDSSize() > static_cast<unsigned>(STM.getLocalMemorySize())) {
LLVMContext &Ctx = MF.getFunction()->getContext();
DiagnosticInfoResourceLimit Diag(*MF.getFunction(), "local memory",
MFI->LDSSize, DS_Error);
MFI->getLDSSize(), DS_Error);
Ctx.diagnose(Diag);
}
@ -531,7 +531,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
unsigned LDSSpillSize = MFI->LDSWaveSpillSize *
MFI->getMaximumWorkGroupSize(MF);
ProgInfo.LDSSize = MFI->LDSSize + LDSSpillSize;
ProgInfo.LDSSize = MFI->getLDSSize() + LDSSpillSize;
ProgInfo.LDSBlocks =
alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
@ -707,7 +707,8 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
if (STM.isXNACKEnabled())
header.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
header.kernarg_segment_byte_size = MFI->ABIArgOffset;
// FIXME: Should use getKernArgSize
header.kernarg_segment_byte_size = MFI->getABIArgOffset();
header.wavefront_sgpr_count = KernelInfo.NumSGPR;
header.workitem_vgpr_count = KernelInfo.NumVGPR;
header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;

View File

@ -763,24 +763,8 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
if (hasDefinedInitializer(GV))
break;
unsigned Offset;
if (MFI->LocalMemoryObjects.count(GV) == 0) {
unsigned Align = GV->getAlignment();
if (Align == 0)
Align = DL.getABITypeAlignment(GV->getValueType());
/// TODO: We should sort these to minimize wasted space due to alignment
/// padding. Currently the padding is decided by the first encountered use
/// during lowering.
Offset = MFI->LDSSize = alignTo(MFI->LDSSize, Align);
MFI->LocalMemoryObjects[GV] = Offset;
MFI->LDSSize += DL.getTypeAllocSize(GV->getValueType());
} else {
Offset = MFI->LocalMemoryObjects[GV];
}
return DAG.getConstant(Offset, SDLoc(Op),
getPointerTy(DL, AMDGPUAS::LOCAL_ADDRESS));
unsigned Offset = MFI->allocateLDSGlobal(DL, *GV);
return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType());
}
}
@ -2653,7 +2637,7 @@ SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
const AMDGPUMachineFunction *MFI, const ImplicitParameter Param) const {
uint64_t ArgOffset = MFI->ABIArgOffset;
uint64_t ArgOffset = MFI->getABIArgOffset();
switch (Param) {
case GRID_DIM:
return ArgOffset;

View File

@ -1,23 +1,47 @@
//===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "AMDGPUMachineFunction.h"
#include "AMDGPUSubtarget.h"
using namespace llvm;
// Pin the vtable to this file: an out-of-line definition of one virtual
// method gives the class a single "home" translation unit for its vtable.
void AMDGPUMachineFunction::anchor() {}
AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
MachineFunctionInfo(),
LocalMemoryObjects(),
KernArgSize(0),
MaxKernArgAlign(0),
LDSSize(0),
ABIArgOffset(0),
ScratchSize(0),
IsKernel(MF.getFunction()->getCallingConv() == llvm::CallingConv::AMDGPU_KERNEL ||
MF.getFunction()->getCallingConv() == llvm::CallingConv::SPIR_KERNEL)
{
IsKernel(MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_KERNEL ||
MF.getFunction()->getCallingConv() == CallingConv::SPIR_KERNEL) {
// FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
// except reserved size is not correctly aligned.
}
bool AMDGPUMachineFunction::isKernel() const
{
return IsKernel;
unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
const GlobalValue &GV) {
auto Entry = LocalMemoryObjects.insert(std::make_pair(&GV, 0));
if (!Entry.second)
return Entry.first->second;
unsigned Align = GV.getAlignment();
if (Align == 0)
Align = DL.getABITypeAlignment(GV.getValueType());
/// TODO: We should sort these to minimize wasted space due to alignment
/// padding. Currently the padding is decided by the first encountered use
/// during lowering.
unsigned Offset = LDSSize = alignTo(LDSSize, Align);
Entry.first->second = Offset;
LDSSize += DL.getTypeAllocSize(GV.getValueType());
return Offset;
}

View File

@ -11,15 +11,26 @@
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H
#include "llvm/CodeGen/MachineFunction.h"
#include <map>
#include "llvm/ADT/DenseMap.h"
namespace llvm {
class AMDGPUMachineFunction : public MachineFunctionInfo {
/// A map to keep track of local memory objects and their offsets within the
/// local memory space.
SmallDenseMap<const GlobalValue *, unsigned, 4> LocalMemoryObjects;
uint64_t KernArgSize;
unsigned MaxKernArgAlign;
virtual void anchor();
/// Number of bytes in the LDS that are being used.
unsigned LDSSize;
// FIXME: This should probably be removed.
/// Start of implicit kernel args
unsigned ABIArgOffset;
bool IsKernel;
public:
AMDGPUMachineFunction(const MachineFunction &MF);
@ -35,19 +46,27 @@ public:
return Result;
}
/// A map to keep track of local memory objects and their offsets within
/// the local memory space.
std::map<const GlobalValue *, unsigned> LocalMemoryObjects;
/// Number of bytes in the LDS that are being used.
unsigned LDSSize;
uint64_t getKernArgSize() const {
return KernArgSize;
}
/// Start of implicit kernel args
unsigned ABIArgOffset;
void setABIArgOffset(unsigned NewOffset) {
ABIArgOffset = NewOffset;
}
bool isKernel() const;
unsigned getABIArgOffset() const {
return ABIArgOffset;
}
unsigned ScratchSize;
bool IsKernel;
unsigned getLDSSize() const {
return LDSSize;
}
bool isKernel() const {
return IsKernel;
}
unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalValue &GV);
};
}

View File

@ -1756,7 +1756,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
unsigned PartOffset = VA.getLocMemOffset();
unsigned Offset = 36 + VA.getLocMemOffset();
unsigned Offset = Subtarget->getExplicitKernelArgOffset() + VA.getLocMemOffset();
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
SDValue Arg = DAG.getLoad(
@ -1767,7 +1767,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
// 4 is the preferred alignment for the CONSTANT memory space.
InVals.push_back(Arg);
MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
MFI->setABIArgOffset(Offset + MemVT.getStoreSize());
}
return Chain;
}

View File

@ -770,7 +770,7 @@ SDValue SITargetLowering::LowerFormalArguments(
}
InVals.push_back(Arg);
Info->ABIArgOffset = Offset + MemVT.getStoreSize();
Info->setABIArgOffset(Offset + MemVT.getStoreSize());
continue;
}
assert(VA.isRegLoc() && "Parameter must be in a register!");
@ -1435,7 +1435,7 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
DebugLoc DL = MI.getDebugLoc();
BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_MOV_B32))
.addOperand(MI.getOperand(0))
.addImm(MFI->LDSSize);
.addImm(MFI->getLDSSize());
MI.eraseFromParent();
return BB;
}

View File

@ -807,7 +807,7 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(
}
// Add FrameIndex to LDS offset
unsigned LDSOffset = MFI->LDSSize + (FrameOffset * WorkGroupSize);
unsigned LDSOffset = MFI->getLDSSize() + (FrameOffset * WorkGroupSize);
BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), TmpReg)
.addImm(LDSOffset)
.addReg(TIDReg);

View File

@ -516,7 +516,7 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
}
}
if (NeedFlat && MFI->IsKernel) {
if (NeedFlat && MFI->isKernel()) {
// TODO: What to use with function calls?
// We will need to Initialize the flat scratch register pair.
if (NeedFlat)

View File

@ -26,9 +26,6 @@ static cl::opt<bool> EnableSpillSGPRToVGPR(
cl::ReallyHidden,
cl::init(true));
// Pin the vtable to this file: the out-of-line anchor gives the class a
// single home translation unit for its vtable emission.
void SIMachineFunctionInfo::anchor() {}
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
: AMDGPUMachineFunction(MF),
TIDReg(AMDGPU::NoRegister),

View File

@ -28,7 +28,6 @@ class MachineRegisterInfo;
class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
// FIXME: This should be removed and getPreloadedValue moved here.
friend struct SIRegisterInfo;
void anchor() override;
unsigned TIDReg;