mirror of
https://github.com/RPCS3/llvm.git
synced 2026-01-31 01:25:19 +01:00
We have too many mechanisms for tracking the various offsets used for kernel arguments, so remove one. There's still a lot of confusion with these because there are two different "implicit" argument areas located at the beginning and end of the kernarg segment. Additionally, the offset was determined based on the memory size of the split element types. This would break in a future commit where v3i32 is decomposed into separate i32 pieces. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@335830 91177308-0d34-0410-b5e6-96231b3b80d8
91 lines
2.1 KiB
C++
91 lines
2.1 KiB
C++
//===-- AMDGPUMachineFunction.h ---------------------------------*- C++ -*-===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H
|
|
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H
|
|
|
|
#include "llvm/ADT/DenseMap.h"
|
|
#include "llvm/CodeGen/MachineFunction.h"
|
|
|
|
namespace llvm {
|
|
|
|
// Forward declaration only; no declaration in this header uses the type.
class AMDGPUSubtarget;
|
|
|
|
class AMDGPUMachineFunction : public MachineFunctionInfo {
|
|
/// A map to keep track of local memory objects and their offsets within the
|
|
/// local memory space.
|
|
SmallDenseMap<const GlobalValue *, unsigned, 4> LocalMemoryObjects;
|
|
|
|
protected:
|
|
uint64_t ExplicitKernArgSize;
|
|
unsigned MaxKernArgAlign;
|
|
|
|
/// Number of bytes in the LDS that are being used.
|
|
unsigned LDSSize;
|
|
|
|
// Kernels + shaders. i.e. functions called by the driver and not called
|
|
// by other functions.
|
|
bool IsEntryFunction;
|
|
|
|
bool NoSignedZerosFPMath;
|
|
|
|
// Function may be memory bound.
|
|
bool MemoryBound;
|
|
|
|
// Kernel may need limited waves per EU for better performance.
|
|
bool WaveLimiter;
|
|
|
|
public:
|
|
AMDGPUMachineFunction(const MachineFunction &MF);
|
|
|
|
uint64_t allocateKernArg(uint64_t Size, unsigned Align) {
|
|
assert(isPowerOf2_32(Align));
|
|
ExplicitKernArgSize = alignTo(ExplicitKernArgSize, Align);
|
|
|
|
uint64_t Result = ExplicitKernArgSize;
|
|
ExplicitKernArgSize += Size;
|
|
|
|
MaxKernArgAlign = std::max(Align, MaxKernArgAlign);
|
|
return Result;
|
|
}
|
|
|
|
uint64_t getExplicitKernArgSize() const {
|
|
return ExplicitKernArgSize;
|
|
}
|
|
|
|
unsigned getMaxKernArgAlign() const {
|
|
return MaxKernArgAlign;
|
|
}
|
|
|
|
unsigned getLDSSize() const {
|
|
return LDSSize;
|
|
}
|
|
|
|
bool isEntryFunction() const {
|
|
return IsEntryFunction;
|
|
}
|
|
|
|
bool hasNoSignedZerosFPMath() const {
|
|
return NoSignedZerosFPMath;
|
|
}
|
|
|
|
bool isMemoryBound() const {
|
|
return MemoryBound;
|
|
}
|
|
|
|
bool needsWaveLimiter() const {
|
|
return WaveLimiter;
|
|
}
|
|
|
|
unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalValue &GV);
|
|
};
|
|
|
|
}
|
|
#endif
|