Files
archived-llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Matt Arsenault 2e48864110 AMDGPU: Cleanup subtarget features
Try to avoid mutually exclusive features. Don't use
a real default GPU, and use a fake "generic". The goal
is to make it easier to see which set of features are
incompatible between feature strings.

Most of the test changes are due to random scheduling changes
from not having a default fullspeed model.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@310258 91177308-0d34-0410-b5e6-96231b3b80d8
2017-08-07 14:58:04 +00:00

845 lines
25 KiB
C++

//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "SIDefines.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <utility>
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#define GET_INSTRINFO_NAMED_OPS
#include "AMDGPUGenInstrInfo.inc"
#undef GET_INSTRINFO_NAMED_OPS
namespace {
/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
return ((1 << Width) - 1) << Shift;
}
/// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
Dst |= (Src << Shift) & getBitMask(Shift, Width);
return Dst;
}
/// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
return (Src & getBitMask(Shift, Width)) >> Shift;
}
/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo() { return 0; }
/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo() { return 4; }
/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }
/// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }
/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift() { return 8; }
/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }
/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi() { return 14; }
/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi() { return 2; }
} // end namespace anonymous
namespace llvm {
static cl::opt<bool> EnablePackedInlinableLiterals(
"enable-packed-inlinable-literals",
cl::desc("Enable packed inlinable literals (v2f16, v2i16)"),
cl::init(false));
namespace AMDGPU {
namespace IsaInfo {
IsaVersion getIsaVersion(const FeatureBitset &Features) {
// SI.
if (Features.test(FeatureISAVersion6_0_0))
return {6, 0, 0};
if (Features.test(FeatureISAVersion6_0_1))
return {6, 0, 1};
// CI.
if (Features.test(FeatureISAVersion7_0_0))
return {7, 0, 0};
if (Features.test(FeatureISAVersion7_0_1))
return {7, 0, 1};
if (Features.test(FeatureISAVersion7_0_2))
return {7, 0, 2};
if (Features.test(FeatureISAVersion7_0_3))
return {7, 0, 3};
// VI.
if (Features.test(FeatureISAVersion8_0_0))
return {8, 0, 0};
if (Features.test(FeatureISAVersion8_0_1))
return {8, 0, 1};
if (Features.test(FeatureISAVersion8_0_2))
return {8, 0, 2};
if (Features.test(FeatureISAVersion8_0_3))
return {8, 0, 3};
if (Features.test(FeatureISAVersion8_0_4))
return {8, 0, 4};
if (Features.test(FeatureISAVersion8_1_0))
return {8, 1, 0};
// GFX9.
if (Features.test(FeatureISAVersion9_0_0))
return {9, 0, 0};
if (Features.test(FeatureISAVersion9_0_1))
return {9, 0, 1};
if (Features.test(FeatureISAVersion9_0_2))
return {9, 0, 2};
if (Features.test(FeatureISAVersion9_0_3))
return {9, 0, 3};
if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
return {0, 0, 0};
return {7, 0, 0};
}
unsigned getWavefrontSize(const FeatureBitset &Features) {
if (Features.test(FeatureWavefrontSize16))
return 16;
if (Features.test(FeatureWavefrontSize32))
return 32;
return 64;
}
unsigned getLocalMemorySize(const FeatureBitset &Features) {
if (Features.test(FeatureLocalMemorySize32768))
return 32768;
if (Features.test(FeatureLocalMemorySize65536))
return 65536;
return 0;
}
unsigned getEUsPerCU(const FeatureBitset &Features) {
return 4;
}
unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
unsigned FlatWorkGroupSize) {
if (!Features.test(FeatureGCN))
return 8;
unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
if (N == 1)
return 40;
N = 40 / N;
return std::min(N, 16u);
}
unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
}
unsigned getMaxWavesPerCU(const FeatureBitset &Features,
unsigned FlatWorkGroupSize) {
return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
}
unsigned getMinWavesPerEU(const FeatureBitset &Features) {
return 1;
}
unsigned getMaxWavesPerEU(const FeatureBitset &Features) {
if (!Features.test(FeatureGCN))
return 8;
// FIXME: Need to take scratch memory into account.
return 10;
}
unsigned getMaxWavesPerEU(const FeatureBitset &Features,
unsigned FlatWorkGroupSize) {
return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
getEUsPerCU(Features)) / getEUsPerCU(Features);
}
unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
return 1;
}
unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
return 2048;
}
unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
unsigned FlatWorkGroupSize) {
return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
getWavefrontSize(Features);
}
unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
IsaVersion Version = getIsaVersion(Features);
if (Version.Major >= 8)
return 16;
return 8;
}
unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
return 8;
}
unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
IsaVersion Version = getIsaVersion(Features);
if (Version.Major >= 8)
return 800;
return 512;
}
unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
if (Features.test(FeatureSGPRInitBug))
return FIXED_NUM_SGPRS_FOR_INIT_BUG;
IsaVersion Version = getIsaVersion(Features);
if (Version.Major >= 8)
return 102;
return 104;
}
unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
assert(WavesPerEU != 0);
if (WavesPerEU >= getMaxWavesPerEU(Features))
return 0;
unsigned MinNumSGPRs =
alignDown(getTotalNumSGPRs(Features) / (WavesPerEU + 1),
getSGPRAllocGranule(Features)) + 1;
return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
}
unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
bool Addressable) {
assert(WavesPerEU != 0);
IsaVersion Version = getIsaVersion(Features);
unsigned MaxNumSGPRs = alignDown(getTotalNumSGPRs(Features) / WavesPerEU,
getSGPRAllocGranule(Features));
unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
if (Version.Major >= 8 && !Addressable)
AddressableNumSGPRs = 112;
return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}
unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
return 4;
}
unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
return getVGPRAllocGranule(Features);
}
unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
return 256;
}
unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
return getTotalNumVGPRs(Features);
}
unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
assert(WavesPerEU != 0);
if (WavesPerEU >= getMaxWavesPerEU(Features))
return 0;
unsigned MinNumVGPRs =
alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
getVGPRAllocGranule(Features)) + 1;
return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
}
unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
assert(WavesPerEU != 0);
unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
getVGPRAllocGranule(Features));
unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}
} // end namespace IsaInfo
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
const FeatureBitset &Features) {
IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);
memset(&Header, 0, sizeof(Header));
Header.amd_kernel_code_version_major = 1;
Header.amd_kernel_code_version_minor = 1;
Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
Header.amd_machine_version_major = ISA.Major;
Header.amd_machine_version_minor = ISA.Minor;
Header.amd_machine_version_stepping = ISA.Stepping;
Header.kernel_code_entry_byte_offset = sizeof(Header);
// wavefront_size is specified as a power of 2: 2^6 = 64 threads.
Header.wavefront_size = 6;
// If the code object does not support indirect functions, then the value must
// be 0xffffffff.
Header.call_convention = -1;
// These alignment values are specified in powers of two, so alignment =
// 2^n. The minimum alignment is 2^4 = 16.
Header.kernarg_segment_alignment = 4;
Header.group_segment_alignment = 4;
Header.private_segment_alignment = 4;
}
bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS) {
return GV->getType()->getAddressSpace() == AS.LOCAL_ADDRESS;
}
bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS) {
return GV->getType()->getAddressSpace() == AS.GLOBAL_ADDRESS;
}
bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS) {
return GV->getType()->getAddressSpace() == AS.CONSTANT_ADDRESS;
}
bool shouldEmitConstantsToTextSection(const Triple &TT) {
return TT.getOS() != Triple::AMDHSA;
}
int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
Attribute A = F.getFnAttribute(Name);
int Result = Default;
if (A.isStringAttribute()) {
StringRef Str = A.getValueAsString();
if (Str.getAsInteger(0, Result)) {
LLVMContext &Ctx = F.getContext();
Ctx.emitError("can't parse integer attribute " + Name);
}
}
return Result;
}
std::pair<int, int> getIntegerPairAttribute(const Function &F,
StringRef Name,
std::pair<int, int> Default,
bool OnlyFirstRequired) {
Attribute A = F.getFnAttribute(Name);
if (!A.isStringAttribute())
return Default;
LLVMContext &Ctx = F.getContext();
std::pair<int, int> Ints = Default;
std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
if (Strs.first.trim().getAsInteger(0, Ints.first)) {
Ctx.emitError("can't parse first integer attribute " + Name);
return Default;
}
if (Strs.second.trim().getAsInteger(0, Ints.second)) {
if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
Ctx.emitError("can't parse second integer attribute " + Name);
return Default;
}
}
return Ints;
}
unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
if (Version.Major < 9)
return VmcntLo;
unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
return VmcntLo | VmcntHi;
}
unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
return (1 << getExpcntBitWidth()) - 1;
}
unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
return (1 << getLgkmcntBitWidth()) - 1;
}
unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
if (Version.Major < 9)
return Waitcnt;
unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
return Waitcnt | VmcntHi;
}
unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
unsigned VmcntLo =
unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
if (Version.Major < 9)
return VmcntLo;
unsigned VmcntHi =
unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
VmcntHi <<= getVmcntBitWidthLo();
return VmcntLo | VmcntHi;
}
unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}
unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}
void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
Vmcnt = decodeVmcnt(Version, Waitcnt);
Expcnt = decodeExpcnt(Version, Waitcnt);
Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}
unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
unsigned Vmcnt) {
Waitcnt =
packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
if (Version.Major < 9)
return Waitcnt;
Vmcnt >>= getVmcntBitWidthLo();
return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
}
unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
unsigned Expcnt) {
return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}
unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
unsigned Lgkmcnt) {
return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}
unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
unsigned Waitcnt = getWaitcntBitMask(Version);
Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
return Waitcnt;
}
unsigned getInitialPSInputAddr(const Function &F) {
return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}
bool isShader(CallingConv::ID cc) {
switch(cc) {
case CallingConv::AMDGPU_VS:
case CallingConv::AMDGPU_HS:
case CallingConv::AMDGPU_GS:
case CallingConv::AMDGPU_PS:
case CallingConv::AMDGPU_CS:
return true;
default:
return false;
}
}
bool isCompute(CallingConv::ID cc) {
return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
}
bool isEntryFunctionCC(CallingConv::ID CC) {
switch (CC) {
case CallingConv::AMDGPU_KERNEL:
case CallingConv::SPIR_KERNEL:
case CallingConv::AMDGPU_VS:
case CallingConv::AMDGPU_GS:
case CallingConv::AMDGPU_PS:
case CallingConv::AMDGPU_CS:
case CallingConv::AMDGPU_HS:
return true;
default:
return false;
}
}
bool isSI(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}
bool isCI(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
}
bool isVI(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}
bool isGFX9(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}
bool isGCN3Encoding(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
}
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
Reg == AMDGPU::SCC;
}
bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
if (*R == Reg1) return true;
}
return false;
}
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
switch(Reg) {
default: break;
case AMDGPU::FLAT_SCR:
assert(!isSI(STI));
return isCI(STI) ? AMDGPU::FLAT_SCR_ci : AMDGPU::FLAT_SCR_vi;
case AMDGPU::FLAT_SCR_LO:
assert(!isSI(STI));
return isCI(STI) ? AMDGPU::FLAT_SCR_LO_ci : AMDGPU::FLAT_SCR_LO_vi;
case AMDGPU::FLAT_SCR_HI:
assert(!isSI(STI));
return isCI(STI) ? AMDGPU::FLAT_SCR_HI_ci : AMDGPU::FLAT_SCR_HI_vi;
}
return Reg;
}
unsigned mc2PseudoReg(unsigned Reg) {
switch (Reg) {
case AMDGPU::FLAT_SCR_ci:
case AMDGPU::FLAT_SCR_vi:
return FLAT_SCR;
case AMDGPU::FLAT_SCR_LO_ci:
case AMDGPU::FLAT_SCR_LO_vi:
return AMDGPU::FLAT_SCR_LO;
case AMDGPU::FLAT_SCR_HI_ci:
case AMDGPU::FLAT_SCR_HI_vi:
return AMDGPU::FLAT_SCR_HI;
default:
return Reg;
}
}
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
assert(OpNo < Desc.NumOperands);
unsigned OpType = Desc.OpInfo[OpNo].OperandType;
return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
OpType <= AMDGPU::OPERAND_SRC_LAST;
}
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
assert(OpNo < Desc.NumOperands);
unsigned OpType = Desc.OpInfo[OpNo].OperandType;
switch (OpType) {
case AMDGPU::OPERAND_REG_IMM_FP32:
case AMDGPU::OPERAND_REG_IMM_FP64:
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
return true;
default:
return false;
}
}
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
assert(OpNo < Desc.NumOperands);
unsigned OpType = Desc.OpInfo[OpNo].OperandType;
return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
}
// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
unsigned getRegBitWidth(unsigned RCID) {
switch (RCID) {
case AMDGPU::SGPR_32RegClassID:
case AMDGPU::VGPR_32RegClassID:
case AMDGPU::VS_32RegClassID:
case AMDGPU::SReg_32RegClassID:
case AMDGPU::SReg_32_XM0RegClassID:
return 32;
case AMDGPU::SGPR_64RegClassID:
case AMDGPU::VS_64RegClassID:
case AMDGPU::SReg_64RegClassID:
case AMDGPU::VReg_64RegClassID:
return 64;
case AMDGPU::VReg_96RegClassID:
return 96;
case AMDGPU::SGPR_128RegClassID:
case AMDGPU::SReg_128RegClassID:
case AMDGPU::VReg_128RegClassID:
return 128;
case AMDGPU::SReg_256RegClassID:
case AMDGPU::VReg_256RegClassID:
return 256;
case AMDGPU::SReg_512RegClassID:
case AMDGPU::VReg_512RegClassID:
return 512;
default:
llvm_unreachable("Unexpected register class");
}
}
unsigned getRegBitWidth(const MCRegisterClass &RC) {
return getRegBitWidth(RC.getID());
}
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
unsigned OpNo) {
assert(OpNo < Desc.NumOperands);
unsigned RCID = Desc.OpInfo[OpNo].RegClass;
return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
if (Literal >= -16 && Literal <= 64)
return true;
uint64_t Val = static_cast<uint64_t>(Literal);
return (Val == DoubleToBits(0.0)) ||
(Val == DoubleToBits(1.0)) ||
(Val == DoubleToBits(-1.0)) ||
(Val == DoubleToBits(0.5)) ||
(Val == DoubleToBits(-0.5)) ||
(Val == DoubleToBits(2.0)) ||
(Val == DoubleToBits(-2.0)) ||
(Val == DoubleToBits(4.0)) ||
(Val == DoubleToBits(-4.0)) ||
(Val == 0x3fc45f306dc9c882 && HasInv2Pi);
}
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
if (Literal >= -16 && Literal <= 64)
return true;
// The actual type of the operand does not seem to matter as long
// as the bits match one of the inline immediate values. For example:
//
// -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
// so it is a legal inline immediate.
//
// 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
// floating-point, so it is a legal inline immediate.
uint32_t Val = static_cast<uint32_t>(Literal);
return (Val == FloatToBits(0.0f)) ||
(Val == FloatToBits(1.0f)) ||
(Val == FloatToBits(-1.0f)) ||
(Val == FloatToBits(0.5f)) ||
(Val == FloatToBits(-0.5f)) ||
(Val == FloatToBits(2.0f)) ||
(Val == FloatToBits(-2.0f)) ||
(Val == FloatToBits(4.0f)) ||
(Val == FloatToBits(-4.0f)) ||
(Val == 0x3e22f983 && HasInv2Pi);
}
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
if (!HasInv2Pi)
return false;
if (Literal >= -16 && Literal <= 64)
return true;
uint16_t Val = static_cast<uint16_t>(Literal);
return Val == 0x3C00 || // 1.0
Val == 0xBC00 || // -1.0
Val == 0x3800 || // 0.5
Val == 0xB800 || // -0.5
Val == 0x4000 || // 2.0
Val == 0xC000 || // -2.0
Val == 0x4400 || // 4.0
Val == 0xC400 || // -4.0
Val == 0x3118; // 1/2pi
}
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
assert(HasInv2Pi);
if (!EnablePackedInlinableLiterals)
return false;
int16_t Lo16 = static_cast<int16_t>(Literal);
int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
}
bool isArgPassedInSGPR(const Argument *A) {
const Function *F = A->getParent();
// Arguments to compute shaders are never a source of divergence.
CallingConv::ID CC = F->getCallingConv();
switch (CC) {
case CallingConv::AMDGPU_KERNEL:
case CallingConv::SPIR_KERNEL:
return true;
case CallingConv::AMDGPU_VS:
case CallingConv::AMDGPU_HS:
case CallingConv::AMDGPU_GS:
case CallingConv::AMDGPU_PS:
case CallingConv::AMDGPU_CS:
// For non-compute shaders, SGPR inputs are marked with either inreg or byval.
// Everything else is in VGPRs.
return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
default:
// TODO: Should calls support inreg for SGPR inputs?
return false;
}
}
// TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
bool isUniformMMO(const MachineMemOperand *MMO) {
const Value *Ptr = MMO->getValue();
// UndefValue means this is a load of a kernel input. These are uniform.
// Sometimes LDS instructions have constant pointers.
// If Ptr is null, then that means this mem operand contains a
// PseudoSourceValue like GOT.
if (!Ptr || isa<UndefValue>(Ptr) ||
isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
return true;
if (const Argument *Arg = dyn_cast<Argument>(Ptr))
return isArgPassedInSGPR(Arg);
const Instruction *I = dyn_cast<Instruction>(Ptr);
return I && I->getMetadata("amdgpu.uniform");
}
int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
if (isGCN3Encoding(ST))
return ByteOffset;
return ByteOffset >> 2;
}
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
return isGCN3Encoding(ST) ?
isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
}
} // end namespace AMDGPU
} // end namespace llvm
const unsigned AMDGPUAS::MAX_COMMON_ADDRESS;
const unsigned AMDGPUAS::GLOBAL_ADDRESS;
const unsigned AMDGPUAS::LOCAL_ADDRESS;
const unsigned AMDGPUAS::PARAM_D_ADDRESS;
const unsigned AMDGPUAS::PARAM_I_ADDRESS;
const unsigned AMDGPUAS::CONSTANT_BUFFER_0;
const unsigned AMDGPUAS::CONSTANT_BUFFER_1;
const unsigned AMDGPUAS::CONSTANT_BUFFER_2;
const unsigned AMDGPUAS::CONSTANT_BUFFER_3;
const unsigned AMDGPUAS::CONSTANT_BUFFER_4;
const unsigned AMDGPUAS::CONSTANT_BUFFER_5;
const unsigned AMDGPUAS::CONSTANT_BUFFER_6;
const unsigned AMDGPUAS::CONSTANT_BUFFER_7;
const unsigned AMDGPUAS::CONSTANT_BUFFER_8;
const unsigned AMDGPUAS::CONSTANT_BUFFER_9;
const unsigned AMDGPUAS::CONSTANT_BUFFER_10;
const unsigned AMDGPUAS::CONSTANT_BUFFER_11;
const unsigned AMDGPUAS::CONSTANT_BUFFER_12;
const unsigned AMDGPUAS::CONSTANT_BUFFER_13;
const unsigned AMDGPUAS::CONSTANT_BUFFER_14;
const unsigned AMDGPUAS::CONSTANT_BUFFER_15;
const unsigned AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
namespace llvm {
namespace AMDGPU {
AMDGPUAS getAMDGPUAS(Triple T) {
auto Env = T.getEnvironmentName();
AMDGPUAS AS;
if (Env == "amdgiz" || Env == "amdgizcl") {
AS.FLAT_ADDRESS = 0;
AS.PRIVATE_ADDRESS = 5;
AS.REGION_ADDRESS = 4;
}
else {
AS.FLAT_ADDRESS = 4;
AS.PRIVATE_ADDRESS = 0;
AS.REGION_ADDRESS = 5;
}
return AS;
}
AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
return getAMDGPUAS(M.getTargetTriple());
}
AMDGPUAS getAMDGPUAS(const Module &M) {
return getAMDGPUAS(Triple(M.getTargetTriple()));
}
} // namespace AMDGPU
} // namespace llvm