mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-27 07:12:06 +00:00
AMDGPU: Merge initial gfx9 support
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@295554 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
d0fd4adddf
commit
83c857cd3a
@ -47,7 +47,7 @@ FunctionPass *createSIDebuggerInsertNopsPass();
|
||||
FunctionPass *createSIInsertWaitsPass();
|
||||
FunctionPass *createAMDGPUCodeGenPreparePass(const GCNTargetMachine *TM = nullptr);
|
||||
|
||||
ModulePass *createAMDGPUAnnotateKernelFeaturesPass();
|
||||
ModulePass *createAMDGPUAnnotateKernelFeaturesPass(const TargetMachine *TM = nullptr);
|
||||
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
|
||||
extern char &AMDGPUAnnotateKernelFeaturesID;
|
||||
|
||||
|
@ -79,6 +79,12 @@ def FeatureUnalignedScratchAccess : SubtargetFeature<"unaligned-scratch-access",
|
||||
"Support unaligned scratch loads and stores"
|
||||
>;
|
||||
|
||||
def FeatureApertureRegs : SubtargetFeature<"aperture-regs",
|
||||
"HasApertureRegs",
|
||||
"true",
|
||||
"Has Memory Aperture Base and Size Registers"
|
||||
>;
|
||||
|
||||
// XNACK is disabled if SH_MEM_CONFIG.ADDRESS_MODE = GPUVM on chips that support
|
||||
// XNACK. The current default kernel driver setting is:
|
||||
// - graphics ring: XNACK disabled
|
||||
@ -377,6 +383,15 @@ def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
|
||||
]
|
||||
>;
|
||||
|
||||
def FeatureGFX9 : SubtargetFeatureGeneration<"GFX9",
|
||||
[FeatureFP64, FeatureLocalMemorySize65536,
|
||||
FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
|
||||
FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
|
||||
FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm,
|
||||
FeatureApertureRegs
|
||||
]
|
||||
>;
|
||||
|
||||
class SubtargetFeatureISAVersion <int Major, int Minor, int Stepping,
|
||||
list<SubtargetFeature> Implies>
|
||||
: SubtargetFeature <
|
||||
@ -429,6 +444,9 @@ def FeatureISAVersion8_1_0 : SubtargetFeatureISAVersion <8,1,0,
|
||||
FeatureLDSBankCount16,
|
||||
FeatureXNACK]>;
|
||||
|
||||
def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0,[]>;
|
||||
def FeatureISAVersion9_0_1 : SubtargetFeatureISAVersion <9,0,1,[]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Debugger related subtarget features.
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -534,10 +552,10 @@ def isVI : Predicate <
|
||||
"Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
|
||||
AssemblerPredicate<"FeatureGCN3Encoding">;
|
||||
|
||||
// TODO: Either the name should be changed or we should simply use IsCI!
|
||||
def isCIVI : Predicate <
|
||||
"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS || "
|
||||
"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS"
|
||||
>, AssemblerPredicate<"FeatureCIInsts">;
|
||||
"Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">,
|
||||
AssemblerPredicate<"FeatureCIInsts">;
|
||||
|
||||
def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">;
|
||||
|
||||
|
@ -13,6 +13,7 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
#include "llvm/ADT/Triple.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
@ -26,6 +27,7 @@ namespace {
|
||||
|
||||
class AMDGPUAnnotateKernelFeatures : public ModulePass {
|
||||
private:
|
||||
const TargetMachine *TM;
|
||||
static bool hasAddrSpaceCast(const Function &F);
|
||||
|
||||
void addAttrToCallers(Function *Intrin, StringRef AttrName);
|
||||
@ -34,7 +36,8 @@ private:
|
||||
public:
|
||||
static char ID;
|
||||
|
||||
AMDGPUAnnotateKernelFeatures() : ModulePass(ID) { }
|
||||
AMDGPUAnnotateKernelFeatures(const TargetMachine *TM_ = nullptr) :
|
||||
ModulePass(ID), TM(TM_) {}
|
||||
bool runOnModule(Module &M) override;
|
||||
StringRef getPassName() const override {
|
||||
return "AMDGPU Annotate Kernel Features";
|
||||
@ -211,7 +214,9 @@ bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
|
||||
if (F.hasFnAttribute("amdgpu-queue-ptr"))
|
||||
continue;
|
||||
|
||||
if (hasAddrSpaceCast(F))
|
||||
bool HasApertureRegs =
|
||||
TM && TM->getSubtarget<AMDGPUSubtarget>(F).hasApertureRegs();
|
||||
if (!HasApertureRegs && hasAddrSpaceCast(F))
|
||||
F.addFnAttr("amdgpu-queue-ptr");
|
||||
}
|
||||
}
|
||||
@ -219,6 +224,6 @@ bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
|
||||
return Changed;
|
||||
}
|
||||
|
||||
ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
|
||||
return new AMDGPUAnnotateKernelFeatures();
|
||||
ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass(const TargetMachine *TM) {
|
||||
return new AMDGPUAnnotateKernelFeatures(TM);
|
||||
}
|
||||
|
@ -381,6 +381,10 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
|
||||
case AMDGPU::EXEC_HI:
|
||||
case AMDGPU::SCC:
|
||||
case AMDGPU::M0:
|
||||
case AMDGPU::SRC_SHARED_BASE:
|
||||
case AMDGPU::SRC_SHARED_LIMIT:
|
||||
case AMDGPU::SRC_PRIVATE_BASE:
|
||||
case AMDGPU::SRC_PRIVATE_LIMIT:
|
||||
continue;
|
||||
|
||||
case AMDGPU::VCC:
|
||||
|
@ -86,6 +86,7 @@ static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
|
||||
case AMDGPUSubtarget::SEA_ISLANDS:
|
||||
return SIEncodingFamily::SI;
|
||||
case AMDGPUSubtarget::VOLCANIC_ISLANDS:
|
||||
case AMDGPUSubtarget::GFX9:
|
||||
return SIEncodingFamily::VI;
|
||||
|
||||
// FIXME: This should never be called for r600 GPUs.
|
||||
|
@ -93,6 +93,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||
UnalignedScratchAccess(false),
|
||||
UnalignedBufferAccess(false),
|
||||
|
||||
HasApertureRegs(false),
|
||||
EnableXNACK(false),
|
||||
TrapHandler(false),
|
||||
DebuggerInsertNops(false),
|
||||
|
@ -51,6 +51,7 @@ public:
|
||||
SOUTHERN_ISLANDS,
|
||||
SEA_ISLANDS,
|
||||
VOLCANIC_ISLANDS,
|
||||
GFX9,
|
||||
};
|
||||
|
||||
enum {
|
||||
@ -64,6 +65,8 @@ public:
|
||||
ISAVersion8_0_3,
|
||||
ISAVersion8_0_4,
|
||||
ISAVersion8_1_0,
|
||||
ISAVersion9_0_0,
|
||||
ISAVersion9_0_1
|
||||
};
|
||||
|
||||
enum TrapHandlerAbi {
|
||||
@ -103,6 +106,7 @@ protected:
|
||||
bool FlatForGlobal;
|
||||
bool UnalignedScratchAccess;
|
||||
bool UnalignedBufferAccess;
|
||||
bool HasApertureRegs;
|
||||
bool EnableXNACK;
|
||||
bool TrapHandler;
|
||||
bool DebuggerInsertNops;
|
||||
@ -330,6 +334,10 @@ public:
|
||||
return UnalignedScratchAccess;
|
||||
}
|
||||
|
||||
/// \returns the value of the HasApertureRegs subtarget flag, which is
/// initialized to false and corresponds to the "aperture-regs" feature
/// ("Has Memory Aperture Base and Size Registers").
bool hasApertureRegs() const {
|
||||
return HasApertureRegs;
|
||||
}
|
||||
|
||||
bool isTrapHandlerEnabled() const {
|
||||
return TrapHandler;
|
||||
}
|
||||
@ -645,6 +653,14 @@ public:
|
||||
return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
|
||||
}
|
||||
|
||||
/// \returns true if the subtarget generation is GFX9 or later. The hazard
/// recognizer consults this before checking for an instruction that reads an
/// SGPR after a write from s_mov_fed_b32.
bool hasSMovFedHazard() const {
|
||||
return getGeneration() >= AMDGPUSubtarget::GFX9;
|
||||
}
|
||||
|
||||
/// \returns true if the subtarget generation is GFX9 or later. The hazard
/// recognizer consults this before computing wait states between an SALU
/// definition of M0 and a v_interp* / s_movrel* instruction.
bool hasReadM0Hazard() const {
|
||||
return getGeneration() >= AMDGPUSubtarget::GFX9;
|
||||
}
|
||||
|
||||
unsigned getKernArgSegmentSize(const MachineFunction &MF, unsigned ExplictArgBytes) const;
|
||||
|
||||
/// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs
|
||||
@ -656,7 +672,13 @@ public:
|
||||
/// \returns True if waitcnt instruction is needed before barrier instruction,
|
||||
/// false otherwise.
|
||||
bool needWaitcntBeforeBarrier() const {
|
||||
return true;
|
||||
return getGeneration() < GFX9;
|
||||
}
|
||||
|
||||
/// \returns true if the flat_scratch register should be initialized with the
|
||||
/// pointer to the wave's scratch memory rather than a size and offset.
|
||||
bool flatScratchIsPointer() const {
|
||||
return getGeneration() >= GFX9;
|
||||
}
|
||||
|
||||
/// \returns SGPR allocation granularity supported by the subtarget.
|
||||
|
@ -598,7 +598,8 @@ bool GCNPassConfig::addPreISel() {
|
||||
|
||||
// FIXME: We need to run a pass to propagate the attributes when calls are
|
||||
// supported.
|
||||
addPass(&AMDGPUAnnotateKernelFeaturesID);
|
||||
const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();
|
||||
addPass(createAMDGPUAnnotateKernelFeaturesPass(&TM));
|
||||
addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions
|
||||
addPass(createSinkingPass());
|
||||
addPass(createSITypeRewriter());
|
||||
|
@ -71,6 +71,18 @@ static bool isRFE(unsigned Opcode) {
|
||||
return Opcode == AMDGPU::S_RFE_B64;
|
||||
}
|
||||
|
||||
static bool isSMovRel(unsigned Opcode) {
|
||||
return Opcode == AMDGPU::S_MOVRELS_B32 || AMDGPU::S_MOVRELS_B64 ||
|
||||
Opcode == AMDGPU::S_MOVRELD_B32 || AMDGPU::S_MOVRELD_B64;
|
||||
}
|
||||
|
||||
static bool isVInterp(unsigned Opcode) {
|
||||
return Opcode == AMDGPU::V_INTERP_P1_F32 ||
|
||||
Opcode == AMDGPU::V_INTERP_P1_F32_16bank ||
|
||||
Opcode == AMDGPU::V_INTERP_P2_F32 ||
|
||||
Opcode == AMDGPU::V_INTERP_MOV_F32;
|
||||
}
|
||||
|
||||
static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
|
||||
const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
|
||||
AMDGPU::OpName::simm16);
|
||||
@ -108,6 +120,13 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
|
||||
if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
|
||||
return NoopHazard;
|
||||
|
||||
if ((isVInterp(MI->getOpcode()) || isSMovRel(MI->getOpcode())) &&
|
||||
checkReadM0Hazards(MI) > 0)
|
||||
return NoopHazard;
|
||||
|
||||
if (checkAnyInstHazards(MI) > 0)
|
||||
return NoopHazard;
|
||||
|
||||
return NoHazard;
|
||||
}
|
||||
|
||||
@ -116,11 +135,13 @@ unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) {
|
||||
}
|
||||
|
||||
unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
|
||||
int WaitStates = std::max(0, checkAnyInstHazards(MI));
|
||||
|
||||
if (SIInstrInfo::isSMRD(*MI))
|
||||
return std::max(0, checkSMRDHazards(MI));
|
||||
return std::max(WaitStates, checkSMRDHazards(MI));
|
||||
|
||||
if (SIInstrInfo::isVALU(*MI)) {
|
||||
int WaitStates = std::max(0, checkVALUHazards(MI));
|
||||
WaitStates = std::max(WaitStates, checkVALUHazards(MI));
|
||||
|
||||
if (SIInstrInfo::isVMEM(*MI))
|
||||
WaitStates = std::max(WaitStates, checkVMEMHazards(MI));
|
||||
@ -134,19 +155,25 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
|
||||
if (isRWLane(MI->getOpcode()))
|
||||
WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));
|
||||
|
||||
if (isVInterp(MI->getOpcode()))
|
||||
WaitStates = std::max(WaitStates, checkReadM0Hazards(MI));
|
||||
|
||||
return WaitStates;
|
||||
}
|
||||
|
||||
if (isSGetReg(MI->getOpcode()))
|
||||
return std::max(0, checkGetRegHazards(MI));
|
||||
return std::max(WaitStates, checkGetRegHazards(MI));
|
||||
|
||||
if (isSSetReg(MI->getOpcode()))
|
||||
return std::max(0, checkSetRegHazards(MI));
|
||||
return std::max(WaitStates, checkSetRegHazards(MI));
|
||||
|
||||
if (isRFE(MI->getOpcode()))
|
||||
return std::max(0, checkRFEHazards(MI));
|
||||
return std::max(WaitStates, checkRFEHazards(MI));
|
||||
|
||||
return 0;
|
||||
if (isSMovRel(MI->getOpcode()))
|
||||
return std::max(WaitStates, checkReadM0Hazards(MI));
|
||||
|
||||
return WaitStates;
|
||||
}
|
||||
|
||||
void GCNHazardRecognizer::EmitNoop() {
|
||||
@ -508,3 +535,42 @@ int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
|
||||
int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn);
|
||||
return RFEWaitStates - WaitStatesNeeded;
|
||||
}
|
||||
|
||||
/// Check hazards that can apply to any instruction.
///
/// Currently this covers a single case: an instruction reading a non-VGPR
/// register that was written by s_mov_fed_b32 needs one wait state in between.
///
/// \returns 0 for debug values and for subtargets without the hazard;
/// otherwise the largest number of wait states still missing across all
/// checked register uses of \p MI (never negative).
int GCNHazardRecognizer::checkAnyInstHazards(MachineInstr *MI) {
  if (MI->isDebugValue())
    return 0;

  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  if (!ST.hasSMovFedHazard())
    return 0;

  // Wait states required between s_mov_fed_b32 and a reader of its result.
  const int MovFedWaitStates = 1;

  // The predicate does not depend on the operand, so build it once.
  auto IsMovFed = [](MachineInstr *Def) {
    return Def->getOpcode() == AMDGPU::S_MOV_FED_B32;
  };

  int WaitStatesNeeded = 0;
  for (const MachineOperand &Use : MI->uses()) {
    if (!Use.isReg())
      continue;
    // Only non-VGPR register reads are subject to this check.
    if (TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
      continue;

    const int Shortfall =
        MovFedWaitStates - getWaitStatesSinceDef(Use.getReg(), IsMovFed);
    if (Shortfall > WaitStatesNeeded)
      WaitStatesNeeded = Shortfall;
  }

  return WaitStatesNeeded;
}
|
||||
|
||||
/// Compute the wait states still required after an SALU instruction defined
/// M0. The instruction argument itself is not inspected here; callers decide
/// which instructions this applies to.
///
/// \returns 0 on subtargets without the hazard; otherwise the required wait
/// state count (1) minus the wait states elapsed since the last SALU
/// definition of M0. Callers treat only positive results as a hazard.
int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
  if (!ST.hasReadM0Hazard())
    return 0;

  const SIInstrInfo *TII = ST.getInstrInfo();
  auto IsSALUDef = [TII](MachineInstr *Def) { return TII->isSALU(*Def); };

  const int SMovRelWaitStates = 1;
  const int WaitStatesSinceM0Def =
      getWaitStatesSinceDef(AMDGPU::M0, IsSALUDef);
  return SMovRelWaitStates - WaitStatesSinceM0Def;
}
|
||||
|
@ -52,6 +52,8 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
|
||||
int checkVALUHazards(MachineInstr *VALU);
|
||||
int checkRWLaneHazards(MachineInstr *RWLane);
|
||||
int checkRFEHazards(MachineInstr *RFE);
|
||||
int checkAnyInstHazards(MachineInstr *MI);
|
||||
int checkReadM0Hazards(MachineInstr *SMovRel);
|
||||
public:
|
||||
GCNHazardRecognizer(const MachineFunction &MF);
|
||||
// We can only issue one instruction per cycle.
|
||||
|
@ -187,3 +187,10 @@ def : ProcessorModel<"gfx810", SIQuarterSpeedModel,
|
||||
[FeatureISAVersion8_1_0]
|
||||
>;
|
||||
|
||||
def : ProcessorModel<"gfx900", SIQuarterSpeedModel,
|
||||
[FeatureGFX9, FeatureISAVersion9_0_0, FeatureLDSBankCount32]
|
||||
>;
|
||||
|
||||
def : ProcessorModel<"gfx901", SIQuarterSpeedModel,
|
||||
[FeatureGFX9, FeatureXNACK, FeatureISAVersion9_0_1, FeatureLDSBankCount32]
|
||||
>;
|
||||
|
@ -33,10 +33,12 @@ static ArrayRef<MCPhysReg> getAllSGPRs(const SISubtarget &ST,
|
||||
ST.getMaxNumSGPRs(MF));
|
||||
}
|
||||
|
||||
void SIFrameLowering::emitFlatScratchInit(const SIInstrInfo *TII,
|
||||
const SIRegisterInfo* TRI,
|
||||
void SIFrameLowering::emitFlatScratchInit(const SISubtarget &ST,
|
||||
MachineFunction &MF,
|
||||
MachineBasicBlock &MBB) const {
|
||||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
const SIRegisterInfo* TRI = &TII->getRegisterInfo();
|
||||
|
||||
// We don't need this if we only have spills since there is no user facing
|
||||
// scratch.
|
||||
|
||||
@ -59,16 +61,28 @@ void SIFrameLowering::emitFlatScratchInit(const SIInstrInfo *TII,
|
||||
MRI.addLiveIn(FlatScratchInitReg);
|
||||
MBB.addLiveIn(FlatScratchInitReg);
|
||||
|
||||
// Copy the size in bytes.
|
||||
unsigned FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
|
||||
.addReg(FlatScrInitHi, RegState::Kill);
|
||||
|
||||
unsigned FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
|
||||
unsigned FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
|
||||
|
||||
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
|
||||
|
||||
// Do a 64-bit pointer add.
|
||||
if (ST.flatScratchIsPointer()) {
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
|
||||
.addReg(FlatScrInitLo)
|
||||
.addReg(ScratchWaveOffsetReg);
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI)
|
||||
.addReg(FlatScrInitHi)
|
||||
.addImm(0);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// Copy the size in bytes.
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
|
||||
.addReg(FlatScrInitHi, RegState::Kill);
|
||||
|
||||
// Add wave offset in bytes to private base offset.
|
||||
// See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
|
||||
@ -229,7 +243,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
// emitted after frame indices are eliminated.
|
||||
|
||||
if (MF.getFrameInfo().hasStackObjects() && MFI->hasFlatScratchInit())
|
||||
emitFlatScratchInit(TII, TRI, MF, MBB);
|
||||
emitFlatScratchInit(ST, MF, MBB);
|
||||
|
||||
// We need to insert initialization of the scratch resource descriptor.
|
||||
unsigned PreloadedScratchWaveOffsetReg = TRI->getPreloadedValue(
|
||||
|
@ -36,8 +36,7 @@ public:
|
||||
RegScavenger *RS = nullptr) const override;
|
||||
|
||||
private:
|
||||
void emitFlatScratchInit(const SIInstrInfo *TII,
|
||||
const SIRegisterInfo* TRI,
|
||||
void emitFlatScratchInit(const SISubtarget &ST,
|
||||
MachineFunction &MF,
|
||||
MachineBasicBlock &MBB) const;
|
||||
|
||||
|
@ -532,7 +532,7 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
|
||||
// in 8-bits, it can use a smaller encoding.
|
||||
if (!isUInt<32>(AM.BaseOffs / 4))
|
||||
return false;
|
||||
} else if (Subtarget->getGeneration() == SISubtarget::VOLCANIC_ISLANDS) {
|
||||
} else if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
|
||||
// On VI, these use the SMEM format and the offset is 20-bit in bytes.
|
||||
if (!isUInt<20>(AM.BaseOffs))
|
||||
return false;
|
||||
@ -2233,6 +2233,13 @@ SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
|
||||
|
||||
SDValue SITargetLowering::getSegmentAperture(unsigned AS,
|
||||
SelectionDAG &DAG) const {
|
||||
|
||||
if (Subtarget->hasApertureRegs()) { // Read from Aperture Registers directly.
|
||||
unsigned RegNo = (AS == AMDGPUAS::LOCAL_ADDRESS) ? AMDGPU::SRC_SHARED_BASE :
|
||||
AMDGPU::SRC_PRIVATE_BASE;
|
||||
return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass, RegNo, MVT::i32);
|
||||
}
|
||||
|
||||
SDLoc SL;
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
|
||||
|
@ -133,6 +133,12 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
|
||||
reserveRegisterTuples(Reserved, AMDGPU::EXEC);
|
||||
reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
|
||||
|
||||
// Reserve the memory aperture registers.
|
||||
reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
|
||||
reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
|
||||
reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
|
||||
reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
|
||||
|
||||
// Reserve Trap Handler registers - support is not implemented in Codegen.
|
||||
reserveRegisterTuples(Reserved, AMDGPU::TBA);
|
||||
reserveRegisterTuples(Reserved, AMDGPU::TMA);
|
||||
|
@ -44,6 +44,11 @@ def EXEC : RegisterWithSubRegs<"EXEC", [EXEC_LO, EXEC_HI]>,
|
||||
def SCC : SIReg<"scc", 253>;
|
||||
def M0 : SIReg <"m0", 124>;
|
||||
|
||||
def SRC_SHARED_BASE : SIReg<"src_shared_base", 235>;
|
||||
def SRC_SHARED_LIMIT : SIReg<"src_shared_limit", 236>;
|
||||
def SRC_PRIVATE_BASE : SIReg<"src_private_base", 237>;
|
||||
def SRC_PRIVATE_LIMIT : SIReg<"src_private_limit", 238>;
|
||||
|
||||
// Trap handler registers
|
||||
def TBA_LO : SIReg<"tba_lo", 108>;
|
||||
def TBA_HI : SIReg<"tba_hi", 109>;
|
||||
@ -260,7 +265,8 @@ def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
|
||||
// See comments in SIInstructions.td for more info.
|
||||
def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
|
||||
(add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI,
|
||||
TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI)> {
|
||||
TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT,
|
||||
SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT)> {
|
||||
let AllocationPriority = 7;
|
||||
}
|
||||
|
||||
|
@ -72,11 +72,11 @@ unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
|
||||
return (Src & getBitMask(Shift, Width)) >> Shift;
|
||||
}
|
||||
|
||||
/// \returns Vmcnt bit shift.
|
||||
unsigned getVmcntBitShift() { return 0; }
|
||||
/// \returns Vmcnt bit shift (lower bits).
|
||||
unsigned getVmcntBitShiftLo() { return 0; }
|
||||
|
||||
/// \returns Vmcnt bit width.
|
||||
unsigned getVmcntBitWidth() { return 4; }
|
||||
/// \returns Vmcnt bit width (lower bits).
|
||||
unsigned getVmcntBitWidthLo() { return 4; }
|
||||
|
||||
/// \returns Expcnt bit shift.
|
||||
unsigned getExpcntBitShift() { return 4; }
|
||||
@ -90,6 +90,12 @@ unsigned getLgkmcntBitShift() { return 8; }
|
||||
/// \returns Lgkmcnt bit width.
|
||||
unsigned getLgkmcntBitWidth() { return 4; }
|
||||
|
||||
/// \returns Vmcnt bit shift (higher bits).
|
||||
unsigned getVmcntBitShiftHi() { return 14; }
|
||||
|
||||
/// \returns Vmcnt bit width (higher bits).
|
||||
unsigned getVmcntBitWidthHi() { return 2; }
|
||||
|
||||
} // end namespace anonymous
|
||||
|
||||
namespace llvm {
|
||||
@ -120,6 +126,12 @@ IsaVersion getIsaVersion(const FeatureBitset &Features) {
|
||||
if (Features.test(FeatureISAVersion8_1_0))
|
||||
return {8, 1, 0};
|
||||
|
||||
// GFX9.
|
||||
if (Features.test(FeatureISAVersion9_0_0))
|
||||
return {9, 0, 0};
|
||||
if (Features.test(FeatureISAVersion9_0_1))
|
||||
return {9, 0, 1};
|
||||
|
||||
if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
|
||||
return {0, 0, 0};
|
||||
return {7, 0, 0};
|
||||
@ -399,7 +411,12 @@ std::pair<int, int> getIntegerPairAttribute(const Function &F,
|
||||
}
|
||||
|
||||
unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
|
||||
return (1 << getVmcntBitWidth()) - 1;
|
||||
unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
|
||||
if (Version.Major < 9)
|
||||
return VmcntLo;
|
||||
|
||||
unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
|
||||
return VmcntLo | VmcntHi;
|
||||
}
|
||||
|
||||
unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
|
||||
@ -411,14 +428,27 @@ unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
|
||||
}
|
||||
|
||||
unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
|
||||
unsigned Vmcnt = getBitMask(getVmcntBitShift(), getVmcntBitWidth());
|
||||
unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
|
||||
unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
|
||||
unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
|
||||
return Vmcnt | Expcnt | Lgkmcnt;
|
||||
unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
|
||||
if (Version.Major < 9)
|
||||
return Waitcnt;
|
||||
|
||||
unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
|
||||
return Waitcnt | VmcntHi;
|
||||
}
|
||||
|
||||
unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
|
||||
return unpackBits(Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
|
||||
unsigned VmcntLo =
|
||||
unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
|
||||
if (Version.Major < 9)
|
||||
return VmcntLo;
|
||||
|
||||
unsigned VmcntHi =
|
||||
unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
|
||||
VmcntHi <<= getVmcntBitWidthLo();
|
||||
return VmcntLo | VmcntHi;
|
||||
}
|
||||
|
||||
unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
|
||||
@ -438,7 +468,13 @@ void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
|
||||
|
||||
unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
|
||||
unsigned Vmcnt) {
|
||||
return packBits(Vmcnt, Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
|
||||
Waitcnt =
|
||||
packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
|
||||
if (Version.Major < 9)
|
||||
return Waitcnt;
|
||||
|
||||
Vmcnt >>= getVmcntBitWidthLo();
|
||||
return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
|
||||
}
|
||||
|
||||
unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
|
||||
|
@ -216,7 +216,8 @@ unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
|
||||
/// \p Lgkmcnt respectively.
|
||||
///
|
||||
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
|
||||
/// \p Vmcnt = \p Waitcnt[3:0]
|
||||
/// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9 only)
|
||||
/// \p Vmcnt = \p Waitcnt[3:0] | (\p Waitcnt[15:14] << 4) (gfx9+ only)
|
||||
/// \p Expcnt = \p Waitcnt[6:4]
|
||||
/// \p Lgkmcnt = \p Waitcnt[11:8]
|
||||
void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
|
||||
@ -238,9 +239,11 @@ unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
|
||||
/// \p Version.
|
||||
///
|
||||
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
|
||||
/// Waitcnt[3:0] = \p Vmcnt
|
||||
/// Waitcnt[6:4] = \p Expcnt
|
||||
/// Waitcnt[11:8] = \p Lgkmcnt
|
||||
/// Waitcnt[3:0] = \p Vmcnt (pre-gfx9 only)
|
||||
/// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9+ only)
|
||||
/// Waitcnt[6:4] = \p Expcnt
|
||||
/// Waitcnt[11:8] = \p Lgkmcnt
|
||||
/// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9+ only)
|
||||
///
|
||||
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
|
||||
/// isa \p Version.
|
||||
|
@ -1,14 +1,19 @@
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=HSA %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=HSA -check-prefix=CI %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=HSA -check-prefix=GFX9 %s
|
||||
|
||||
; HSA-LABEL: {{^}}use_group_to_flat_addrspacecast:
|
||||
; HSA: enable_sgpr_private_segment_buffer = 1
|
||||
; HSA: enable_sgpr_dispatch_ptr = 0
|
||||
; HSA: enable_sgpr_queue_ptr = 1
|
||||
; CI: enable_sgpr_queue_ptr = 1
|
||||
; GFX9: enable_sgpr_queue_ptr = 0
|
||||
|
||||
; HSA-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
|
||||
; HSA-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10{{$}}
|
||||
; CI-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
|
||||
; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10{{$}}
|
||||
; CI-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
|
||||
|
||||
; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}}
|
||||
; GFX9-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_shared_base
|
||||
|
||||
; HSA-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
|
||||
; HSA-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
|
||||
|
||||
; HSA-DAG: v_cmp_ne_u32_e64 vcc, [[PTR]], -1
|
||||
@ -17,6 +22,12 @@
|
||||
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
|
||||
|
||||
; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, [[K]]
|
||||
|
||||
; At most 2 digits. Make sure src_shared_base is not counted as a high
|
||||
; number SGPR.
|
||||
|
||||
; CI: NumSgprs: {{[0-9][0-9]+}}
|
||||
; GFX9: NumSgprs: {{[0-9]+}}
|
||||
define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #0 {
|
||||
%stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
|
||||
store volatile i32 7, i32 addrspace(4)* %stof
|
||||
@ -26,12 +37,16 @@ define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #0 {
|
||||
; HSA-LABEL: {{^}}use_private_to_flat_addrspacecast:
|
||||
; HSA: enable_sgpr_private_segment_buffer = 1
|
||||
; HSA: enable_sgpr_dispatch_ptr = 0
|
||||
; HSA: enable_sgpr_queue_ptr = 1
|
||||
; CI: enable_sgpr_queue_ptr = 1
|
||||
; GFX9: enable_sgpr_queue_ptr = 0
|
||||
|
||||
; HSA-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
|
||||
; HSA-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11{{$}}
|
||||
; CI-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
|
||||
; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11{{$}}
|
||||
; CI-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
|
||||
|
||||
; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}}
|
||||
; GFX9-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_private_base
|
||||
|
||||
; HSA-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
|
||||
; HSA-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
|
||||
|
||||
; HSA-DAG: v_cmp_ne_u32_e64 vcc, [[PTR]], -1
|
||||
@ -40,6 +55,9 @@ define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #0 {
|
||||
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
|
||||
|
||||
; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, [[K]]
|
||||
|
||||
; CI: NumSgprs: {{[0-9][0-9]+}}
|
||||
; GFX9: NumSgprs: {{[0-9]+}}
|
||||
define void @use_private_to_flat_addrspacecast(i32* %ptr) #0 {
|
||||
%stof = addrspacecast i32* %ptr to i32 addrspace(4)*
|
||||
store volatile i32 7, i32 addrspace(4)* %stof
|
||||
@ -133,8 +151,10 @@ define void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #0 {
|
||||
}
|
||||
|
||||
; HSA-LABEL: {{^}}cast_0_group_to_flat_addrspacecast:
|
||||
; HSA: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10
|
||||
; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
|
||||
; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10
|
||||
; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
|
||||
; GFX9-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], src_shared_base
|
||||
|
||||
; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
|
||||
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
|
||||
; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
|
||||
@ -176,8 +196,11 @@ define void @cast_neg1_flat_to_group_addrspacecast() #0 {
|
||||
}
|
||||
|
||||
; HSA-LABEL: {{^}}cast_0_private_to_flat_addrspacecast:
|
||||
; HSA: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11
|
||||
; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
|
||||
; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11
|
||||
; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
|
||||
|
||||
; GFX9-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], src_private_base
|
||||
|
||||
; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
|
||||
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
|
||||
; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
|
||||
@ -226,9 +249,13 @@ end:
|
||||
|
||||
; Check for prologue initializing special SGPRs pointing to scratch.
|
||||
; HSA-LABEL: {{^}}store_flat_scratch:
|
||||
; HSA-DAG: s_mov_b32 flat_scratch_lo, s9
|
||||
; HSA-DAG: s_add_u32 [[ADD:s[0-9]+]], s8, s11
|
||||
; HSA: s_lshr_b32 flat_scratch_hi, [[ADD]], 8
|
||||
; CI-DAG: s_mov_b32 flat_scratch_lo, s9
|
||||
; CI-DAG: s_add_u32 [[ADD:s[0-9]+]], s8, s11
|
||||
; CI: s_lshr_b32 flat_scratch_hi, [[ADD]], 8
|
||||
|
||||
; GFX9: s_add_u32 flat_scratch_lo, s6, s9
|
||||
; GFX9: s_addc_u32 flat_scratch_hi, s7, 0
|
||||
|
||||
; HSA: flat_store_dword
|
||||
; HSA: s_barrier
|
||||
; HSA: flat_load_dword
|
||||
|
@ -1,4 +1,5 @@
|
||||
; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-reserve-regs -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=+amdgpu-debugger-reserve-regs -verify-machineinstrs < %s | FileCheck %s
|
||||
; CHECK: reserved_vgpr_first = {{[0-9]+}}
|
||||
; CHECK-NEXT: reserved_vgpr_count = 4
|
||||
; CHECK: ReservedVGPRFirst: {{[0-9]+}}
|
||||
|
@ -13,6 +13,8 @@
|
||||
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx803 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI803 %s
|
||||
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx804 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI804 %s
|
||||
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx810 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI810 %s
|
||||
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX900 %s
|
||||
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx901 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX901 %s
|
||||
|
||||
; HSA: .hsa_code_object_version 2,1
|
||||
; HSA-CI700: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
|
||||
@ -24,3 +26,5 @@
|
||||
; HSA-VI803: .hsa_code_object_isa 8,0,3,"AMD","AMDGPU"
|
||||
; HSA-VI804: .hsa_code_object_isa 8,0,4,"AMD","AMDGPU"
|
||||
; HSA-VI810: .hsa_code_object_isa 8,1,0,"AMD","AMDGPU"
|
||||
; HSA-GFX900: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"
|
||||
; HSA-GFX901: .hsa_code_object_isa 9,0,1,"AMD","AMDGPU"
|
||||
|
@ -1,6 +1,7 @@
|
||||
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN
|
||||
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI
|
||||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,VI
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,VI,GFX9
|
||||
|
||||
--- |
|
||||
define void @div_fmas() { ret void }
|
||||
@ -9,6 +10,37 @@
|
||||
define void @vmem_gt_8dw_store() { ret void }
|
||||
define void @readwrite_lane() { ret void }
|
||||
define void @rfe() { ret void }
|
||||
define void @s_mov_fed_b32() { ret void }
|
||||
define void @s_movrel() { ret void }
|
||||
define void @v_interp() { ret void }
|
||||
|
||||
define void @mov_fed_hazard_crash_on_dbg_value(i32 addrspace(1)* %A) {
|
||||
entry:
|
||||
%A.addr = alloca i32 addrspace(1)*, align 4
|
||||
store i32 addrspace(1)* %A, i32 addrspace(1)** %A.addr, align 4
|
||||
call void @llvm.dbg.declare(metadata i32 addrspace(1)** %A.addr, metadata !5, metadata !11), !dbg !12
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
|
||||
|
||||
!llvm.dbg.cu = !{!0}
|
||||
!llvm.module.flags = !{!3, !4}
|
||||
|
||||
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 (trunk 268929)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
|
||||
!1 = !DIFile(filename: "test01.cl", directory: "/dev/null")
|
||||
!2 = !{}
|
||||
!3 = !{i32 2, !"Dwarf Version", i32 2}
|
||||
!4 = !{i32 2, !"Debug Info Version", i32 3}
|
||||
!5 = !DILocalVariable(name: "A", arg: 1, scope: !6, file: !1, line: 1, type: !9)
|
||||
!6 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
|
||||
!7 = !DISubroutineType(types: !8)
|
||||
!8 = !{null, !9}
|
||||
!9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 64, align: 32)
|
||||
!10 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
|
||||
!11 = !DIExpression()
|
||||
!12 = !DILocation(line: 1, column: 30, scope: !6)
|
||||
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: div_fmas
|
||||
@ -331,3 +363,185 @@ body: |
|
||||
S_ENDPGM
|
||||
|
||||
...
|
||||
|
||||
...
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: s_mov_fed_b32
|
||||
|
||||
# GCN-LABEL: bb.0:
|
||||
# GCN: S_MOV_FED_B32
|
||||
# GFX9: S_NOP
|
||||
# GCN-NEXT: S_MOV_B32
|
||||
|
||||
# GCN-LABEL: bb.1:
|
||||
# GCN: S_MOV_FED_B32
|
||||
# GFX9: S_NOP
|
||||
# GCN-NEXT: V_MOV_B32
|
||||
name: s_mov_fed_b32
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
successors: %bb.1
|
||||
%sgpr0 = S_MOV_FED_B32 %sgpr0
|
||||
%sgpr0 = S_MOV_B32 %sgpr0
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1:
|
||||
%sgpr0 = S_MOV_FED_B32 %sgpr0
|
||||
%vgpr0 = V_MOV_B32_e32 %sgpr0, implicit %exec
|
||||
S_ENDPGM
|
||||
|
||||
...
|
||||
|
||||
...
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: s_movrel
|
||||
|
||||
# GCN-LABEL: bb.0:
|
||||
# GCN: S_MOV_B32
|
||||
# GFX9: S_NOP
|
||||
# GCN-NEXT: S_MOVRELS_B32
|
||||
|
||||
# GCN-LABEL: bb.1:
|
||||
# GCN: S_MOV_B32
|
||||
# GFX9: S_NOP
|
||||
# GCN-NEXT: S_MOVRELS_B64
|
||||
|
||||
# GCN-LABEL: bb.2:
|
||||
# GCN: S_MOV_B32
|
||||
# GFX9: S_NOP
|
||||
# GCN-NEXT: S_MOVRELD_B32
|
||||
|
||||
# GCN-LABEL: bb.3:
|
||||
# GCN: S_MOV_B32
|
||||
# GFX9: S_NOP
|
||||
# GCN-NEXT: S_MOVRELD_B64
|
||||
|
||||
name: s_movrel
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
successors: %bb.1
|
||||
%m0 = S_MOV_B32 0
|
||||
%sgpr0 = S_MOVRELS_B32 %sgpr0, implicit %m0
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1:
|
||||
successors: %bb.2
|
||||
%m0 = S_MOV_B32 0
|
||||
%sgpr0_sgpr1 = S_MOVRELS_B64 %sgpr0_sgpr1, implicit %m0
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.2:
|
||||
successors: %bb.3
|
||||
%m0 = S_MOV_B32 0
|
||||
%sgpr0 = S_MOVRELD_B32 %sgpr0, implicit %m0
|
||||
S_BRANCH %bb.3
|
||||
|
||||
bb.3:
|
||||
%m0 = S_MOV_B32 0
|
||||
%sgpr0_sgpr1 = S_MOVRELD_B64 %sgpr0_sgpr1, implicit %m0
|
||||
S_ENDPGM
|
||||
...
|
||||
|
||||
...
|
||||
---
|
||||
|
||||
# GCN-LABEL: name: v_interp
|
||||
|
||||
# GCN-LABEL: bb.0:
|
||||
# GCN: S_MOV_B32
|
||||
# GFX9: S_NOP
|
||||
# GCN-NEXT: V_INTERP_P1_F32
|
||||
|
||||
# GCN-LABEL: bb.1:
|
||||
# GCN: S_MOV_B32
|
||||
# GFX9: S_NOP
|
||||
# GCN-NEXT: V_INTERP_P2_F32
|
||||
|
||||
# GCN-LABEL: bb.2:
|
||||
# GCN: S_MOV_B32
|
||||
# GFX9: S_NOP
|
||||
# GCN-NEXT: V_INTERP_P1_F32_16bank
|
||||
|
||||
# GCN-LABEL: bb.3:
|
||||
# GCN: S_MOV_B32
|
||||
# GFX9: S_NOP
|
||||
# GCN-NEXT: V_INTERP_MOV_F32
|
||||
|
||||
name: v_interp
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
successors: %bb.1
|
||||
%m0 = S_MOV_B32 0
|
||||
%vgpr0 = V_INTERP_P1_F32 %vgpr0, 0, 0, implicit %m0, implicit %exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1:
|
||||
successors: %bb.2
|
||||
%m0 = S_MOV_B32 0
|
||||
%vgpr0 = V_INTERP_P2_F32 %vgpr0, %vgpr1, 0, 0, implicit %m0, implicit %exec
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.2:
|
||||
successors: %bb.3
|
||||
%m0 = S_MOV_B32 0
|
||||
%vgpr0 = V_INTERP_P1_F32_16bank %vgpr0, 0, 0, implicit %m0, implicit %exec
|
||||
S_BRANCH %bb.3
|
||||
|
||||
bb.3:
|
||||
%m0 = S_MOV_B32 0
|
||||
%vgpr0 = V_INTERP_MOV_F32 0, 0, 0, implicit %m0, implicit %exec
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
name: mov_fed_hazard_crash_on_dbg_value
|
||||
alignment: 0
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
tracksRegLiveness: true
|
||||
liveins:
|
||||
- { reg: '%sgpr4_sgpr5' }
|
||||
- { reg: '%sgpr6_sgpr7' }
|
||||
- { reg: '%sgpr9' }
|
||||
- { reg: '%sgpr0_sgpr1_sgpr2_sgpr3' }
|
||||
frameInfo:
|
||||
isFrameAddressTaken: false
|
||||
isReturnAddressTaken: false
|
||||
hasStackMap: false
|
||||
hasPatchPoint: false
|
||||
stackSize: 16
|
||||
offsetAdjustment: 0
|
||||
maxAlignment: 8
|
||||
adjustsStack: false
|
||||
hasCalls: false
|
||||
maxCallFrameSize: 0
|
||||
hasOpaqueSPAdjustment: false
|
||||
hasVAStart: false
|
||||
hasMustTailInVarArgFunc: false
|
||||
stack:
|
||||
- { id: 0, name: A.addr, offset: 0, size: 8, alignment: 8, local-offset: 0 }
|
||||
- { id: 1, offset: 8, size: 4, alignment: 4 }
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: %sgpr4_sgpr5, %sgpr6_sgpr7, %sgpr9, %sgpr0_sgpr1_sgpr2_sgpr3
|
||||
|
||||
%flat_scr_lo = S_ADD_U32 %sgpr6, %sgpr9, implicit-def %scc
|
||||
%flat_scr_hi = S_ADDC_U32 %sgpr7, 0, implicit-def %scc, implicit %scc
|
||||
DBG_VALUE _, 2, !5, !11, debug-location !12
|
||||
%sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr4_sgpr5, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
|
||||
dead %sgpr6_sgpr7 = KILL %sgpr4_sgpr5
|
||||
%sgpr8 = S_MOV_B32 %sgpr5
|
||||
%vgpr0 = V_MOV_B32_e32 killed %sgpr8, implicit %exec
|
||||
BUFFER_STORE_DWORD_OFFSET %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr9, 4, 0, 0, 0, implicit %exec :: (store 4 into %ir.A.addr + 4)
|
||||
%sgpr8 = S_MOV_B32 %sgpr4, implicit killed %sgpr4_sgpr5
|
||||
%vgpr0 = V_MOV_B32_e32 killed %sgpr8, implicit %exec
|
||||
BUFFER_STORE_DWORD_OFFSET %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr9, 0, 0, 0, 0, implicit %exec :: (store 4 into %ir.A.addr)
|
||||
S_ENDPGM
|
||||
|
||||
...
|
||||
|
@ -1,8 +1,11 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
|
||||
|
||||
; GCN-LABEL: {{^}}test_barrier:
|
||||
; GCN: buffer_store_dword
|
||||
; GCN: s_waitcnt
|
||||
; GFX8: buffer_store_dword
|
||||
; GFX8: s_waitcnt
|
||||
; GFX9: flat_store_dword
|
||||
; GFX9-NOT: s_waitcnt
|
||||
; GCN: s_barrier
|
||||
define void @test_barrier(i32 addrspace(1)* %out) #0 {
|
||||
entry:
|
||||
|
71
test/MC/AMDGPU/sopp-gfx9.s
Normal file
71
test/MC/AMDGPU/sopp-gfx9.s
Normal file
@ -0,0 +1,71 @@
|
||||
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=GFX9 %s
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// s_waitcnt
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
s_waitcnt 0
|
||||
// GFX9: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
|
||||
|
||||
s_waitcnt vmcnt(0) & expcnt(0) & lgkmcnt(0)
|
||||
// GFX9: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
|
||||
|
||||
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
// GFX9: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
|
||||
|
||||
s_waitcnt vmcnt(0), expcnt(0), lgkmcnt(0)
|
||||
// GFX9: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
|
||||
|
||||
s_waitcnt vmcnt(1)
|
||||
// GFX9: s_waitcnt vmcnt(1) ; encoding: [0x71,0x0f,0x8c,0xbf]
|
||||
|
||||
s_waitcnt vmcnt(9)
|
||||
// GFX9: s_waitcnt vmcnt(9) ; encoding: [0x79,0x0f,0x8c,0xbf]
|
||||
|
||||
s_waitcnt expcnt(2)
|
||||
// GFX9: s_waitcnt expcnt(2) ; encoding: [0x2f,0xcf,0x8c,0xbf]
|
||||
|
||||
s_waitcnt lgkmcnt(3)
|
||||
// GFX9: s_waitcnt lgkmcnt(3) ; encoding: [0x7f,0xc3,0x8c,0xbf]
|
||||
|
||||
s_waitcnt lgkmcnt(9)
|
||||
// GFX9: s_waitcnt lgkmcnt(9) ; encoding: [0x7f,0xc9,0x8c,0xbf]
|
||||
|
||||
s_waitcnt vmcnt(0), expcnt(0)
|
||||
// GFX9: s_waitcnt vmcnt(0) expcnt(0) ; encoding: [0x00,0x0f,0x8c,0xbf]
|
||||
|
||||
s_waitcnt vmcnt(15)
|
||||
// GFX9: s_waitcnt vmcnt(15) ; encoding: [0x7f,0x0f,0x8c,0xbf]
|
||||
|
||||
s_waitcnt vmcnt(15) expcnt(6)
|
||||
// GFX9: s_waitcnt vmcnt(15) expcnt(6) ; encoding: [0x6f,0x0f,0x8c,0xbf]
|
||||
|
||||
s_waitcnt vmcnt(15) lgkmcnt(14)
|
||||
// GFX9: s_waitcnt vmcnt(15) lgkmcnt(14) ; encoding: [0x7f,0x0e,0x8c,0xbf]
|
||||
|
||||
s_waitcnt vmcnt(15) expcnt(6) lgkmcnt(14)
|
||||
// GFX9: s_waitcnt vmcnt(15) expcnt(6) lgkmcnt(14) ; encoding: [0x6f,0x0e,0x8c,0xbf]
|
||||
|
||||
s_waitcnt vmcnt(31)
|
||||
// GFX9: s_waitcnt vmcnt(31) ; encoding: [0x7f,0x4f,0x8c,0xbf]
|
||||
|
||||
s_waitcnt vmcnt(31) expcnt(6)
|
||||
// GFX9: s_waitcnt vmcnt(31) expcnt(6) ; encoding: [0x6f,0x4f,0x8c,0xbf]
|
||||
|
||||
s_waitcnt vmcnt(31) lgkmcnt(14)
|
||||
// GFX9: s_waitcnt vmcnt(31) lgkmcnt(14) ; encoding: [0x7f,0x4e,0x8c,0xbf]
|
||||
|
||||
s_waitcnt vmcnt(31) expcnt(6) lgkmcnt(14)
|
||||
// GFX9: s_waitcnt vmcnt(31) expcnt(6) lgkmcnt(14) ; encoding: [0x6f,0x4e,0x8c,0xbf]
|
||||
|
||||
s_waitcnt vmcnt(62)
|
||||
// GFX9: s_waitcnt vmcnt(62) ; encoding: [0x7e,0xcf,0x8c,0xbf]
|
||||
|
||||
s_waitcnt vmcnt(62) expcnt(6)
|
||||
// GFX9: s_waitcnt vmcnt(62) expcnt(6) ; encoding: [0x6e,0xcf,0x8c,0xbf]
|
||||
|
||||
s_waitcnt vmcnt(62) lgkmcnt(14)
|
||||
// GFX9: s_waitcnt vmcnt(62) lgkmcnt(14) ; encoding: [0x7e,0xce,0x8c,0xbf]
|
||||
|
||||
s_waitcnt vmcnt(62) expcnt(6) lgkmcnt(14)
|
||||
// GFX9: s_waitcnt vmcnt(62) expcnt(6) lgkmcnt(14) ; encoding: [0x6e,0xce,0x8c,0xbf]
|
Loading…
x
Reference in New Issue
Block a user