diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td index aed8ce17b8c..4f3c6df3a22 100644 --- a/lib/Target/AMDGPU/AMDGPU.td +++ b/lib/Target/AMDGPU/AMDGPU.td @@ -562,6 +562,5 @@ include "Processors.td" include "AMDGPUInstrInfo.td" include "AMDGPUIntrinsics.td" include "AMDGPURegisterInfo.td" -include "AMDGPURegisterBanks.td" include "AMDGPUInstructions.td" include "AMDGPUCallingConv.td" diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index 95d61b6240c..d53cc153dc9 100644 --- a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -14,13 +14,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPUCallLowering.h" -#include "AMDGPU.h" #include "AMDGPUISelLowering.h" -#include "AMDGPUSubtarget.h" -#include "SIISelLowering.h" -#include "SIRegisterInfo.h" -#include "SIMachineFunctionInfo.h" -#include "llvm/CodeGen/CallingConvLower.h" + #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -35,135 +30,13 @@ AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI) } bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, - const Value *Val, unsigned VReg) const { - MIRBuilder.buildInstr(AMDGPU::S_ENDPGM); + const Value *Val, unsigned VReg) const { return true; } -unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder, - Type *ParamTy, - unsigned Offset) const { - - MachineFunction &MF = MIRBuilder.getMF(); - const SIRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - const Function &F = *MF.getFunction(); - const DataLayout &DL = F.getParent()->getDataLayout(); - PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS); - LLT PtrType(*PtrTy, DL); - unsigned DstReg = MRI.createGenericVirtualRegister(PtrType); - unsigned KernArgSegmentPtr = - TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR); - unsigned KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr); - - unsigned OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); - MIRBuilder.buildConstant(OffsetReg, Offset); - - MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg); - - return DstReg; -} - -void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder, - Type *ParamTy, unsigned Offset, - unsigned DstReg) const { - MachineFunction &MF = MIRBuilder.getMF(); - const Function &F = *MF.getFunction(); - const DataLayout &DL = F.getParent()->getDataLayout(); - PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS); - MachinePointerInfo PtrInfo(UndefValue::get(PtrTy)); - unsigned TypeSize = DL.getTypeStoreSize(ParamTy); - unsigned Align = DL.getABITypeAlignment(ParamTy); - unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset); - - MachineMemOperand *MMO = - MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad | - MachineMemOperand::MONonTemporal | - MachineMemOperand::MOInvariant, - TypeSize, Align); - - MIRBuilder.buildLoad(DstReg, PtrReg, *MMO); -} - bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, ArrayRef VRegs) const { - - MachineFunction &MF = MIRBuilder.getMF(); - const SISubtarget *Subtarget = static_cast(&MF.getSubtarget()); - MachineRegisterInfo &MRI = MF.getRegInfo(); - SIMachineFunctionInfo *Info = MF.getInfo(); - const SIRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - const DataLayout &DL = F.getParent()->getDataLayout(); - - SmallVector ArgLocs; - CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext()); - - // FIXME: How should these inputs interact with inreg / custom SGPR inputs? - if (Info->hasPrivateSegmentBuffer()) { - unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI); - MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass); - CCInfo.AllocateReg(PrivateSegmentBufferReg); - } - - if (Info->hasDispatchPtr()) { - unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI); - // FIXME: Need to add reg as live-in - CCInfo.AllocateReg(DispatchPtrReg); - } - - if (Info->hasQueuePtr()) { - unsigned QueuePtrReg = Info->addQueuePtr(*TRI); - // FIXME: Need to add reg as live-in - CCInfo.AllocateReg(QueuePtrReg); - } - - if (Info->hasKernargSegmentPtr()) { - unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI); - const LLT P2 = LLT::pointer(2, 64); - unsigned VReg = MRI.createGenericVirtualRegister(P2); - MRI.addLiveIn(InputPtrReg, VReg); - MIRBuilder.getMBB().addLiveIn(InputPtrReg); - MIRBuilder.buildCopy(VReg, InputPtrReg); - CCInfo.AllocateReg(InputPtrReg); - } - - if (Info->hasDispatchID()) { - unsigned DispatchIDReg = Info->addDispatchID(*TRI); - // FIXME: Need to add reg as live-in - CCInfo.AllocateReg(DispatchIDReg); - } - - if (Info->hasFlatScratchInit()) { - unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI); - // FIXME: Need to add reg as live-in - CCInfo.AllocateReg(FlatScratchInitReg); - } - - unsigned NumArgs = F.arg_size(); - Function::const_arg_iterator CurOrigArg = F.arg_begin(); - const AMDGPUTargetLowering &TLI = *getTLI(); - for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) { - CurOrigArg->getType()->dump(); - MVT ValVT = TLI.getValueType(DL, CurOrigArg->getType()).getSimpleVT(); - ISD::ArgFlagsTy Flags; - Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType())); - CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(), - /*IsVarArg=*/false); - bool Res = - AssignFn(i, ValVT, ValVT, CCValAssign::Full, Flags, CCInfo); - assert(!Res && "Call operand has unhandled type"); - (void)Res; - } - - Function::const_arg_iterator Arg = F.arg_begin(); - for (unsigned i = 0; i != NumArgs; ++i, ++Arg) { - // FIXME: We should be getting DebugInfo from the arguments some how. - CCValAssign &VA = ArgLocs[i]; - lowerParameter(MIRBuilder, Arg->getType(), - VA.getLocMemOffset() + - Subtarget->getExplicitKernelArgOffset(MF), VRegs[i]); - } - + // TODO: Implement once there are generic loads/stores. return true; } diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.h b/lib/Target/AMDGPU/AMDGPUCallLowering.h index b5f3fa5617b..9ae87c9397a 100644 --- a/lib/Target/AMDGPU/AMDGPUCallLowering.h +++ b/lib/Target/AMDGPU/AMDGPUCallLowering.h @@ -22,13 +22,6 @@ namespace llvm { class AMDGPUTargetLowering; class AMDGPUCallLowering: public CallLowering { - - unsigned lowerParameterPtr(MachineIRBuilder &MIRBuilder, Type *ParamTy, - unsigned Offset) const; - - void lowerParameter(MachineIRBuilder &MIRBuilder, Type *ParamTy, - unsigned Offset, unsigned DstReg) const; - public: AMDGPUCallLowering(const AMDGPUTargetLowering &TLI); @@ -36,7 +29,6 @@ class AMDGPUCallLowering: public CallLowering { unsigned VReg) const override; bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, ArrayRef VRegs) const override; - CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const; }; } // End of namespace llvm; #endif diff --git a/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def b/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def deleted file mode 100644 index 5cb9036f482..00000000000 --- a/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def +++ /dev/null @@ -1,62 +0,0 @@ -//===- AMDGPUGenRegisterBankInfo.def -----------------------------*- C++ -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// This file defines all the static objects used by AMDGPURegisterBankInfo. -/// \todo This should be generated by TableGen. -//===----------------------------------------------------------------------===// - -#ifndef LLVM_BUILD_GLOBAL_ISEL -#error "You shouldn't build this" -#endif - -namespace llvm { -namespace AMDGPU { - -enum PartialMappingIdx { - None = - 1, - PM_SGPR32 = 0, - PM_SGPR64 = 1, - PM_VGPR32 = 2, - PM_VGPR64 = 3 -}; - -const RegisterBankInfo::PartialMapping PartMappings[] { - // StartIdx, Length, RegBank - {0, 32, SGPRRegBank}, - {0, 64, SGPRRegBank}, - {0, 32, VGPRRegBank}, - {0, 64, VGPRRegBank} -}; - -const RegisterBankInfo::ValueMapping ValMappings[] { - // SGPR 32-bit - {&PartMappings[0], 1}, - // SGPR 64-bit - {&PartMappings[1], 1}, - // VGPR 32-bit - {&PartMappings[2], 1}, - // VGPR 64-bit - {&PartMappings[3], 1} -}; - -enum ValueMappingIdx { - SGPRStartIdx = 0, - VGPRStartIdx = 2 -}; - -const RegisterBankInfo::ValueMapping *getValueMapping(unsigned BankID, - unsigned Size) { - assert(Size % 32 == 0); - unsigned Idx = BankID == AMDGPU::SGPRRegBankID ? SGPRStartIdx : VGPRStartIdx; - Idx += (Size / 32) - 1; - return &ValMappings[Idx]; -} - -} // End AMDGPU namespace. -} // End llvm namespace. diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index e7f15bcb081..631670807d0 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -15,7 +15,6 @@ #include "AMDGPUISelLowering.h" #include "AMDGPU.h" -#include "AMDGPUCallLowering.h" #include "AMDGPUFrameLowering.h" #include "AMDGPUIntrinsicInfo.h" #include "AMDGPURegisterInfo.h" @@ -671,11 +670,6 @@ bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const { // TargetLowering Callbacks //===---------------------------------------------------------------------===// -CCAssignFn *AMDGPUCallLowering::CCAssignFnForCall(CallingConv::ID CC, - bool IsVarArg) const { - return CC_AMDGPU; -} - /// The SelectionDAGBuilder will automatically promote function arguments /// with illegal types. However, this does not work for the AMDGPU targets /// since the function arguments are stored in memory as these illegal types. diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp deleted file mode 100644 index 6e5e9825eb8..00000000000 --- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ /dev/null @@ -1,418 +0,0 @@ -//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// This file implements the targeting of the InstructionSelector class for -/// AMDGPU. -/// \todo This should be generated by TableGen. -//===----------------------------------------------------------------------===// - -#include "AMDGPUInstructionSelector.h" -#include "AMDGPUInstrInfo.h" -#include "AMDGPURegisterBankInfo.h" -#include "AMDGPURegisterInfo.h" -#include "AMDGPUSubtarget.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/Type.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -#define DEBUG_TYPE "amdgpu-isel" - -using namespace llvm; - -AMDGPUInstructionSelector::AMDGPUInstructionSelector( - const SISubtarget &STI, const AMDGPURegisterBankInfo &RBI) - : InstructionSelector(), TII(*STI.getInstrInfo()), - TRI(*STI.getRegisterInfo()), RBI(RBI) {} - -MachineOperand -AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO, - unsigned SubIdx) const { - - MachineInstr *MI = MO.getParent(); - MachineBasicBlock *BB = MO.getParent()->getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); - unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); - - if (MO.isReg()) { - unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx); - unsigned Reg = MO.getReg(); - BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg) - .addReg(Reg, 0, ComposedSubIdx); - - return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(), - MO.isKill(), MO.isDead(), MO.isUndef(), - MO.isEarlyClobber(), 0, MO.isDebug(), - MO.isInternalRead()); - } - - assert(MO.isImm()); - - APInt Imm(64, MO.getImm()); - - switch (SubIdx) { - default: - llvm_unreachable("do not know to split immediate with this sub index."); - case AMDGPU::sub0: - return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue()); - case AMDGPU::sub1: - return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue()); - } -} - -bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const { - MachineBasicBlock *BB = I.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); - unsigned Size = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI); - unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); - unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); - - if (Size != 64) - return false; - - DebugLoc DL = I.getDebugLoc(); - - BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo) - .add(getSubOperand64(I.getOperand(1), AMDGPU::sub0)) - .add(getSubOperand64(I.getOperand(2), AMDGPU::sub0)); - - BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi) - .add(getSubOperand64(I.getOperand(1), AMDGPU::sub1)) - .add(getSubOperand64(I.getOperand(2), AMDGPU::sub1)); - - BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), I.getOperand(0).getReg()) - .addReg(DstLo) - .addImm(AMDGPU::sub0) - .addReg(DstHi) - .addImm(AMDGPU::sub1); - - for (MachineOperand &MO : I.explicit_operands()) { - if (!MO.isReg() || TargetRegisterInfo::isPhysicalRegister(MO.getReg())) - continue; - RBI.constrainGenericRegister(MO.getReg(), AMDGPU::SReg_64RegClass, MRI); - } - - I.eraseFromParent(); - return true; -} - -bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const { - return selectG_ADD(I); -} - -bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const { - MachineBasicBlock *BB = I.getParent(); - DebugLoc DL = I.getDebugLoc(); - - // FIXME: Select store instruction based on address space - MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(AMDGPU::FLAT_STORE_DWORD)) - .add(I.getOperand(1)) - .add(I.getOperand(0)) - .addImm(0) - .addImm(0) - .addImm(0); - - // Now that we selected an opcode, we need to constrain the register - // operands to use appropriate classes. - bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI); - - I.eraseFromParent(); - return Ret; -} - -bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const { - MachineBasicBlock *BB = I.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); - unsigned DstReg = I.getOperand(0).getReg(); - unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI); - - if (Size == 32) { - I.setDesc(TII.get(AMDGPU::S_MOV_B32)); - return constrainSelectedInstRegOperands(I, TII, TRI, RBI); - } - - assert(Size == 64); - - DebugLoc DL = I.getDebugLoc(); - unsigned LoReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); - unsigned HiReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); - const APInt &Imm = I.getOperand(1).getCImm()->getValue(); - - BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), LoReg) - .addImm(Imm.trunc(32).getZExtValue()); - - BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), HiReg) - .addImm(Imm.ashr(32).getZExtValue()); - - BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg) - .addReg(LoReg) - .addImm(AMDGPU::sub0) - .addReg(HiReg) - .addImm(AMDGPU::sub1); - // We can't call constrainSelectedInstRegOperands here, because it doesn't - // work for target independent opcodes - I.eraseFromParent(); - return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI); -} - -static bool isConstant(const MachineInstr &MI) { - return MI.getOpcode() == TargetOpcode::G_CONSTANT; -} - -void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load, - const MachineRegisterInfo &MRI, SmallVectorImpl &AddrInfo) const { - - const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg()); - - assert(PtrMI); - - if (PtrMI->getOpcode() != TargetOpcode::G_GEP) - return; - - GEPInfo GEPInfo(*PtrMI); - - for (unsigned i = 1, e = 3; i < e; ++i) { - const MachineOperand &GEPOp = PtrMI->getOperand(i); - const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg()); - assert(OpDef); - if (isConstant(*OpDef)) { - // FIXME: Is it possible to have multiple Imm parts? Maybe if we - // are lacking other optimizations. - assert(GEPInfo.Imm == 0); - GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue(); - continue; - } - const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI); - if (OpBank->getID() == AMDGPU::SGPRRegBankID) - GEPInfo.SgprParts.push_back(GEPOp.getReg()); - else - GEPInfo.VgprParts.push_back(GEPOp.getReg()); - } - - AddrInfo.push_back(GEPInfo); - getAddrModeInfo(*PtrMI, MRI, AddrInfo); -} - -static bool isInstrUniform(const MachineInstr &MI) { - if (!MI.hasOneMemOperand()) - return false; - - const MachineMemOperand *MMO = *MI.memoperands_begin(); - const Value *Ptr = MMO->getValue(); - - // UndefValue means this is a load of a kernel input. These are uniform. - // Sometimes LDS instructions have constant pointers. - // If Ptr is null, then that means this mem operand contains a - // PseudoSourceValue like GOT. - if (!Ptr || isa(Ptr) || isa(Ptr) || - isa(Ptr) || isa(Ptr)) - return true; - - const Instruction *I = dyn_cast(Ptr); - return I && I->getMetadata("amdgpu.uniform"); -} - -static unsigned getSmrdOpcode(unsigned BaseOpcode, unsigned LoadSize) { - - if (LoadSize == 32) - return BaseOpcode; - - switch (BaseOpcode) { - case AMDGPU::S_LOAD_DWORD_IMM: - switch (LoadSize) { - case 64: - return AMDGPU::S_LOAD_DWORDX2_IMM; - case 128: - return AMDGPU::S_LOAD_DWORDX4_IMM; - case 256: - return AMDGPU::S_LOAD_DWORDX8_IMM; - case 512: - return AMDGPU::S_LOAD_DWORDX16_IMM; - } - break; - case AMDGPU::S_LOAD_DWORD_IMM_ci: - switch (LoadSize) { - case 64: - return AMDGPU::S_LOAD_DWORDX2_IMM_ci; - case 128: - return AMDGPU::S_LOAD_DWORDX4_IMM_ci; - case 256: - return AMDGPU::S_LOAD_DWORDX8_IMM_ci; - case 512: - return AMDGPU::S_LOAD_DWORDX16_IMM_ci; - } - break; - case AMDGPU::S_LOAD_DWORD_SGPR: - switch (LoadSize) { - case 64: - return AMDGPU::S_LOAD_DWORDX2_SGPR; - case 128: - return AMDGPU::S_LOAD_DWORDX4_SGPR; - case 256: - return AMDGPU::S_LOAD_DWORDX8_SGPR; - case 512: - return AMDGPU::S_LOAD_DWORDX16_SGPR; - } - break; - } - llvm_unreachable("Invalid base smrd opcode or size"); -} - -bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef AddrInfo) const { - for (const GEPInfo &GEPInfo : AddrInfo) { - if (!GEPInfo.VgprParts.empty()) - return true; - } - return false; -} - -bool AMDGPUInstructionSelector::selectSMRD(MachineInstr &I, - ArrayRef AddrInfo) const { - - if (!I.hasOneMemOperand()) - return false; - - if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS) - return false; - - if (!isInstrUniform(I)) - return false; - - if (hasVgprParts(AddrInfo)) - return false; - - MachineBasicBlock *BB = I.getParent(); - MachineFunction *MF = BB->getParent(); - const SISubtarget &Subtarget = MF->getSubtarget(); - MachineRegisterInfo &MRI = MF->getRegInfo(); - unsigned DstReg = I.getOperand(0).getReg(); - const DebugLoc &DL = I.getDebugLoc(); - unsigned Opcode; - unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI); - - if (!AddrInfo.empty() && AddrInfo[0].SgprParts.size() == 1) { - - const GEPInfo &GEPInfo = AddrInfo[0]; - - unsigned PtrReg = GEPInfo.SgprParts[0]; - int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(Subtarget, GEPInfo.Imm); - if (AMDGPU::isLegalSMRDImmOffset(Subtarget, GEPInfo.Imm)) { - Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize); - - MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg) - .addReg(PtrReg) - .addImm(EncodedImm) - .addImm(0); // glc - return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI); - } - - if (Subtarget.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS && - isUInt<32>(EncodedImm)) { - Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM_ci, LoadSize); - MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg) - .addReg(PtrReg) - .addImm(EncodedImm) - .addImm(0); // glc - return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI); - } - - if (isUInt<32>(GEPInfo.Imm)) { - Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_SGPR, LoadSize); - unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); - BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), OffsetReg) - .addImm(GEPInfo.Imm); - - MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg) - .addReg(PtrReg) - .addReg(OffsetReg) - .addImm(0); // glc - return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI); - } - } - - unsigned PtrReg = I.getOperand(1).getReg(); - Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize); - MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg) - .addReg(PtrReg) - .addImm(0) - .addImm(0); // glc - return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI); -} - - -bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const { - MachineBasicBlock *BB = I.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); - DebugLoc DL = I.getDebugLoc(); - unsigned DstReg = I.getOperand(0).getReg(); - unsigned PtrReg = I.getOperand(1).getReg(); - unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI); - unsigned Opcode; - - SmallVector AddrInfo; - - getAddrModeInfo(I, MRI, AddrInfo); - - if (selectSMRD(I, AddrInfo)) { - I.eraseFromParent(); - return true; - } - - switch (LoadSize) { - default: - llvm_unreachable("Load size not supported\n"); - case 32: - Opcode = AMDGPU::FLAT_LOAD_DWORD; - break; - case 64: - Opcode = AMDGPU::FLAT_LOAD_DWORDX2; - break; - } - - MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode)) - .add(I.getOperand(0)) - .addReg(PtrReg) - .addImm(0) - .addImm(0) - .addImm(0); - - bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI); - I.eraseFromParent(); - return Ret; -} - -bool AMDGPUInstructionSelector::select(MachineInstr &I) const { - - if (!isPreISelGenericOpcode(I.getOpcode())) - return true; - - switch (I.getOpcode()) { - default: - break; - case TargetOpcode::G_ADD: - return selectG_ADD(I); - case TargetOpcode::G_CONSTANT: - return selectG_CONSTANT(I); - case TargetOpcode::G_GEP: - return selectG_GEP(I); - case TargetOpcode::G_LOAD: - return selectG_LOAD(I); - case TargetOpcode::G_STORE: - return selectG_STORE(I); - } - return false; -} diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h deleted file mode 100644 index 783f1408b3d..00000000000 --- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ /dev/null @@ -1,65 +0,0 @@ -//===- AMDGPUInstructionSelector --------------------------------*- C++ -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// This file declares the targeting of the InstructionSelector class for -/// AMDGPU. -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H -#define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H - -#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" - -namespace llvm { - -class AMDGPUInstrInfo; -class AMDGPURegisterBankInfo; -class MachineInstr; -class MachineOperand; -class MachineRegisterInfo; -class SIInstrInfo; -class SIRegisterInfo; -class SISubtarget; - -class AMDGPUInstructionSelector : public InstructionSelector { -public: - AMDGPUInstructionSelector(const SISubtarget &STI, - const AMDGPURegisterBankInfo &RBI); - - bool select(MachineInstr &I) const override; - -private: - struct GEPInfo { - const MachineInstr &GEP; - SmallVector SgprParts; - SmallVector VgprParts; - int64_t Imm; - GEPInfo(const MachineInstr &GEP) : GEP(GEP), Imm(0) { } - }; - - MachineOperand getSubOperand64(MachineOperand &MO, unsigned SubIdx) const; - bool selectG_CONSTANT(MachineInstr &I) const; - bool selectG_ADD(MachineInstr &I) const; - bool selectG_GEP(MachineInstr &I) const; - bool hasVgprParts(ArrayRef AddrInfo) const; - void getAddrModeInfo(const MachineInstr &Load, const MachineRegisterInfo &MRI, - SmallVectorImpl &AddrInfo) const; - bool selectSMRD(MachineInstr &I, ArrayRef AddrInfo) const; - bool selectG_LOAD(MachineInstr &I) const; - bool selectG_STORE(MachineInstr &I) const; - - const SIInstrInfo &TII; - const SIRegisterInfo &TRI; - const AMDGPURegisterBankInfo &RBI; -}; - -} // End llvm namespace. -#endif diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp deleted file mode 100644 index a2567a54902..00000000000 --- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ /dev/null @@ -1,62 +0,0 @@ -//===- AMDGPULegalizerInfo.cpp -----------------------------------*- C++ -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// This file implements the targeting of the Machinelegalizer class for -/// AMDGPU. -/// \todo This should be generated by TableGen. -//===----------------------------------------------------------------------===// - -#include "AMDGPULegalizerInfo.h" -#include "llvm/CodeGen/ValueTypes.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/Target/TargetOpcodes.h" -#include "llvm/Support/Debug.h" - -using namespace llvm; - -#ifndef LLVM_BUILD_GLOBAL_ISEL -#error "You shouldn't build this" -#endif - -AMDGPULegalizerInfo::AMDGPULegalizerInfo() { - using namespace TargetOpcode; - - const LLT S32 = LLT::scalar(32); - const LLT S64 = LLT::scalar(64); - const LLT P1 = LLT::pointer(1, 64); - const LLT P2 = LLT::pointer(2, 64); - - setAction({G_CONSTANT, S64}, Legal); - - setAction({G_GEP, P1}, Legal); - setAction({G_GEP, P2}, Legal); - setAction({G_GEP, 1, S64}, Legal); - - setAction({G_LOAD, P1}, Legal); - setAction({G_LOAD, P2}, Legal); - setAction({G_LOAD, S32}, Legal); - setAction({G_LOAD, 1, P1}, Legal); - setAction({G_LOAD, 1, P2}, Legal); - - setAction({G_STORE, S32}, Legal); - setAction({G_STORE, 1, P1}, Legal); - - // FIXME: When RegBankSelect inserts copies, it will only create new - // registers with scalar types. This means we can end up with - // G_LOAD/G_STORE/G_GEP instruction with scalar types for their pointer - // operands. In assert builds, the instruction selector will assert - // if it sees a generic instruction which isn't legal, so we need to - // tell it that scalar types are legal for pointer operands - setAction({G_GEP, S64}, Legal); - setAction({G_LOAD, 1, S64}, Legal); - setAction({G_STORE, 1, S64}, Legal); - - computeTables(); -} diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/lib/Target/AMDGPU/AMDGPULegalizerInfo.h deleted file mode 100644 index 291e3361f16..00000000000 --- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ /dev/null @@ -1,30 +0,0 @@ -//===- AMDGPULegalizerInfo ---------------------------------------*- C++ -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// This file declares the targeting of the Machinelegalizer class for -/// AMDGPU. -/// \todo This should be generated by TableGen. -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINELEGALIZER_H -#define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINELEGALIZER_H - -#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" - -namespace llvm { - -class LLVMContext; - -/// This class provides the information for the target register banks. -class AMDGPULegalizerInfo : public LegalizerInfo { -public: - AMDGPULegalizerInfo(); -}; -} // End llvm namespace. -#endif diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp deleted file mode 100644 index 78a4442e3ad..00000000000 --- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ /dev/null @@ -1,228 +0,0 @@ -//===- AMDGPURegisterBankInfo.cpp -------------------------------*- C++ -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// This file implements the targeting of the RegisterBankInfo class for -/// AMDGPU. -/// \todo This should be generated by TableGen. -//===----------------------------------------------------------------------===// - -#include "AMDGPURegisterBankInfo.h" -#include "AMDGPUInstrInfo.h" -#include "SIRegisterInfo.h" -#include "llvm/CodeGen/GlobalISel/RegisterBank.h" -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" -#include "llvm/IR/Constants.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" - -#define GET_TARGET_REGBANK_IMPL -#include "AMDGPUGenRegisterBank.inc" - -// This file will be TableGen'ed at some point. -#include "AMDGPUGenRegisterBankInfo.def" - -using namespace llvm; - -#ifndef LLVM_BUILD_GLOBAL_ISEL -#error "You shouldn't build this" -#endif - -AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI) - : AMDGPUGenRegisterBankInfo(), - TRI(static_cast(&TRI)) { - - // HACK: Until this is fully tablegen'd - static bool AlreadyInit = false; - if (AlreadyInit) - return; - - AlreadyInit = true; - - const RegisterBank &RBSGPR = getRegBank(AMDGPU::SGPRRegBankID); - assert(&RBSGPR == &AMDGPU::SGPRRegBank); - - const RegisterBank &RBVGPR = getRegBank(AMDGPU::VGPRRegBankID); - assert(&RBVGPR == &AMDGPU::VGPRRegBank); - -} - -unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &A, - const RegisterBank &B, - unsigned Size) const { - return RegisterBankInfo::copyCost(A, B, Size); -} - -const RegisterBank &AMDGPURegisterBankInfo::getRegBankFromRegClass( - const TargetRegisterClass &RC) const { - - if (TRI->isSGPRClass(&RC)) - return getRegBank(AMDGPU::SGPRRegBankID); - - return getRegBank(AMDGPU::VGPRRegBankID); -} - -RegisterBankInfo::InstructionMappings -AMDGPURegisterBankInfo::getInstrAlternativeMappings( - const MachineInstr &MI) const { - - const MachineFunction &MF = *MI.getParent()->getParent(); - const MachineRegisterInfo &MRI = MF.getRegInfo(); - - unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); - - InstructionMappings AltMappings; - switch (MI.getOpcode()) { - case TargetOpcode::G_LOAD: { - // FIXME: Should we be hard coding the size for these mappings? - InstructionMapping SSMapping(1, 1, - getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size), - AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}), - 2); // Num Operands - AltMappings.emplace_back(std::move(SSMapping)); - - InstructionMapping VVMapping(2, 1, - getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size), - AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64)}), - 2); // Num Operands - AltMappings.emplace_back(std::move(VVMapping)); - - // FIXME: Should this be the pointer-size (64-bits) or the size of the - // register that will hold the bufffer resourc (128-bits). - InstructionMapping VSMapping(3, 1, - getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size), - AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}), - 2); // Num Operands - AltMappings.emplace_back(std::move(VSMapping)); - - return AltMappings; - - } - default: - break; - } - return RegisterBankInfo::getInstrAlternativeMappings(MI); -} - -void AMDGPURegisterBankInfo::applyMappingImpl( - const OperandsMapper &OpdMapper) const { - return applyDefaultMapping(OpdMapper); -} - -static bool isInstrUniform(const MachineInstr &MI) { - if (!MI.hasOneMemOperand()) - return false; - - const MachineMemOperand *MMO = *MI.memoperands_begin(); - return AMDGPU::isUniformMMO(MMO); -} - -RegisterBankInfo::InstructionMapping -AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const { - - const MachineFunction &MF = *MI.getParent()->getParent(); - const MachineRegisterInfo &MRI = MF.getRegInfo(); - RegisterBankInfo::InstructionMapping Mapping = - InstructionMapping{1, 1, nullptr, MI.getNumOperands()}; - SmallVector OpdsMapping(MI.getNumOperands()); - unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); - unsigned PtrSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI); - - const ValueMapping *ValMapping; - const ValueMapping *PtrMapping; - - if (isInstrUniform(MI)) { - // We have a uniform instruction so we want to use an SMRD load - ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); - PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize); - } else { - ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); - // FIXME: What would happen if we used SGPRRegBankID here? - PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize); - } - - OpdsMapping[0] = ValMapping; - OpdsMapping[1] = PtrMapping; - Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping)); - return Mapping; - - // FIXME: Do we want to add a mapping for FLAT load, or should we just - // handle that during instruction selection? -} - -RegisterBankInfo::InstructionMapping -AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { - RegisterBankInfo::InstructionMapping Mapping = getInstrMappingImpl(MI); - - if (Mapping.isValid()) - return Mapping; - - const MachineFunction &MF = *MI.getParent()->getParent(); - const MachineRegisterInfo &MRI = MF.getRegInfo(); - Mapping = InstructionMapping{1, 1, nullptr, MI.getNumOperands()}; - SmallVector OpdsMapping(MI.getNumOperands()); - - switch (MI.getOpcode()) { - default: break; - case AMDGPU::G_CONSTANT: { - unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); - OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); - Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping)); - return Mapping; - } - case AMDGPU::G_GEP: { - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - if (!MI.getOperand(i).isReg()) - continue; - - unsigned Size = MRI.getType(MI.getOperand(i).getReg()).getSizeInBits(); - OpdsMapping[i] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); - } - Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping)); - return Mapping; - } - case AMDGPU::G_STORE: { - assert(MI.getOperand(0).isReg()); - unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); - // FIXME: We need to specify a different reg bank once scalar stores - // are supported. - const ValueMapping *ValMapping = - AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); - // FIXME: Depending on the type of store, the pointer could be in - // the SGPR Reg bank. - // FIXME: Pointer size should be based on the address space. - const ValueMapping *PtrMapping = - AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64); - - OpdsMapping[0] = ValMapping; - OpdsMapping[1] = PtrMapping; - Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping)); - return Mapping; - } - - case AMDGPU::G_LOAD: - return getInstrMappingForLoad(MI); - } - - unsigned BankID = AMDGPU::SGPRRegBankID; - - Mapping = InstructionMapping{1, 1, nullptr, MI.getNumOperands()}; - unsigned Size = 0; - for (unsigned Idx = 0; Idx < MI.getNumOperands(); ++Idx) { - // If the operand is not a register default to the size of the previous - // operand. - // FIXME: Can't we pull the types from the MachineInstr rather than the - // operands. - if (MI.getOperand(Idx).isReg()) - Size = getSizeInBits(MI.getOperand(Idx).getReg(), MRI, *TRI); - OpdsMapping.push_back(AMDGPU::getValueMapping(BankID, Size)); - } - Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping)); - - return Mapping; -} diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h deleted file mode 100644 index f13bde87ef2..00000000000 --- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h +++ /dev/null @@ -1,65 +0,0 @@ -//===- AMDGPURegisterBankInfo -----------------------------------*- C++ -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// This file declares the targeting of the RegisterBankInfo class for AMDGPU. -/// \todo This should be generated by TableGen. -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUREGISTERBANKINFO_H -#define LLVM_LIB_TARGET_AMDGPU_AMDGPUREGISTERBANKINFO_H - -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" - -namespace llvm { - -class SIRegisterInfo; -class TargetRegisterInfo; - -namespace AMDGPU { -enum { - SGPRRegBankID = 0, - VGPRRegBankID = 1, - NumRegisterBanks -}; -} // End AMDGPU namespace. - -/// This class provides the information for the target register banks. -class AMDGPUGenRegisterBankInfo : public RegisterBankInfo { - -protected: - -#define GET_TARGET_REGBANK_CLASS -#include "AMDGPUGenRegisterBank.inc" - -}; -class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo { - const SIRegisterInfo *TRI; - - /// See RegisterBankInfo::applyMapping. - void applyMappingImpl(const OperandsMapper &OpdMapper) const override; - - RegisterBankInfo::InstructionMapping - getInstrMappingForLoad(const MachineInstr &MI) const; - -public: - AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI); - - unsigned copyCost(const RegisterBank &A, const RegisterBank &B, - unsigned Size) const override; - - const RegisterBank & - getRegBankFromRegClass(const TargetRegisterClass &RC) const override; - - InstructionMappings - getInstrAlternativeMappings(const MachineInstr &MI) const override; - - InstructionMapping getInstrMapping(const MachineInstr &MI) const override; -}; -} // End llvm namespace. -#endif diff --git a/lib/Target/AMDGPU/AMDGPURegisterBanks.td b/lib/Target/AMDGPU/AMDGPURegisterBanks.td deleted file mode 100644 index f4428e56035..00000000000 --- a/lib/Target/AMDGPU/AMDGPURegisterBanks.td +++ /dev/null @@ -1,16 +0,0 @@ -//=- AMDGPURegisterBank.td - Describe the AMDGPU Banks -------*- tablegen -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -def SGPRRegBank : RegisterBank<"SGPR", - [SReg_32, SReg_64, SReg_128, SReg_256, SReg_512] ->; - -def VGPRRegBank : RegisterBank<"VGPR", - [VGPR_32, VReg_64, VReg_96, VReg_128, VReg_256, VReg_512] ->; diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h index 28038145c38..bbe69003944 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -517,21 +517,6 @@ public: return GISel->getCallLowering(); } - const InstructionSelector *getInstructionSelector() const override { - assert(GISel && "Access to GlobalISel APIs not set"); - return GISel->getInstructionSelector(); - } - - const LegalizerInfo *getLegalizerInfo() const { - assert(GISel && "Access to GlobalISel APIs not set"); - return GISel->getLegalizerInfo(); - } - - const RegisterBankInfo *getRegBankInfo() const override { - assert(GISel && "Access to GlobalISel APIs not set"); - return GISel->getRegBankInfo(); - } - const SIRegisterInfo *getRegisterInfo() const override { return &InstrInfo.getRegisterInfo(); } diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 20593c1eedb..e4f693267e2 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -16,18 +16,18 @@ #include "AMDGPUTargetMachine.h" #include "AMDGPU.h" #include "AMDGPUCallLowering.h" -#include "AMDGPUInstructionSelector.h" -#include "AMDGPULegalizerInfo.h" -#include "AMDGPURegisterBankInfo.h" #include "AMDGPUTargetObjectFile.h" #include "AMDGPUTargetTransformInfo.h" #include "GCNSchedStrategy.h" #include "R600MachineScheduler.h" #include "SIMachineScheduler.h" -#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/CodeGen/GlobalISel/GISelAccessor.h" #include "llvm/CodeGen/GlobalISel/IRTranslator.h" -#include "llvm/CodeGen/GlobalISel/Legalizer.h" -#include "llvm/CodeGen/GlobalISel/RegBankSelect.h" +#include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/Support/TargetRegistry.h" @@ -256,21 +256,9 @@ namespace { struct SIGISelActualAccessor : public GISelAccessor { std::unique_ptr CallLoweringInfo; - std::unique_ptr InstSelector; - std::unique_ptr Legalizer; - std::unique_ptr RegBankInfo; const AMDGPUCallLowering *getCallLowering() const override { return CallLoweringInfo.get(); } - const InstructionSelector *getInstructionSelector() const override { - return InstSelector.get(); - } - const LegalizerInfo *getLegalizerInfo() const override { - return Legalizer.get(); - } - const RegisterBankInfo *getRegBankInfo() const override { - return RegBankInfo.get(); - } }; } // end anonymous namespace @@ -304,11 +292,6 @@ const SISubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const { SIGISelActualAccessor *GISel = new SIGISelActualAccessor(); GISel->CallLoweringInfo.reset( new AMDGPUCallLowering(*I->getTargetLowering())); - GISel->Legalizer.reset(new AMDGPULegalizerInfo()); - - GISel->RegBankInfo.reset(new AMDGPURegisterBankInfo(*I->getRegisterInfo())); - GISel->InstSelector.reset(new AMDGPUInstructionSelector(*I, - *static_cast(GISel->RegBankInfo.get()))); #endif I->setGISelAccessor(*GISel); @@ -609,20 +592,16 @@ bool GCNPassConfig::addIRTranslator() { } bool GCNPassConfig::addLegalizeMachineIR() { - addPass(new Legalizer()); return false; } bool GCNPassConfig::addRegBankSelect() { - addPass(new RegBankSelect()); return false; } bool GCNPassConfig::addGlobalInstructionSelect() { - addPass(new InstructionSelect()); return false; } - #endif void GCNPassConfig::addPreRegAlloc() { diff --git a/lib/Target/AMDGPU/CMakeLists.txt b/lib/Target/AMDGPU/CMakeLists.txt index 65853bb6a51..bbd06b19d63 100644 --- a/lib/Target/AMDGPU/CMakeLists.txt +++ b/lib/Target/AMDGPU/CMakeLists.txt @@ -12,17 +12,11 @@ tablegen(LLVM AMDGPUGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM AMDGPUGenAsmMatcher.inc -gen-asm-matcher) tablegen(LLVM AMDGPUGenDisassemblerTables.inc -gen-disassembler) tablegen(LLVM AMDGPUGenMCPseudoLowering.inc -gen-pseudo-lowering) -if(LLVM_BUILD_GLOBAL_ISEL) - tablegen(LLVM AMDGPUGenRegisterBank.inc -gen-register-bank) -endif() add_public_tablegen_target(AMDGPUCommonTableGen) # List of all GlobalISel files. set(GLOBAL_ISEL_FILES AMDGPUCallLowering.cpp - AMDGPUInstructionSelector.cpp - AMDGPULegalizerInfo.cpp - AMDGPURegisterBankInfo.cpp ) # Add GlobalISel files to the dependencies if the user wants to build it. diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 70ed40ec3b0..e84ed9bb5ef 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -10,10 +10,10 @@ #include "AMDGPU.h" #include "SIDefines.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" diff --git a/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir b/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir deleted file mode 100644 index 4904cfdf328..00000000000 --- a/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir +++ /dev/null @@ -1,25 +0,0 @@ -# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN - ---- | - define void @global_addrspace(i32 addrspace(1)* %global0) { ret void } -... ---- - -name: global_addrspace -legalized: true -regBankSelected: true - -# GCN: global_addrspace -# GCN: [[PTR:%[0-9]+]] = COPY %vgpr0_vgpr1 -# GCN: FLAT_LOAD_DWORD [[PTR]], 0, 0, 0 - -body: | - bb.0: - liveins: %vgpr0_vgpr1 - - %0:vgpr(p1) = COPY %vgpr0_vgpr1 - %1:vgpr(s32) = G_LOAD %0 :: (load 4 from %ir.global0) - -... ---- diff --git a/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir b/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir deleted file mode 100644 index 57f3f3c8d4b..00000000000 --- a/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir +++ /dev/null @@ -1,141 +0,0 @@ -# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN,SI,SICI,SIVI -# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN,CI,SICI -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN,VI,SIVI - ---- | - define void @smrd_imm(i32 addrspace(2)* %const0) { ret void } -... ---- - -name: smrd_imm -legalized: true -regBankSelected: true - -# GCN: body: -# GCN: [[PTR:%[0-9]+]] = COPY %sgpr0_sgpr1 - -# Immediate offset: -# SICI: S_LOAD_DWORD_IMM [[PTR]], 1, 0 -# VI: S_LOAD_DWORD_IMM [[PTR]], 4, 0 - -# Max immediate offset for SI -# SICI: S_LOAD_DWORD_IMM [[PTR]], 255, 0 -# VI: S_LOAD_DWORD_IMM [[PTR]], 1020, 0 - -# Immediate overflow for SI -# FIXME: The immediate gets selected twice, once into the -# S_LOAD_DWORD instruction and once just as a normal constat. -# SI: S_MOV_B32 1024 -# SI: [[K1024:%[0-9]+]] = S_MOV_B32 1024 -# SI: S_LOAD_DWORD_SGPR [[PTR]], [[K1024]], 0 -# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 256, 0 -# VI: S_LOAD_DWORD_IMM [[PTR]], 1024, 0 - -# Max immediate offset for VI -# SI: S_MOV_B32 1048572 -# SI: [[K1048572:%[0-9]+]] = S_MOV_B32 1048572 -# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262143 -# VI: S_LOAD_DWORD_IMM [[PTR]], 1048572 - -# -# Immediate overflow for VI -# FIXME: The immediate gets selected twice, once into the -# S_LOAD_DWORD instruction and once just as a normal constat. -# SIVI: S_MOV_B32 1048576 -# SIVI: [[K1048576:%[0-9]+]] = S_MOV_B32 1048576 -# SIVI: S_LOAD_DWORD_SGPR [[PTR]], [[K1048576]], 0 -# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262144, 0 - -# Max immediate for CI -# SIVI: [[K_LO:%[0-9]+]] = S_MOV_B32 4294967292 -# SIVI: [[K_HI:%[0-9]+]] = S_MOV_B32 3 -# SIVI: [[K:%[0-9]+]] = REG_SEQUENCE [[K_LO]], 1, [[K_HI]], 2 -# SIVI: [[K_SUB0:%[0-9]+]] = COPY [[K]].sub0 -# SIVI: [[PTR_LO:%[0-9]+]] = COPY [[PTR]].sub0 -# SIVI: [[ADD_PTR_LO:%[0-9]+]] = S_ADD_U32 [[PTR_LO]], [[K_SUB0]] -# SIVI: [[K_SUB1:%[0-9]+]] = COPY [[K]].sub1 -# SIVI: [[PTR_HI:%[0-9]+]] = COPY [[PTR]].sub1 -# SIVI: [[ADD_PTR_HI:%[0-9]+]] = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]] -# SIVI: [[ADD_PTR:%[0-9]+]] = REG_SEQUENCE [[ADD_PTR_LO]], 1, [[ADD_PTR_HI]], 2 -# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0 -# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 4294967295, 0 - -# Immediate overflow for CI -# GCN: [[K_LO:%[0-9]+]] = S_MOV_B32 0 -# GCN: [[K_HI:%[0-9]+]] = S_MOV_B32 4 -# GCN: [[K:%[0-9]+]] = REG_SEQUENCE [[K_LO]], 1, [[K_HI]], 2 -# GCN: [[K_SUB0:%[0-9]+]] = COPY [[K]].sub0 -# GCN: [[PTR_LO:%[0-9]+]] = COPY [[PTR]].sub0 -# GCN: [[ADD_PTR_LO:%[0-9]+]] = S_ADD_U32 [[PTR_LO]], [[K_SUB0]] -# GCN: [[K_SUB1:%[0-9]+]] = COPY [[K]].sub1 -# GCN: [[PTR_HI:%[0-9]+]] = COPY [[PTR]].sub1 -# GCN: [[ADD_PTR_HI:%[0-9]+]] = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]] -# GCN: [[ADD_PTR:%[0-9]+]] = REG_SEQUENCE [[ADD_PTR_LO]], 1, [[ADD_PTR_HI]], 2 -# GCN: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0 - -# Max 32-bit byte offset -# FIXME: The immediate gets selected twice, once into the -# S_LOAD_DWORD instruction and once just as a normal constat. -# SIVI: S_MOV_B32 4294967292 -# SIVI: [[K4294967292:%[0-9]+]] = S_MOV_B32 4294967292 -# SIVI: S_LOAD_DWORD_SGPR [[PTR]], [[K4294967292]], 0 -# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741823, 0 - -# Overflow 32-bit byte offset -# SIVI: [[K_LO:%[0-9]+]] = S_MOV_B32 0 -# SIVI: [[K_HI:%[0-9]+]] = S_MOV_B32 1 -# SIVI: [[K:%[0-9]+]] = REG_SEQUENCE [[K_LO]], 1, [[K_HI]], 2 -# SIVI: [[K_SUB0:%[0-9]+]] = COPY [[K]].sub0 -# SIVI: [[PTR_LO:%[0-9]+]] = COPY [[PTR]].sub0 -# SIVI: [[ADD_PTR_LO:%[0-9]+]] = S_ADD_U32 [[PTR_LO]], [[K_SUB0]] -# SIVI: [[K_SUB1:%[0-9]+]] = COPY [[K]].sub1 -# SIVI: [[PTR_HI:%[0-9]+]] = COPY [[PTR]].sub1 -# SIVI: [[ADD_PTR_HI:%[0-9]+]] = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]] -# SIVI: [[ADD_PTR:%[0-9]+]] = REG_SEQUENCE [[ADD_PTR_LO]], 1, [[ADD_PTR_HI]], 2 -# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0 -# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741824, 0 - -body: | - bb.0: - liveins: %sgpr0_sgpr1 - - %0:sgpr(p2) = COPY %sgpr0_sgpr1 - - %1:sgpr(s64) = G_CONSTANT i64 4 - %2:sgpr(p2) = G_GEP %0, %1 - %3:sgpr(s32) = G_LOAD %2 :: (load 4 from %ir.const0) - - %4:sgpr(s64) = G_CONSTANT i64 1020 - %5:sgpr(p2) = G_GEP %0, %4 - %6:sgpr(s32) = G_LOAD %5 :: (load 4 from %ir.const0) - - %7:sgpr(s64) = G_CONSTANT i64 1024 - %8:sgpr(p2) = G_GEP %0, %7 - %9:sgpr(s32) = G_LOAD %8 :: (load 4 from %ir.const0) - - %10:sgpr(s64) = G_CONSTANT i64 1048572 - %11:sgpr(p2) = G_GEP %0, %10 - %12:sgpr(s32) = G_LOAD %11 :: (load 4 from %ir.const0) - - %13:sgpr(s64) = G_CONSTANT i64 1048576 - %14:sgpr(p2) = G_GEP %0, %13 - %15:sgpr(s32) = G_LOAD %14 :: (load 4 from %ir.const0) - - %16:sgpr(s64) = G_CONSTANT i64 17179869180 - %17:sgpr(p2) = G_GEP %0, %16 - %18:sgpr(s32) = G_LOAD %17 :: (load 4 from %ir.const0) - - %19:sgpr(s64) = G_CONSTANT i64 17179869184 - %20:sgpr(p2) = G_GEP %0, %19 - %21:sgpr(s32) = G_LOAD %20 :: (load 4 from %ir.const0) - - %22:sgpr(s64) = G_CONSTANT i64 4294967292 - %23:sgpr(p2) = G_GEP %0, %22 - %24:sgpr(s32) = G_LOAD %23 :: (load 4 from %ir.const0) - - %25:sgpr(s64) = G_CONSTANT i64 4294967296 - %26:sgpr(p2) = G_GEP %0, %25 - %27:sgpr(s32) = G_LOAD %26 :: (load 4 from %ir.const0) - -... ---- diff --git a/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir b/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir deleted file mode 100644 index 9dc4258a9f4..00000000000 --- a/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir +++ /dev/null @@ -1,27 +0,0 @@ -# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN - ---- | - define void @global_addrspace(i32 addrspace(1)* %global0) { ret void } -... ---- - -name: global_addrspace -legalized: true -regBankSelected: true - -# GCN: global_addrspace -# GCN: [[PTR:%[0-9]+]] = COPY %vgpr0_vgpr1 -# GCN: [[VAL:%[0-9]+]] = COPY %vgpr2 -# GCN: FLAT_STORE_DWORD [[PTR]], [[VAL]], 0, 0, 0 - -body: | - bb.0: - liveins: %vgpr0_vgpr1, %vgpr2 - - %0:vgpr(p1) = COPY %vgpr0_vgpr1 - %1:vgpr(s32) = COPY %vgpr2 - G_STORE %1, %0 :: (store 4 into %ir.global0) - -... ---- diff --git a/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir b/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir deleted file mode 100644 index 9be9f07c706..00000000000 --- a/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir +++ /dev/null @@ -1,67 +0,0 @@ -# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=regbankselect -global-isel %s -verify-machineinstrs -o - | FileCheck %s - ---- | - define void @load_constant(i32 addrspace(2)* %ptr0) { ret void } - define void @load_global_uniform(i32 addrspace(1)* %ptr1) { - %tmp0 = load i32, i32 addrspace(1)* %ptr1 - ret void - } - define void @load_global_non_uniform(i32 addrspace(1)* %ptr2) { - %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0 - %tmp1 = getelementptr i32, i32 addrspace(1)* %ptr2, i32 %tmp0 - %tmp2 = load i32, i32 addrspace(1)* %tmp1 - ret void - } - declare i32 @llvm.amdgcn.workitem.id.x() #0 - attributes #0 = { nounwind readnone } -... - ---- -name : load_constant -legalized: true - -# CHECK-LABEL: name: load_constant -# CHECK: registers: -# CHECK: - { id: 0, class: sgpr } -# CHECK: - { id: 1, class: sgpr } - -body: | - bb.0: - liveins: %sgpr0_sgpr1 - %0:_(p2) = COPY %sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (load 4 from %ir.ptr0) -... - ---- -name: load_global_uniform -legalized: true - -# CHECK-LABEL: name: load_global_uniform -# CHECK: registers: -# CHECK: - { id: 0, class: sgpr } -# CHECK: - { id: 1, class: sgpr } - -body: | - bb.0: - liveins: %sgpr0_sgpr1 - %0:_(p1) = COPY %sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (load 4 from %ir.ptr1) -... - ---- -name: load_global_non_uniform -legalized: true - -# CHECK-LABEL: name: load_global_non_uniform -# CHECK: registers: -# CHECK: - { id: 0, class: sgpr } -# CHECK: - { id: 1, class: vgpr } -# CHECK: - { id: 2, class: vgpr } - - -body: | - bb.0: - liveins: %sgpr0_sgpr1 - %0:_(p1) = COPY %sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (load 4 from %ir.tmp1) -... diff --git a/test/CodeGen/AMDGPU/GlobalISel/shader-epilogs.ll b/test/CodeGen/AMDGPU/GlobalISel/shader-epilogs.ll deleted file mode 100644 index 23ab6336677..00000000000 --- a/test/CodeGen/AMDGPU/GlobalISel/shader-epilogs.ll +++ /dev/null @@ -1,9 +0,0 @@ -; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefix=GCN %s - -; GCN-LABEL: vs_epilog -; GCN: s_endpgm - -define amdgpu_vs void @vs_epilog() { -main_body: - ret void -} diff --git a/test/CodeGen/AMDGPU/GlobalISel/smrd.ll b/test/CodeGen/AMDGPU/GlobalISel/smrd.ll deleted file mode 100644 index 47308cd92d0..00000000000 --- a/test/CodeGen/AMDGPU/GlobalISel/smrd.ll +++ /dev/null @@ -1,87 +0,0 @@ -; FIXME: Need to add support for mubuf stores to enable this on SI. -; XUN: llc < %s -march=amdgcn -mcpu=SI -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=SIVI %s -; RUN: llc < %s -march=amdgcn -mcpu=bonaire -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefix=CI --check-prefix=GCN %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=SIVI %s - -; SMRD load with an immediate offset. -; GCN-LABEL: {{^}}smrd0: -; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01 -; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 -define void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) { -entry: - %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 1 - %1 = load i32, i32 addrspace(2)* %0 - store i32 %1, i32 addrspace(1)* %out - ret void -} - -; SMRD load with the largest possible immediate offset. -; GCN-LABEL: {{^}}smrd1: -; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff,0x{{[0-9]+[137]}} -; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc -define void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) { -entry: - %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 255 - %1 = load i32, i32 addrspace(2)* %0 - store i32 %1, i32 addrspace(1)* %out - ret void -} - -; SMRD load with an offset greater than the largest possible immediate. -; GCN-LABEL: {{^}}smrd2: -; SI: s_movk_i32 s[[OFFSET:[0-9]]], 0x400 -; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]] -; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100 -; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400 -; GCN: s_endpgm -define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) { -entry: - %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 256 - %1 = load i32, i32 addrspace(2)* %0 - store i32 %1, i32 addrspace(1)* %out - ret void -} - -; SMRD load with a 64-bit offset -; GCN-LABEL: {{^}}smrd3: -; FIXME: There are too many copies here because we don't fold immediates -; through REG_SEQUENCE -; XSI: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0xb ; encoding: [0x0b -; TODO: Add VI checks -; XGCN: s_endpgm -define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) { -entry: - %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 4294967296 ; 2 ^ 32 - %1 = load i32, i32 addrspace(2)* %0 - store i32 %1, i32 addrspace(1)* %out - ret void -} - -; SMRD load with the largest possible immediate offset on VI -; GCN-LABEL: {{^}}smrd4: -; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc -; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]] -; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff -; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc -define void @smrd4(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) { -entry: - %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 262143 - %1 = load i32, i32 addrspace(2)* %0 - store i32 %1, i32 addrspace(1)* %out - ret void -} - -; SMRD load with an offset greater than the largest possible immediate on VI -; GCN-LABEL: {{^}}smrd5: -; SIVI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000 -; SIVI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]] -; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000 -; GCN: s_endpgm -define void @smrd5(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) { -entry: - %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 262144 - %1 = load i32, i32 addrspace(2)* %0 - store i32 %1, i32 addrspace(1)* %out - ret void -} -