From b664d47cb06f3cdaaf95387c293e31e650031db0 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 21 Jul 2014 14:01:14 +0000 Subject: [PATCH] R600/SI: Store constant initializer data in constant memory This implements a solution for constant initializers suggested by Vadim Girlin, where we store the data after the shader code and then use the S_GETPC instruction to compute its address. This saves us the trouble of creating a new buffer for constant data and then having to pass the pointer to the kernel via user SGPRs or the input buffer. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213530 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPU.h | 8 +++++ lib/Target/R600/AMDGPUAsmPrinter.cpp | 10 ++++++ lib/Target/R600/AMDGPUAsmPrinter.h | 2 ++ lib/Target/R600/AMDGPUISelLowering.cpp | 17 +--------- lib/Target/R600/AMDGPUISelLowering.h | 6 ++-- lib/Target/R600/AMDGPUMCInstLower.cpp | 16 ++++++++++ .../R600/MCTargetDesc/AMDGPUAsmBackend.cpp | 31 ++++++++++++++++--- .../MCTargetDesc/AMDGPUELFObjectWriter.cpp | 3 +- .../R600/MCTargetDesc/AMDGPUFixupKinds.h | 6 ++++ .../R600/MCTargetDesc/SIMCCodeEmitter.cpp | 29 +++++++++++++++-- lib/Target/R600/R600ISelLowering.cpp | 14 +++++++++ lib/Target/R600/SIISelLowering.cpp | 28 +++++++++++++++++ lib/Target/R600/SIISelLowering.h | 2 ++ lib/Target/R600/SIInstrInfo.cpp | 20 ++++++++++++ lib/Target/R600/SIInstrInfo.td | 4 +++ lib/Target/R600/SIInstructions.td | 16 +++++++++- test/CodeGen/R600/gv-const-addrspace.ll | 10 ++++-- .../R600/large-constant-initializer.ll | 3 +- 18 files changed, 194 insertions(+), 31 deletions(-) diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h index 713fc4b8f32..416e050f675 100644 --- a/lib/Target/R600/AMDGPU.h +++ b/lib/Target/R600/AMDGPU.h @@ -63,6 +63,14 @@ extern char &SIFixSGPRLiveRangesID; extern Target TheAMDGPUTarget; +namespace AMDGPU { +enum TargetIndex { + TI_CONSTDATA_START +}; +} + +#define END_OF_TEXT_LABEL_NAME "EndOfTextLabel" + } // End namespace 
llvm namespace ShaderType { diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp index aaef1405157..257f72e5ce6 100644 --- a/lib/Target/R600/AMDGPUAsmPrinter.cpp +++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp @@ -85,6 +85,16 @@ AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) DisasmEnabled = TM.getSubtarget().dumpCode(); } +void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) { + + // This label is used to mark the end of the .text section. + const TargetLoweringObjectFile &TLOF = getObjFileLowering(); + OutStreamer.SwitchSection(TLOF.getTextSection()); + MCSymbol *EndOfTextLabel = + OutContext.GetOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME)); + OutStreamer.EmitLabel(EndOfTextLabel); +} + bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { SetupMachineFunction(MF); diff --git a/lib/Target/R600/AMDGPUAsmPrinter.h b/lib/Target/R600/AMDGPUAsmPrinter.h index 59b87113ed4..fc2d58915e7 100644 --- a/lib/Target/R600/AMDGPUAsmPrinter.h +++ b/lib/Target/R600/AMDGPUAsmPrinter.h @@ -70,6 +70,8 @@ public: /// Implemented in AMDGPUMCInstLower.cpp void EmitInstruction(const MachineInstr *MI) override; + void EmitEndOfAsmFile(Module &M) override; + protected: bool DisasmEnabled; std::vector DisasmLines, HexLines; diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index c8120ae5c35..ffd6357a9a0 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -21,7 +21,6 @@ #include "AMDGPUSubtarget.h" #include "R600MachineFunctionInfo.h" #include "SIMachineFunctionInfo.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -1177,21 +1176,6 @@ SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { return DAG.getMergeValues(Ops, DL); } - // Lower loads constant address space global 
variable loads - if (Load->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS && - isa( - GetUnderlyingObject(Load->getMemOperand()->getValue()))) { - - - SDValue Ptr = DAG.getZExtOrTrunc(Load->getBasePtr(), DL, - getPointerTy(AMDGPUAS::PRIVATE_ADDRESS)); - Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, - DAG.getConstant(2, MVT::i32)); - return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(), - Load->getChain(), Ptr, - DAG.getTargetConstant(0, MVT::i32), Op.getOperand(2)); - } - if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS || ExtType == ISD::NON_EXTLOAD || Load->getMemoryVT().bitsGE(MVT::i32)) return SDValue(); @@ -2222,6 +2206,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(CVT_F32_UBYTE2) NODE_NAME_CASE(CVT_F32_UBYTE3) NODE_NAME_CASE(BUILD_VERTICAL_VECTOR) + NODE_NAME_CASE(CONST_DATA_PTR) NODE_NAME_CASE(STORE_MSKOR) NODE_NAME_CASE(TBUFFER_STORE_FORMAT) } diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index 4445f81bcc2..0865645e077 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -78,8 +78,8 @@ protected: virtual SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC, unsigned Reg, EVT VT) const; - SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, - SelectionDAG &DAG) const; + virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, + SelectionDAG &DAG) const; /// \brief Split a vector load into multiple scalar loads. SDValue SplitVectorLoad(const SDValue &Op, SelectionDAG &DAG) const; SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const; @@ -233,6 +233,8 @@ enum { /// T2|v.z| | | | /// T3|v.w| | | | BUILD_VERTICAL_VECTOR, + /// Pointer to the start of the shader's constant data. 
+ CONST_DATA_PTR, FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE, STORE_MSKOR, LOAD_CONSTANT, diff --git a/lib/Target/R600/AMDGPUMCInstLower.cpp b/lib/Target/R600/AMDGPUMCInstLower.cpp index ac82e88c926..ce5c41ceb26 100644 --- a/lib/Target/R600/AMDGPUMCInstLower.cpp +++ b/lib/Target/R600/AMDGPUMCInstLower.cpp @@ -22,7 +22,9 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCObjectStreamer.h" @@ -77,6 +79,20 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { case MachineOperand::MO_MachineBasicBlock: MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( MO.getMBB()->getSymbol(), Ctx)); + break; + case MachineOperand::MO_GlobalAddress: { + const GlobalValue *GV = MO.getGlobal(); + MCSymbol *Sym = Ctx.GetOrCreateSymbol(StringRef(GV->getName())); + MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(Sym, Ctx)); + break; + } + case MachineOperand::MO_TargetIndex: { + assert(MO.getIndex() == AMDGPU::TI_CONSTDATA_START); + MCSymbol *Sym = Ctx.GetOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME)); + const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx); + MCOp = MCOperand::CreateExpr(Expr); + break; + } } OutMI.addOperand(MCOp); } diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp index f8228714a24..d55f27b0455 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -45,7 +45,7 @@ public: AMDGPUAsmBackend(const Target &T) : MCAsmBackend() {} - unsigned getNumFixupKinds() const override { return 0; }; + unsigned getNumFixupKinds() const override { return AMDGPU::NumTargetFixupKinds; }; void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, 
uint64_t Value, bool IsPCRel) const override; bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, @@ -77,16 +77,37 @@ void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value, bool IsPCRel) const { - uint16_t *Dst = (uint16_t*)(Data + Fixup.getOffset()); - assert(Fixup.getKind() == FK_PCRel_4); - *Dst = (Value - 4) / 4; + switch ((unsigned)Fixup.getKind()) { + default: llvm_unreachable("Unknown fixup kind"); + case AMDGPU::fixup_si_sopp_br: { + uint16_t *Dst = (uint16_t*)(Data + Fixup.getOffset()); + *Dst = (Value - 4) / 4; + break; + } + + case AMDGPU::fixup_si_rodata: { + uint32_t *Dst = (uint32_t*)(Data + Fixup.getOffset()); + *Dst = Value; + break; + } + + case AMDGPU::fixup_si_end_of_text: { + uint32_t *Dst = (uint32_t*)(Data + Fixup.getOffset()); + // The value points to the last instruction in the text section, so we + // need to add 4 bytes to get to the start of the constants. + *Dst = Value + 4; + break; + } + } } const MCFixupKindInfo &AMDGPUAsmBackend::getFixupKindInfo( MCFixupKind Kind) const { const static MCFixupKindInfo Infos[AMDGPU::NumTargetFixupKinds] = { // name offset bits flags - { "fixup_si_sopp_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel } + { "fixup_si_sopp_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_si_rodata", 0, 32, 0 }, + { "fixup_si_end_of_text", 0, 32, MCFixupKindInfo::FKF_IsPCRel } }; if (Kind < FirstTargetFixupKind) diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp index 53b0e85751d..5fb94d5914d 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp +++ b/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp @@ -10,6 +10,7 @@ #include "AMDGPUMCTargetDesc.h" #include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCFixup.h" using namespace llvm; @@ -21,7 +22,7 @@ public: protected: unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override { - 
llvm_unreachable("Not implemented"); + return Fixup.getKind(); } }; diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUFixupKinds.h b/lib/Target/R600/MCTargetDesc/AMDGPUFixupKinds.h index ef64b40b3a6..4b12e548a56 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUFixupKinds.h +++ b/lib/Target/R600/MCTargetDesc/AMDGPUFixupKinds.h @@ -18,6 +18,12 @@ enum Fixups { /// 16-bit PC relative fixup for SOPP branch instructions. fixup_si_sopp_br = FirstTargetFixupKind, + /// fixup for global addresses with constant initializers + fixup_si_rodata, + + /// fixup for offset from instruction to end of text section + fixup_si_end_of_text, + // Marker LastTargetFixupKind, NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind diff --git a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp index 5e674d6394d..78776c11d75 100644 --- a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp @@ -13,6 +13,7 @@ // //===----------------------------------------------------------------------===// +#include "AMDGPU.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "MCTargetDesc/AMDGPUMCCodeEmitter.h" #include "MCTargetDesc/AMDGPUFixupKinds.h" @@ -40,6 +41,7 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter { void operator=(const SIMCCodeEmitter &) LLVM_DELETED_FUNCTION; const MCInstrInfo &MCII; const MCRegisterInfo &MRI; + MCContext &Ctx; /// \brief Can this operand also contain immediate values? 
bool isSrcOperand(const MCInstrDesc &Desc, unsigned OpNo) const; @@ -50,7 +52,7 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter { public: SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri, MCContext &ctx) - : MCII(mcii), MRI(mri) { } + : MCII(mcii), MRI(mri), Ctx(ctx) { } ~SIMCCodeEmitter() { } @@ -97,6 +99,8 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO) const { Imm.I = MO.getImm(); else if (MO.isFPImm()) Imm.F = MO.getFPImm(); + else if (MO.isExpr()) + return 255; else return ~0; @@ -164,8 +168,13 @@ void SIMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, IntFloatUnion Imm; if (Op.isImm()) Imm.I = Op.getImm(); - else + else if (Op.isFPImm()) Imm.F = Op.getFPImm(); + else { + assert(Op.isExpr()); + // This will be replaced with a fixup value. + Imm.I = 0; + } for (unsigned j = 0; j < 4; j++) { OS.write((uint8_t) ((Imm.I >> (8 * j)) & 0xff)); @@ -198,6 +207,22 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI, if (MO.isReg()) return MRI.getEncodingValue(MO.getReg()); + if (MO.isExpr()) { + const MCSymbolRefExpr *Expr = cast(MO.getExpr()); + MCFixupKind Kind; + const MCSymbol *Sym = + Ctx.GetOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME)); + + if (&Expr->getSymbol() == Sym) { + // Add the offset to the beginning of the constant values. + Kind = (MCFixupKind)AMDGPU::fixup_si_end_of_text; + } else { + // This is used for constant data stored in .rodata. 
+ Kind = (MCFixupKind)AMDGPU::fixup_si_rodata; + } + Fixups.push_back(MCFixup::Create(4, Expr, Kind, MI.getLoc())); + } + // Figure out the operand number, needed for isSrcOperand check unsigned OpNo = 0; for (unsigned e = MI.getNumOperands(); OpNo < e; ++OpNo) { diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index c9e072a8327..52315bf0f33 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -19,6 +19,7 @@ #include "R600Defines.h" #include "R600InstrInfo.h" #include "R600MachineFunctionInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -1526,6 +1527,19 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const return DAG.getMergeValues(Ops, DL); } + // Lower loads constant address space global variable loads + if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS && + isa( + GetUnderlyingObject(LoadNode->getMemOperand()->getValue()))) { + + SDValue Ptr = DAG.getZExtOrTrunc(LoadNode->getBasePtr(), DL, + getPointerTy(AMDGPUAS::PRIVATE_ADDRESS)); + Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, + DAG.getConstant(2, MVT::i32)); + return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(), + LoadNode->getChain(), Ptr, + DAG.getTargetConstant(0, MVT::i32), Op.getOperand(2)); + } if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) { SDValue MergedValues[2] = { diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index b3e3997a305..b2a8f1a9cb4 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -860,6 +860,34 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND, return Chain; } +SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI, + SDValue Op, + SelectionDAG &DAG) const { + GlobalAddressSDNode *GSD = 
cast(Op); + + if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) + return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG); + + SDLoc DL(GSD); + const GlobalValue *GV = GSD->getGlobal(); + MVT PtrVT = getPointerTy(GSD->getAddressSpace()); + + SDValue Ptr = DAG.getNode(AMDGPUISD::CONST_DATA_PTR, DL, PtrVT); + SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); + + SDValue PtrLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Ptr, + DAG.getConstant(0, MVT::i32)); + SDValue PtrHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Ptr, + DAG.getConstant(1, MVT::i32)); + + SDValue Lo = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i32, MVT::Glue), + PtrLo, GA); + SDValue Hi = DAG.getNode(ISD::ADDE, DL, DAG.getVTList(MVT::i32, MVT::Glue), + PtrHi, DAG.getConstant(0, MVT::i32), + SDValue(Lo.getNode(), 1)); + return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi); +} + SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); LoadSDNode *Load = cast(Op); diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index b3343ee6694..6c2c00989ff 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -25,6 +25,8 @@ class SITargetLowering : public AMDGPUTargetLowering { SDValue Chain, unsigned Offset, bool Signed) const; SDValue LowerSampleIntrinsic(unsigned Opcode, const SDValue &Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, + SelectionDAG &DAG) const override; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFastFDIV(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index 455c8904122..15c9a5e1cdc 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -361,6 +361,26 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { 
MI->eraseFromParent(); break; } + case AMDGPU::SI_CONSTDATA_PTR: { + unsigned Reg = MI->getOperand(0).getReg(); + unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0); + unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1); + + BuildMI(MBB, MI, DL, get(AMDGPU::S_GETPC_B64), Reg); + + // Add 32-bit offset from this instruction to the start of the constant data. + BuildMI(MBB, MI, DL, get(AMDGPU::S_ADD_I32), RegLo) + .addReg(RegLo) + .addTargetIndex(AMDGPU::TI_CONSTDATA_START) + .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit); + BuildMI(MBB, MI, DL, get(AMDGPU::S_ADDC_U32), RegHi) + .addReg(RegHi) + .addImm(0) + .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit) + .addReg(AMDGPU::SCC, RegState::Implicit); + MI->eraseFromParent(); + break; + } } return true; } diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 7093db6d41c..c1f22a5d26d 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -57,6 +57,10 @@ def SIsampleb : SDSample<"AMDGPUISD::SAMPLEB">; def SIsampled : SDSample<"AMDGPUISD::SAMPLED">; def SIsamplel : SDSample<"AMDGPUISD::SAMPLEL">; +def SIconstdata_ptr : SDNode< + "AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 0, [SDTCisVT<0, i64>]> +>; + // Transformation function, extract the lower 32bit of a 64bit immediate def LO32 : SDNodeXFormgetTargetConstant(N->getZExtValue() & 0xffffffff, MVT::i32); diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 15f77b3d0b3..c97de9297ab 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -139,7 +139,11 @@ def S_SEXT_I32_I16 : SOP1_32 <0x0000001a, "S_SEXT_I32_I16", ////def S_BITSET0_B64 : SOP1_BITSET0 <0x0000001c, "S_BITSET0_B64", []>; ////def S_BITSET1_B32 : SOP1_BITSET1 <0x0000001d, "S_BITSET1_B32", []>; ////def S_BITSET1_B64 : SOP1_BITSET1 <0x0000001e, "S_BITSET1_B64", []>; -def S_GETPC_B64 : SOP1_64 <0x0000001f, "S_GETPC_B64", []>; +def S_GETPC_B64 : SOP1 < + 0x0000001f, 
(outs SReg_64:$dst), (ins), "S_GETPC_B64 $dst", [] +> { + let SSRC0 = 0; +} def S_SETPC_B64 : SOP1_64 <0x00000020, "S_SETPC_B64", []>; def S_SWAPPC_B64 : SOP1_64 <0x00000021, "S_SWAPPC_B64", []>; def S_RFE_B64 : SOP1_64 <0x00000022, "S_RFE_B64", []>; @@ -1694,6 +1698,16 @@ defm SI_SPILL_S128 : SI_SPILL_SGPR ; defm SI_SPILL_S256 : SI_SPILL_SGPR ; defm SI_SPILL_S512 : SI_SPILL_SGPR ; +let Defs = [SCC] in { + +def SI_CONSTDATA_PTR : InstSI < + (outs SReg_64:$dst), + (ins), + "", [(set SReg_64:$dst, (i64 SIconstdata_ptr))] +>; + +} // End Defs = [SCC] + } // end IsCodeGenOnly, isPseudo } // end SubtargetPredicate = SI diff --git a/test/CodeGen/R600/gv-const-addrspace.ll b/test/CodeGen/R600/gv-const-addrspace.ll index db64a6fe8c7..074d9087ee6 100644 --- a/test/CodeGen/R600/gv-const-addrspace.ll +++ b/test/CodeGen/R600/gv-const-addrspace.ll @@ -4,11 +4,11 @@ @b = internal addrspace(2) constant [1 x i16] [ i16 7 ], align 2 -; XXX: Test on SI once 64-bit adds are supportes. - @float_gv = internal unnamed_addr addrspace(2) constant [5 x float] [float 0.0, float 1.0, float 2.0, float 3.0, float 4.0], align 4 ; FUNC-LABEL: @float +; FIXME: We should be using S_LOAD_DWORD here. +; SI: BUFFER_LOAD_DWORD ; EG-DAG: MOV {{\** *}}T2.X ; EG-DAG: MOV {{\** *}}T3.X @@ -29,6 +29,9 @@ entry: ; FUNC-LABEL: @i32 +; FIXME: We should be using S_LOAD_DWORD here. 
+; SI: BUFFER_LOAD_DWORD + ; EG-DAG: MOV {{\** *}}T2.X ; EG-DAG: MOV {{\** *}}T3.X ; EG-DAG: MOV {{\** *}}T4.X @@ -50,6 +53,7 @@ entry: @struct_foo_gv = internal unnamed_addr addrspace(2) constant [1 x %struct.foo] [ %struct.foo { float 16.0, [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4] } ] ; FUNC-LABEL: @struct_foo_gv_load +; SI: S_LOAD_DWORD define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) { %gep = getelementptr inbounds [1 x %struct.foo] addrspace(2)* @struct_foo_gv, i32 0, i32 0, i32 1, i32 %index @@ -64,6 +68,8 @@ define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) { <1 x i32> ] ; FUNC-LABEL: @array_v1_gv_load +; FIXME: We should be using S_LOAD_DWORD here. +; SI: BUFFER_LOAD_DWORD define void @array_v1_gv_load(<1 x i32> addrspace(1)* %out, i32 %index) { %gep = getelementptr inbounds [4 x <1 x i32>] addrspace(2)* @array_v1_gv, i32 0, i32 %index %load = load <1 x i32> addrspace(2)* %gep, align 4 diff --git a/test/CodeGen/R600/large-constant-initializer.ll b/test/CodeGen/R600/large-constant-initializer.ll index 552cd05e137..191b5c3de91 100644 --- a/test/CodeGen/R600/large-constant-initializer.ll +++ b/test/CodeGen/R600/large-constant-initializer.ll @@ -1,6 +1,5 @@ -; XFAIL: * -; REQUIRES: asserts ; RUN: llc -march=r600 -mcpu=SI < %s +; CHECK: S_ENDPGM @gv = external unnamed_addr addrspace(2) constant [239 x i32], align 4