diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index a65dd65b6e6..e0ed7216c15 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -21,6 +21,7 @@
 #include "AMDILIntrinsicInfo.h"
 #include "R600MachineFunctionInfo.h"
 #include "SIMachineFunctionInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -276,32 +277,125 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
   return Op;
 }
 
+/// Emit the stores that copy the constant initializer \p Init into the memory
+/// addressed by \p InitPtr.
+///
+/// Integer and floating-point constants become a single store chained on
+/// \p Chain. Array initializers are lowered element by element, offsetting the
+/// pointer by the element's alloc size, and the per-element stores are joined
+/// by a TokenFactor. Any other kind of initializer is unsupported.
+SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init,
+                                                 const GlobalValue *GV,
+                                                 const SDValue &InitPtr,
+                                                 SDValue Chain,
+                                                 SelectionDAG &DAG) const {
+  const DataLayout *TD = getTargetMachine().getDataLayout();
+  SDLoc DL(InitPtr);
+  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Init)) {
+    EVT VT = EVT::getEVT(CI->getType());
+    PointerType *PtrTy = PointerType::get(CI->getType(), 0);
+    return DAG.getStore(Chain, DL, DAG.getConstant(*CI, VT), InitPtr,
+                 MachinePointerInfo(UndefValue::get(PtrTy)), false, false,
+                 TD->getPrefTypeAlignment(CI->getType()));
+  } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Init)) {
+    EVT VT = EVT::getEVT(CFP->getType());
+    PointerType *PtrTy = PointerType::get(CFP->getType(), 0);
+    return DAG.getStore(Chain, DL, DAG.getConstantFP(*CFP, VT), InitPtr,
+                 MachinePointerInfo(UndefValue::get(PtrTy)), false, false,
+                 TD->getPrefTypeAlignment(CFP->getType()));
+  } else if (Init->getType()->isArrayTy()) {
+    // Only arrays are accepted: getArrayNumElements() is not valid on a
+    // struct type, so structs reach the "unhandled" diagnostic below.
+    EVT PtrVT = InitPtr.getValueType();
+    unsigned NumElements = Init->getType()->getArrayNumElements();
+    // A zero-length array emits no stores, and &Chains[0] on an empty vector
+    // would be out of bounds, so hand the incoming chain straight back.
+    if (NumElements == 0)
+      return Chain;
+    SmallVector<SDValue, 8> Chains;
+    for (unsigned i = 0; i < NumElements; ++i) {
+      SDValue Offset = DAG.getConstant(i * TD->getTypeAllocSize(
+          Init->getType()->getArrayElementType()), PtrVT);
+      SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, InitPtr, Offset);
+      Chains.push_back(LowerConstantInitializer(Init->getAggregateElement(i),
+                       GV, Ptr, Chain, DAG));
+    }
+    return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Chains[0],
+                       Chains.size());
+  } else {
+    Init->dump();
+    llvm_unreachable("Unhandled constant initializer");
+  }
+}
+
 SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
                                                  SDValue Op,
                                                  SelectionDAG &DAG) const {
 
   const DataLayout *TD = getTargetMachine().getDataLayout();
   GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
-
-  assert(G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS);
-  // XXX: What does the value of G->getOffset() mean?
-  assert(G->getOffset() == 0 &&
-         "Do not know what to do with an non-zero offset");
-
   const GlobalValue *GV = G->getGlobal();
 
-  unsigned Offset;
-  if (MFI->LocalMemoryObjects.count(GV) == 0) {
-    uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
-    Offset = MFI->LDSSize;
-    MFI->LocalMemoryObjects[GV] = Offset;
-    // XXX: Account for alignment?
-    MFI->LDSSize += Size;
-  } else {
-    Offset = MFI->LocalMemoryObjects[GV];
-  }
+  switch (G->getAddressSpace()) {
+  default: llvm_unreachable("Global Address lowering not implemented for this "
+                            "address space");
+  case AMDGPUAS::LOCAL_ADDRESS: {
+    // XXX: What does the value of G->getOffset() mean?
+    assert(G->getOffset() == 0 &&
+         "Do not know what to do with an non-zero offset");
 
-  return DAG.getConstant(Offset, getPointerTy(G->getAddressSpace()));
+    unsigned Offset;
+    if (MFI->LocalMemoryObjects.count(GV) == 0) {
+      uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
+      Offset = MFI->LDSSize;
+      MFI->LocalMemoryObjects[GV] = Offset;
+      // XXX: Account for alignment?
+      MFI->LDSSize += Size;
+    } else {
+      Offset = MFI->LocalMemoryObjects[GV];
+    }
+
+    return DAG.getConstant(Offset, getPointerTy(G->getAddressSpace()));
+  }
+  case AMDGPUAS::CONSTANT_ADDRESS: {
+    MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+    Type *EltType = GV->getType()->getElementType();
+    unsigned Size = TD->getTypeAllocSize(EltType);
+    unsigned Alignment = TD->getPrefTypeAlignment(EltType);
+
+    // cast<>, not dyn_cast<>: Var is dereferenced unconditionally, so a global
+    // that is not a variable must trip the cast assertion instead of being
+    // turned into a null pointer.
+    const GlobalVariable *Var = cast<GlobalVariable>(GV);
+    const Constant *Init = Var->getInitializer();
+    int FI = FrameInfo->CreateStackObject(Size, Alignment, false);
+    SDValue InitPtr = DAG.getFrameIndex(FI,
+        getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
+    SmallVector<SDNode*, 8> WorkList;
+
+    // Remember the loads that currently hang off the entry node; they are
+    // re-chained behind the initializer stores below.
+    for (SDNode::use_iterator I = DAG.getEntryNode()->use_begin(),
+                              E = DAG.getEntryNode()->use_end(); I != E; ++I) {
+      if (I->getOpcode() != AMDGPUISD::REGISTER_LOAD && I->getOpcode() != ISD::LOAD)
+        continue;
+      WorkList.push_back(*I);
+    }
+    SDValue Chain = LowerConstantInitializer(Init, GV, InitPtr, DAG.getEntryNode(), DAG);
+    // Operand 0 of each collected load is replaced by the chain of the stores.
+    for (SmallVector<SDNode*, 8>::iterator I = WorkList.begin(),
+                                           E = WorkList.end(); I != E; ++I) {
+      SmallVector<SDValue, 8> Ops;
+      Ops.push_back(Chain);
+      for (unsigned i = 1; i < (*I)->getNumOperands(); ++i) {
+        Ops.push_back((*I)->getOperand(i));
+      }
+      DAG.UpdateNodeOperands(*I, &Ops[0], Ops.size());
+    }
+    return DAG.getZExtOrTrunc(InitPtr, SDLoc(Op),
+        getPointerTy(AMDGPUAS::CONSTANT_ADDRESS));
+  }
+  }
 }
 
 void AMDGPUTargetLowering::ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
@@ -594,6 +688,19 @@ SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   LoadSDNode *Load = cast<LoadSDNode>(Op);
   ISD::LoadExtType ExtType = Load->getExtensionType();
 
+  // Lower loads of constant address space global variables.
+  if (Load->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
+      isa<GlobalVariable>(GetUnderlyingObject(Load->getPointerInfo().V))) {
+
+    SDValue Ptr = DAG.getZExtOrTrunc(Load->getBasePtr(), DL,
+        getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
+    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
+        DAG.getConstant(2, MVT::i32));
+    return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
+                       Load->getChain(), Ptr,
+                       DAG.getTargetConstant(0, MVT::i32), Op.getOperand(2));
+  }
+
   if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS ||
       ExtType == ISD::NON_EXTLOAD || Load->getMemoryVT().bitsGE(MVT::i32))
     return SDValue();
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
index fd6e3a59985..9782b5e755a 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -28,6 +28,10 @@ private:
   void ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
                              SmallVectorImpl<SDValue> &Args,
                              unsigned Start, unsigned Count) const;
+  SDValue LowerConstantInitializer(const Constant* Init, const GlobalValue *GV,
+                                   const SDValue &InitPtr,
+                                   SDValue Chain,
+                                   SelectionDAG &DAG) const;
   SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 9430689c61c..a66f289e9ab 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -143,6 +143,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
   setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
 
   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
   setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
 
   setTargetDAGCombine(ISD::SELECT_CC);
diff --git a/test/CodeGen/R600/gv-const-addrspace.ll b/test/CodeGen/R600/gv-const-addrspace.ll
new file mode 100644
index 00000000000..cda7ab1fccd
--- /dev/null
+++ b/test/CodeGen/R600/gv-const-addrspace.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600 --check-prefix=FUNC
+
+; XXX: Test on SI once 64-bit adds are supported.
+
+@float_gv = internal addrspace(2) unnamed_addr constant [5 x float] [float 0.0, float 1.0, float 2.0, float 3.0, float 4.0], align 4
+
+; FUNC-LABEL: @float
+
+; R600-DAG: MOV {{\** *}}T2.X
+; R600-DAG: MOV {{\** *}}T3.X
+; R600-DAG: MOV {{\** *}}T4.X
+; R600-DAG: MOV {{\** *}}T5.X
+; R600-DAG: MOV {{\** *}}T6.X
+; R600: MOVA_INT
+
+define void @float(float addrspace(1)* %out, i32 %index) {
+entry:
+  %0 = getelementptr inbounds [5 x float] addrspace(2)* @float_gv, i32 0, i32 %index
+  %1 = load float addrspace(2)* %0
+  store float %1, float addrspace(1)* %out
+  ret void
+}
+
+@i32_gv = internal addrspace(2) unnamed_addr constant [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4], align 4
+
+; FUNC-LABEL: @i32
+
+; R600-DAG: MOV {{\** *}}T2.X
+; R600-DAG: MOV {{\** *}}T3.X
+; R600-DAG: MOV {{\** *}}T4.X
+; R600-DAG: MOV {{\** *}}T5.X
+; R600-DAG: MOV {{\** *}}T6.X
+; R600: MOVA_INT
+
+define void @i32(i32 addrspace(1)* %out, i32 %index) {
+entry:
+  %0 = getelementptr inbounds [5 x i32] addrspace(2)* @i32_gv, i32 0, i32 %index
+  %1 = load i32 addrspace(2)* %0
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}