mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-25 20:59:51 +00:00
[NVPTX] Fix handling of ldg/ldu intrinsics.
The address space of the pointer must be global (1) for these intrinsics. There must also be alignment metadata attached to the intrinsic calls, e.g.
  %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr), !align !0
  !0 = metadata !{i32 4}
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211939 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
8992274412
commit
cb8f98382b
@ -796,26 +796,25 @@ def llvm_anyi64ptr_ty : LLVMAnyPointerType<llvm_i64_ty>; // (space)i64*
|
||||
|
||||
|
||||
// Generated within nvvm. Use for ldu on sm_20 or later
|
||||
// @TODO: Revisit this, Changed LLVMAnyPointerType to LLVMPointerType
|
||||
def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty],
|
||||
[LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
|
||||
[LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
|
||||
"llvm.nvvm.ldu.global.i">;
|
||||
def int_nvvm_ldu_global_f : Intrinsic<[llvm_anyfloat_ty],
|
||||
[LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
|
||||
[LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
|
||||
"llvm.nvvm.ldu.global.f">;
|
||||
def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty],
|
||||
[LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
|
||||
[LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
|
||||
"llvm.nvvm.ldu.global.p">;
|
||||
|
||||
// Generated within nvvm. Use for ldg on sm_35 or later
|
||||
def int_nvvm_ldg_global_i : Intrinsic<[llvm_anyint_ty],
|
||||
[LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
|
||||
[LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
|
||||
"llvm.nvvm.ldg.global.i">;
|
||||
def int_nvvm_ldg_global_f : Intrinsic<[llvm_anyfloat_ty],
|
||||
[LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
|
||||
[LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
|
||||
"llvm.nvvm.ldg.global.f">;
|
||||
def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty],
|
||||
[LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
|
||||
[LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
|
||||
"llvm.nvvm.ldg.global.p">;
|
||||
|
||||
// Use for generic pointers
|
||||
|
@ -141,7 +141,7 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
|
||||
case NVPTXISD::LDGV4:
|
||||
case NVPTXISD::LDUV2:
|
||||
case NVPTXISD::LDUV4:
|
||||
ResNode = SelectLDGLDUVector(N);
|
||||
ResNode = SelectLDGLDU(N);
|
||||
break;
|
||||
case NVPTXISD::StoreV2:
|
||||
case NVPTXISD::StoreV4:
|
||||
@ -167,6 +167,9 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
|
||||
case ISD::INTRINSIC_WO_CHAIN:
|
||||
ResNode = SelectIntrinsicNoChain(N);
|
||||
break;
|
||||
case ISD::INTRINSIC_W_CHAIN:
|
||||
ResNode = SelectIntrinsicChain(N);
|
||||
break;
|
||||
case NVPTXISD::Tex1DFloatI32:
|
||||
case NVPTXISD::Tex1DFloatFloat:
|
||||
case NVPTXISD::Tex1DFloatFloatLevel:
|
||||
@ -273,6 +276,21 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
|
||||
return SelectCode(N);
|
||||
}
|
||||
|
||||
// Custom selection hook for ISD::INTRINSIC_W_CHAIN nodes; Select() dispatches
// to it for that opcode. The intrinsic ID is read from operand 1 of N. The six
// nvvm ldg/ldu global intrinsics (f, i and p variants) are forwarded to
// SelectLDGLDU(); for every other intrinsic ID this returns NULL.
// NOTE(review): a NULL result presumably lets Select() fall back to
// SelectCode(N) -- confirm against the full Select() body, which is only
// partially visible here.
SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) {
|
||||
// Intrinsic ID of this node, taken from operand 1.
unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
|
||||
switch (IID) {
|
||||
default:
|
||||
// Not an intrinsic handled here.
return NULL;
|
||||
case Intrinsic::nvvm_ldg_global_f:
|
||||
case Intrinsic::nvvm_ldg_global_i:
|
||||
case Intrinsic::nvvm_ldg_global_p:
|
||||
case Intrinsic::nvvm_ldu_global_f:
|
||||
case Intrinsic::nvvm_ldu_global_i:
|
||||
case Intrinsic::nvvm_ldu_global_p:
|
||||
// All ldg/ldu variants share one selector, which distinguishes LDG from LDU
// itself by re-reading the intrinsic ID.
return SelectLDGLDU(N);
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned int getCodeAddrSpace(MemSDNode *N,
|
||||
const NVPTXSubtarget &Subtarget) {
|
||||
const Value *Src = N->getMemOperand()->getValue();
|
||||
@ -990,22 +1008,101 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
|
||||
return LD;
|
||||
}
|
||||
|
||||
SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
|
||||
SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
|
||||
|
||||
SDValue Chain = N->getOperand(0);
|
||||
SDValue Op1 = N->getOperand(1);
|
||||
SDValue Op1;
|
||||
MemSDNode *Mem;
|
||||
bool IsLDG = true;
|
||||
|
||||
// If this is an LDG/LDU intrinsic, the address is the third operand. If it's an
|
||||
// LDG/LDU SD node (from custom vector handling), then it's the second operand.
|
||||
if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
|
||||
Op1 = N->getOperand(2);
|
||||
Mem = cast<MemIntrinsicSDNode>(N);
|
||||
unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
|
||||
switch (IID) {
|
||||
default:
|
||||
return NULL;
|
||||
case Intrinsic::nvvm_ldg_global_f:
|
||||
case Intrinsic::nvvm_ldg_global_i:
|
||||
case Intrinsic::nvvm_ldg_global_p:
|
||||
IsLDG = true;
|
||||
break;
|
||||
case Intrinsic::nvvm_ldu_global_f:
|
||||
case Intrinsic::nvvm_ldu_global_i:
|
||||
case Intrinsic::nvvm_ldu_global_p:
|
||||
IsLDG = false;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
Op1 = N->getOperand(1);
|
||||
Mem = cast<MemSDNode>(N);
|
||||
}
|
||||
|
||||
unsigned Opcode;
|
||||
SDLoc DL(N);
|
||||
SDNode *LD;
|
||||
MemSDNode *Mem = cast<MemSDNode>(N);
|
||||
SDValue Base, Offset, Addr;
|
||||
|
||||
EVT EltVT = Mem->getMemoryVT().getVectorElementType();
|
||||
EVT EltVT = Mem->getMemoryVT();
|
||||
if (EltVT.isVector()) {
|
||||
EltVT = EltVT.getVectorElementType();
|
||||
}
|
||||
|
||||
if (SelectDirectAddr(Op1, Addr)) {
|
||||
switch (N->getOpcode()) {
|
||||
default:
|
||||
return nullptr;
|
||||
case ISD::INTRINSIC_W_CHAIN:
|
||||
if (IsLDG) {
|
||||
switch (EltVT.getSimpleVT().SimpleTy) {
|
||||
default:
|
||||
return nullptr;
|
||||
case MVT::i8:
|
||||
Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
|
||||
break;
|
||||
case MVT::i16:
|
||||
Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
|
||||
break;
|
||||
case MVT::i32:
|
||||
Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
|
||||
break;
|
||||
case MVT::i64:
|
||||
Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
|
||||
break;
|
||||
case MVT::f32:
|
||||
Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
|
||||
break;
|
||||
case MVT::f64:
|
||||
Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
switch (EltVT.getSimpleVT().SimpleTy) {
|
||||
default:
|
||||
return nullptr;
|
||||
case MVT::i8:
|
||||
Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
|
||||
break;
|
||||
case MVT::i16:
|
||||
Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
|
||||
break;
|
||||
case MVT::i32:
|
||||
Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
|
||||
break;
|
||||
case MVT::i64:
|
||||
Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
|
||||
break;
|
||||
case MVT::f32:
|
||||
Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
|
||||
break;
|
||||
case MVT::f64:
|
||||
Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case NVPTXISD::LDGV2:
|
||||
switch (EltVT.getSimpleVT().SimpleTy) {
|
||||
default:
|
||||
@ -1101,6 +1198,55 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
|
||||
switch (N->getOpcode()) {
|
||||
default:
|
||||
return nullptr;
|
||||
case ISD::INTRINSIC_W_CHAIN:
|
||||
if (IsLDG) {
|
||||
switch (EltVT.getSimpleVT().SimpleTy) {
|
||||
default:
|
||||
return nullptr;
|
||||
case MVT::i8:
|
||||
Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
|
||||
break;
|
||||
case MVT::i16:
|
||||
Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
|
||||
break;
|
||||
case MVT::i32:
|
||||
Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
|
||||
break;
|
||||
case MVT::i64:
|
||||
Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
|
||||
break;
|
||||
case MVT::f32:
|
||||
Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
|
||||
break;
|
||||
case MVT::f64:
|
||||
Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
switch (EltVT.getSimpleVT().SimpleTy) {
|
||||
default:
|
||||
return nullptr;
|
||||
case MVT::i8:
|
||||
Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
|
||||
break;
|
||||
case MVT::i16:
|
||||
Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
|
||||
break;
|
||||
case MVT::i32:
|
||||
Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
|
||||
break;
|
||||
case MVT::i64:
|
||||
Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
|
||||
break;
|
||||
case MVT::f32:
|
||||
Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
|
||||
break;
|
||||
case MVT::f64:
|
||||
Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case NVPTXISD::LDGV2:
|
||||
switch (EltVT.getSimpleVT().SimpleTy) {
|
||||
default:
|
||||
@ -1190,6 +1336,55 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
|
||||
switch (N->getOpcode()) {
|
||||
default:
|
||||
return nullptr;
|
||||
case ISD::INTRINSIC_W_CHAIN:
|
||||
if (IsLDG) {
|
||||
switch (EltVT.getSimpleVT().SimpleTy) {
|
||||
default:
|
||||
return nullptr;
|
||||
case MVT::i8:
|
||||
Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
|
||||
break;
|
||||
case MVT::i16:
|
||||
Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
|
||||
break;
|
||||
case MVT::i32:
|
||||
Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
|
||||
break;
|
||||
case MVT::i64:
|
||||
Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
|
||||
break;
|
||||
case MVT::f32:
|
||||
Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
|
||||
break;
|
||||
case MVT::f64:
|
||||
Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
switch (EltVT.getSimpleVT().SimpleTy) {
|
||||
default:
|
||||
return nullptr;
|
||||
case MVT::i8:
|
||||
Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
|
||||
break;
|
||||
case MVT::i16:
|
||||
Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
|
||||
break;
|
||||
case MVT::i32:
|
||||
Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
|
||||
break;
|
||||
case MVT::i64:
|
||||
Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
|
||||
break;
|
||||
case MVT::f32:
|
||||
Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
|
||||
break;
|
||||
case MVT::f64:
|
||||
Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case NVPTXISD::LDGV2:
|
||||
switch (EltVT.getSimpleVT().SimpleTy) {
|
||||
default:
|
||||
@ -1285,6 +1480,55 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
|
||||
switch (N->getOpcode()) {
|
||||
default:
|
||||
return nullptr;
|
||||
case ISD::INTRINSIC_W_CHAIN:
|
||||
if (IsLDG) {
|
||||
switch (EltVT.getSimpleVT().SimpleTy) {
|
||||
default:
|
||||
return nullptr;
|
||||
case MVT::i8:
|
||||
Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
|
||||
break;
|
||||
case MVT::i16:
|
||||
Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
|
||||
break;
|
||||
case MVT::i32:
|
||||
Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
|
||||
break;
|
||||
case MVT::i64:
|
||||
Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
|
||||
break;
|
||||
case MVT::f32:
|
||||
Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
|
||||
break;
|
||||
case MVT::f64:
|
||||
Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
switch (EltVT.getSimpleVT().SimpleTy) {
|
||||
default:
|
||||
return nullptr;
|
||||
case MVT::i8:
|
||||
Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
|
||||
break;
|
||||
case MVT::i16:
|
||||
Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
|
||||
break;
|
||||
case MVT::i32:
|
||||
Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
|
||||
break;
|
||||
case MVT::i64:
|
||||
Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
|
||||
break;
|
||||
case MVT::f32:
|
||||
Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
|
||||
break;
|
||||
case MVT::f64:
|
||||
Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case NVPTXISD::LDGV2:
|
||||
switch (EltVT.getSimpleVT().SimpleTy) {
|
||||
default:
|
||||
@ -1374,6 +1618,55 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
|
||||
switch (N->getOpcode()) {
|
||||
default:
|
||||
return nullptr;
|
||||
case ISD::INTRINSIC_W_CHAIN:
|
||||
if (IsLDG) {
|
||||
switch (EltVT.getSimpleVT().SimpleTy) {
|
||||
default:
|
||||
return nullptr;
|
||||
case MVT::i8:
|
||||
Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
|
||||
break;
|
||||
case MVT::i16:
|
||||
Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
|
||||
break;
|
||||
case MVT::i32:
|
||||
Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
|
||||
break;
|
||||
case MVT::i64:
|
||||
Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
|
||||
break;
|
||||
case MVT::f32:
|
||||
Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
|
||||
break;
|
||||
case MVT::f64:
|
||||
Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
switch (EltVT.getSimpleVT().SimpleTy) {
|
||||
default:
|
||||
return nullptr;
|
||||
case MVT::i8:
|
||||
Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
|
||||
break;
|
||||
case MVT::i16:
|
||||
Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
|
||||
break;
|
||||
case MVT::i32:
|
||||
Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
|
||||
break;
|
||||
case MVT::i64:
|
||||
Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
|
||||
break;
|
||||
case MVT::f32:
|
||||
Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
|
||||
break;
|
||||
case MVT::f64:
|
||||
Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case NVPTXISD::LDGV2:
|
||||
switch (EltVT.getSimpleVT().SimpleTy) {
|
||||
default:
|
||||
@ -1466,7 +1759,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
|
||||
}
|
||||
|
||||
MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
|
||||
MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
|
||||
MemRefs0[0] = Mem->getMemOperand();
|
||||
cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
|
||||
|
||||
return LD;
|
||||
|
@ -59,10 +59,11 @@ private:
|
||||
|
||||
SDNode *Select(SDNode *N) override;
|
||||
SDNode *SelectIntrinsicNoChain(SDNode *N);
|
||||
SDNode *SelectIntrinsicChain(SDNode *N);
|
||||
SDNode *SelectTexSurfHandle(SDNode *N);
|
||||
SDNode *SelectLoad(SDNode *N);
|
||||
SDNode *SelectLoadVector(SDNode *N);
|
||||
SDNode *SelectLDGLDUVector(SDNode *N);
|
||||
SDNode *SelectLDGLDU(SDNode *N);
|
||||
SDNode *SelectStore(SDNode *N);
|
||||
SDNode *SelectStoreVector(SDNode *N);
|
||||
SDNode *SelectLoadParam(SDNode *N);
|
||||
|
@ -2363,22 +2363,62 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
|
||||
|
||||
case Intrinsic::nvvm_ldu_global_i:
|
||||
case Intrinsic::nvvm_ldu_global_f:
|
||||
case Intrinsic::nvvm_ldu_global_p:
|
||||
case Intrinsic::nvvm_ldu_global_p: {
|
||||
|
||||
Info.opc = ISD::INTRINSIC_W_CHAIN;
|
||||
if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
|
||||
Info.memVT = getValueType(I.getType());
|
||||
else if (Intrinsic == Intrinsic::nvvm_ldu_global_p)
|
||||
Info.memVT = getValueType(I.getType());
|
||||
else if(Intrinsic == Intrinsic::nvvm_ldu_global_p)
|
||||
Info.memVT = getPointerTy();
|
||||
else
|
||||
Info.memVT = MVT::f32;
|
||||
Info.memVT = getValueType(I.getType());
|
||||
Info.ptrVal = I.getArgOperand(0);
|
||||
Info.offset = 0;
|
||||
Info.vol = 0;
|
||||
Info.readMem = true;
|
||||
Info.writeMem = false;
|
||||
Info.align = 0;
|
||||
|
||||
// alignment is available as metadata.
|
||||
// Grab it and set the alignment.
|
||||
assert(I.hasMetadataOtherThanDebugLoc() && "Must have alignment metadata");
|
||||
MDNode *AlignMD = I.getMetadata("align");
|
||||
assert(AlignMD && "Must have a non-null MDNode");
|
||||
assert(AlignMD->getNumOperands() == 1 && "Must have a single operand");
|
||||
Value *Align = AlignMD->getOperand(0);
|
||||
int64_t Alignment = cast<ConstantInt>(Align)->getZExtValue();
|
||||
Info.align = Alignment;
|
||||
|
||||
return true;
|
||||
}
|
||||
case Intrinsic::nvvm_ldg_global_i:
|
||||
case Intrinsic::nvvm_ldg_global_f:
|
||||
case Intrinsic::nvvm_ldg_global_p: {
|
||||
|
||||
Info.opc = ISD::INTRINSIC_W_CHAIN;
|
||||
if (Intrinsic == Intrinsic::nvvm_ldg_global_i)
|
||||
Info.memVT = getValueType(I.getType());
|
||||
else if(Intrinsic == Intrinsic::nvvm_ldg_global_p)
|
||||
Info.memVT = getPointerTy();
|
||||
else
|
||||
Info.memVT = getValueType(I.getType());
|
||||
Info.ptrVal = I.getArgOperand(0);
|
||||
Info.offset = 0;
|
||||
Info.vol = 0;
|
||||
Info.readMem = true;
|
||||
Info.writeMem = false;
|
||||
|
||||
// alignment is available as metadata.
|
||||
// Grab it and set the alignment.
|
||||
assert(I.hasMetadataOtherThanDebugLoc() && "Must have alignment metadata");
|
||||
MDNode *AlignMD = I.getMetadata("align");
|
||||
assert(AlignMD && "Must have a non-null MDNode");
|
||||
assert(AlignMD->getNumOperands() == 1 && "Must have a single operand");
|
||||
Value *Align = AlignMD->getOperand(0);
|
||||
int64_t Alignment = cast<ConstantInt>(Align)->getZExtValue();
|
||||
Info.align = Alignment;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
case Intrinsic::nvvm_tex_1d_v4f32_i32:
|
||||
case Intrinsic::nvvm_tex_1d_v4f32_f32:
|
||||
|
@ -1374,67 +1374,33 @@ def INT_PTX_SREG_WARPSIZE : F_SREG<"mov.u32 \t$dst, WARP_SZ;", Int32Regs,
|
||||
// Support for ldu on sm_20 or later
|
||||
//-----------------------------------
|
||||
|
||||
def ldu_i8 : PatFrag<(ops node:$ptr), (int_nvvm_ldu_global_i node:$ptr), [{
|
||||
MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
|
||||
return M->getMemoryVT() == MVT::i8;
|
||||
}]>;
|
||||
|
||||
// Scalar
|
||||
// @TODO: Revisit this, Changed imemAny to imem
|
||||
multiclass LDU_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp> {
|
||||
multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
|
||||
def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
|
||||
!strconcat("ldu.global.", TyStr),
|
||||
[(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDU]>;
|
||||
[]>, Requires<[hasLDU]>;
|
||||
def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
|
||||
!strconcat("ldu.global.", TyStr),
|
||||
[(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDU]>;
|
||||
def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
|
||||
[]>, Requires<[hasLDU]>;
|
||||
def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
|
||||
!strconcat("ldu.global.", TyStr),
|
||||
[(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
|
||||
Requires<[hasLDU]>;
|
||||
[]>, Requires<[hasLDU]>;
|
||||
def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
|
||||
!strconcat("ldu.global.", TyStr),
|
||||
[(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDU]>;
|
||||
[]>, Requires<[hasLDU]>;
|
||||
def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
|
||||
!strconcat("ldu.global.", TyStr),
|
||||
[(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDU]>;
|
||||
[]>, Requires<[hasLDU]>;
|
||||
}
|
||||
|
||||
multiclass LDU_G_NOINTRIN<string TyStr, NVPTXRegClass regclass, PatFrag IntOp> {
|
||||
def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
|
||||
!strconcat("ldu.global.", TyStr),
|
||||
[(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDU]>;
|
||||
def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
|
||||
!strconcat("ldu.global.", TyStr),
|
||||
[(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDU]>;
|
||||
def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
|
||||
!strconcat("ldu.global.", TyStr),
|
||||
[(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
|
||||
Requires<[hasLDU]>;
|
||||
def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
|
||||
!strconcat("ldu.global.", TyStr),
|
||||
[(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDU]>;
|
||||
def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
|
||||
!strconcat("ldu.global.", TyStr),
|
||||
[(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDU]>;
|
||||
}
|
||||
|
||||
defm INT_PTX_LDU_GLOBAL_i8 : LDU_G_NOINTRIN<"u8 \t$result, [$src];", Int16Regs,
|
||||
ldu_i8>;
|
||||
defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs,
|
||||
int_nvvm_ldu_global_i>;
|
||||
defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs,
|
||||
int_nvvm_ldu_global_i>;
|
||||
defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs,
|
||||
int_nvvm_ldu_global_i>;
|
||||
defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs,
|
||||
int_nvvm_ldu_global_f>;
|
||||
defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs,
|
||||
int_nvvm_ldu_global_f>;
|
||||
defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs,
|
||||
int_nvvm_ldu_global_p>;
|
||||
defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs,
|
||||
int_nvvm_ldu_global_p>;
|
||||
defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
|
||||
defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
|
||||
defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
|
||||
defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
|
||||
defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
|
||||
defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
|
||||
defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
|
||||
defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
|
||||
|
||||
// vector
|
||||
|
||||
@ -1504,65 +1470,40 @@ defm INT_PTX_LDU_G_v4f32_ELE
|
||||
// Support for ldg on sm_35 or later
|
||||
//-----------------------------------
|
||||
|
||||
def ldg_i8 : PatFrag<(ops node:$ptr), (int_nvvm_ldg_global_i node:$ptr), [{
|
||||
MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
|
||||
return M->getMemoryVT() == MVT::i8;
|
||||
}]>;
|
||||
|
||||
multiclass LDG_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp> {
|
||||
multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
|
||||
def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
|
||||
!strconcat("ld.global.nc.", TyStr),
|
||||
[(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDG]>;
|
||||
[]>, Requires<[hasLDG]>;
|
||||
def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
|
||||
!strconcat("ld.global.nc.", TyStr),
|
||||
[(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDG]>;
|
||||
def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
|
||||
[]>, Requires<[hasLDG]>;
|
||||
def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
|
||||
!strconcat("ld.global.nc.", TyStr),
|
||||
[(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
|
||||
Requires<[hasLDG]>;
|
||||
[]>, Requires<[hasLDG]>;
|
||||
def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
|
||||
!strconcat("ld.global.nc.", TyStr),
|
||||
[(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDG]>;
|
||||
[]>, Requires<[hasLDG]>;
|
||||
def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
|
||||
!strconcat("ld.global.nc.", TyStr),
|
||||
[(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDG]>;
|
||||
}
|
||||
|
||||
multiclass LDG_G_NOINTRIN<string TyStr, NVPTXRegClass regclass, PatFrag IntOp> {
|
||||
def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
|
||||
!strconcat("ld.global.nc.", TyStr),
|
||||
[(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDG]>;
|
||||
def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
|
||||
!strconcat("ld.global.nc.", TyStr),
|
||||
[(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDG]>;
|
||||
def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
|
||||
!strconcat("ld.global.nc.", TyStr),
|
||||
[(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
|
||||
Requires<[hasLDG]>;
|
||||
def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
|
||||
!strconcat("ld.global.nc.", TyStr),
|
||||
[(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDG]>;
|
||||
def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
|
||||
!strconcat("ld.global.nc.", TyStr),
|
||||
[(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDG]>;
|
||||
[]>, Requires<[hasLDG]>;
|
||||
}
|
||||
|
||||
defm INT_PTX_LDG_GLOBAL_i8
|
||||
: LDG_G_NOINTRIN<"u8 \t$result, [$src];", Int16Regs, ldg_i8>;
|
||||
: LDG_G<"u8 \t$result, [$src];", Int16Regs>;
|
||||
defm INT_PTX_LDG_GLOBAL_i16
|
||||
: LDG_G<"u16 \t$result, [$src];", Int16Regs, int_nvvm_ldg_global_i>;
|
||||
: LDG_G<"u16 \t$result, [$src];", Int16Regs>;
|
||||
defm INT_PTX_LDG_GLOBAL_i32
|
||||
: LDG_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldg_global_i>;
|
||||
: LDG_G<"u32 \t$result, [$src];", Int32Regs>;
|
||||
defm INT_PTX_LDG_GLOBAL_i64
|
||||
: LDG_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldg_global_i>;
|
||||
: LDG_G<"u64 \t$result, [$src];", Int64Regs>;
|
||||
defm INT_PTX_LDG_GLOBAL_f32
|
||||
: LDG_G<"f32 \t$result, [$src];", Float32Regs, int_nvvm_ldg_global_f>;
|
||||
: LDG_G<"f32 \t$result, [$src];", Float32Regs>;
|
||||
defm INT_PTX_LDG_GLOBAL_f64
|
||||
: LDG_G<"f64 \t$result, [$src];", Float64Regs, int_nvvm_ldg_global_f>;
|
||||
: LDG_G<"f64 \t$result, [$src];", Float64Regs>;
|
||||
defm INT_PTX_LDG_GLOBAL_p32
|
||||
: LDG_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldg_global_p>;
|
||||
: LDG_G<"u32 \t$result, [$src];", Int32Regs>;
|
||||
defm INT_PTX_LDG_GLOBAL_p64
|
||||
: LDG_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldg_global_p>;
|
||||
: LDG_G<"u64 \t$result, [$src];", Int64Regs>;
|
||||
|
||||
// vector
|
||||
|
||||
|
@ -2,13 +2,15 @@
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
|
||||
|
||||
declare i8 @llvm.nvvm.ldu.global.i.i8(i8*)
|
||||
declare i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8*)
|
||||
|
||||
define i8 @foo(i8* %a) {
|
||||
; Ensure we properly truncate off the high-order 24 bits
|
||||
; CHECK: ldu.global.u8
|
||||
; CHECK: cvt.u32.u16
|
||||
; CHECK: and.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, 255
|
||||
%val = tail call i8 @llvm.nvvm.ldu.global.i.i8(i8* %a)
|
||||
%val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8* %a), !align !0
|
||||
ret i8 %val
|
||||
}
|
||||
|
||||
!0 = metadata !{i32 4}
|
||||
|
40
test/CodeGen/NVPTX/ldu-ldg.ll
Normal file
40
test/CodeGen/NVPTX/ldu-ldg.ll
Normal file
@ -0,0 +1,40 @@
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
|
||||
|
||||
declare i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr)
|
||||
declare i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr)
|
||||
declare i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr)
|
||||
declare i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr)
|
||||
|
||||
|
||||
; CHECK: func0
|
||||
define i8 @func0(i8 addrspace(1)* %ptr) {
|
||||
; ldu.global.u8
|
||||
%val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr), !align !0
|
||||
ret i8 %val
|
||||
}
|
||||
|
||||
; CHECK: func1
|
||||
define i32 @func1(i32 addrspace(1)* %ptr) {
|
||||
; ldu.global.u32
|
||||
%val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr), !align !0
|
||||
ret i32 %val
|
||||
}
|
||||
|
||||
; CHECK: func2
|
||||
define i8 @func2(i8 addrspace(1)* %ptr) {
|
||||
; ld.global.nc.u8
|
||||
%val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr), !align !0
|
||||
ret i8 %val
|
||||
}
|
||||
|
||||
; CHECK: func3
|
||||
define i32 @func3(i32 addrspace(1)* %ptr) {
|
||||
; ld.global.nc.u32
|
||||
%val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr), !align !0
|
||||
ret i32 %val
|
||||
}
|
||||
|
||||
|
||||
|
||||
!0 = metadata !{i32 4}
|
@ -7,9 +7,9 @@ define void @reg_plus_offset(i32* %a) {
|
||||
; CHECK: ldu.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}+32];
|
||||
; CHECK: ldu.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}+36];
|
||||
%p2 = getelementptr i32* %a, i32 8
|
||||
%t1 = call i32 @llvm.nvvm.ldu.global.i.i32(i32* %p2), !align !1
|
||||
%t1 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p2), !align !1
|
||||
%p3 = getelementptr i32* %a, i32 9
|
||||
%t2 = call i32 @llvm.nvvm.ldu.global.i.i32(i32* %p3), !align !1
|
||||
%t2 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p3), !align !1
|
||||
%t3 = mul i32 %t1, %t2
|
||||
store i32 %t3, i32* %a
|
||||
ret void
|
||||
@ -17,5 +17,5 @@ define void @reg_plus_offset(i32* %a) {
|
||||
|
||||
!1 = metadata !{ i32 4 }
|
||||
|
||||
declare i32 @llvm.nvvm.ldu.global.i.i32(i32*)
|
||||
declare i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32*)
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
|
||||
|
Loading…
Reference in New Issue
Block a user