[NVPTX] Add isel patterns for [reg+offset] form of ldg/ldu.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185329 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Justin Holewinski 2013-07-01 12:58:52 +00:00
parent 30b13ebd0b
commit 9bc8feeb4f
3 changed files with 471 additions and 132 deletions

View File

@ -787,194 +787,476 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
unsigned Opcode;
SDLoc DL(N);
SDNode *LD;
MemSDNode *Mem = cast<MemSDNode>(N);
SDValue Base, Offset, Addr;
EVT RetVT = Mem->getMemoryVT().getVectorElementType();
EVT EltVT = Mem->getMemoryVT().getVectorElementType();
// Select opcode
if (Subtarget.is64Bit()) {
if (SelectDirectAddr(Op1, Addr)) {
switch (N->getOpcode()) {
default:
return NULL;
case NVPTXISD::LDGV2:
switch (RetVT.getSimpleVT().SimpleTy) {
switch (EltVT.getSimpleVT().SimpleTy) {
default:
return NULL;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_64;
Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
break;
case MVT::i16:
Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_64;
Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
break;
case MVT::i32:
Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_64;
Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
break;
case MVT::i64:
Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_64;
Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
break;
case MVT::f32:
Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_64;
Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
break;
case MVT::f64:
Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_64;
break;
}
break;
case NVPTXISD::LDGV4:
switch (RetVT.getSimpleVT().SimpleTy) {
default:
return NULL;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_64;
break;
case MVT::i16:
Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_64;
break;
case MVT::i32:
Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_64;
break;
case MVT::f32:
Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_64;
Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
break;
}
break;
case NVPTXISD::LDUV2:
switch (RetVT.getSimpleVT().SimpleTy) {
switch (EltVT.getSimpleVT().SimpleTy) {
default:
return NULL;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_64;
Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
break;
case MVT::i16:
Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_64;
Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
break;
case MVT::i32:
Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_64;
Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
break;
case MVT::i64:
Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_64;
Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
break;
case MVT::f32:
Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_64;
Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
break;
case MVT::f64:
Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_64;
Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
break;
}
break;
case NVPTXISD::LDGV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
return NULL;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
break;
case MVT::i16:
Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
break;
case MVT::i32:
Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
break;
case MVT::f32:
Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
break;
}
break;
case NVPTXISD::LDUV4:
switch (RetVT.getSimpleVT().SimpleTy) {
switch (EltVT.getSimpleVT().SimpleTy) {
default:
return NULL;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_64;
Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
break;
case MVT::i16:
Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_64;
Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
break;
case MVT::i32:
Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_64;
Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
break;
case MVT::f32:
Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_64;
Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
break;
}
break;
}
SDValue Ops[] = { Addr, Chain };
LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(),
ArrayRef<SDValue>(Ops, 2));
} else if (Subtarget.is64Bit()
? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
: SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
if (Subtarget.is64Bit()) {
switch (N->getOpcode()) {
default:
return NULL;
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
return NULL;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
break;
case MVT::i16:
Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
break;
case MVT::i32:
Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
break;
case MVT::i64:
Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
break;
case MVT::f32:
Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
break;
case MVT::f64:
Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
break;
}
break;
case NVPTXISD::LDUV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
return NULL;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
break;
case MVT::i16:
Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
break;
case MVT::i32:
Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
break;
case MVT::i64:
Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
break;
case MVT::f32:
Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
break;
case MVT::f64:
Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
break;
}
break;
case NVPTXISD::LDGV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
return NULL;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
break;
case MVT::i16:
Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
break;
case MVT::i32:
Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
break;
case MVT::f32:
Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
break;
}
break;
case NVPTXISD::LDUV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
return NULL;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
break;
case MVT::i16:
Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
break;
case MVT::i32:
Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
break;
case MVT::f32:
Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
break;
}
break;
}
} else {
switch (N->getOpcode()) {
default:
return NULL;
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
return NULL;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
break;
case MVT::i16:
Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
break;
case MVT::i32:
Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
break;
case MVT::i64:
Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
break;
case MVT::f32:
Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
break;
case MVT::f64:
Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
break;
}
break;
case NVPTXISD::LDUV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
return NULL;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
break;
case MVT::i16:
Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
break;
case MVT::i32:
Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
break;
case MVT::i64:
Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
break;
case MVT::f32:
Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
break;
case MVT::f64:
Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
break;
}
break;
case NVPTXISD::LDGV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
return NULL;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
break;
case MVT::i16:
Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
break;
case MVT::i32:
Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
break;
case MVT::f32:
Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
break;
}
break;
case NVPTXISD::LDUV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
return NULL;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
break;
case MVT::i16:
Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
break;
case MVT::i32:
Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
break;
case MVT::f32:
Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
break;
}
break;
}
}
SDValue Ops[] = { Base, Offset, Chain };
LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(),
ArrayRef<SDValue>(Ops, 3));
} else {
switch (N->getOpcode()) {
default:
return NULL;
case NVPTXISD::LDGV2:
switch (RetVT.getSimpleVT().SimpleTy) {
if (Subtarget.is64Bit()) {
switch (N->getOpcode()) {
default:
return NULL;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_32;
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
return NULL;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
break;
case MVT::i16:
Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
break;
case MVT::i32:
Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
break;
case MVT::i64:
Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
break;
case MVT::f32:
Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
break;
case MVT::f64:
Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
break;
}
break;
case MVT::i16:
Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_32;
case NVPTXISD::LDUV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
return NULL;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
break;
case MVT::i16:
Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
break;
case MVT::i32:
Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
break;
case MVT::i64:
Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
break;
case MVT::f32:
Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
break;
case MVT::f64:
Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
break;
}
break;
case MVT::i32:
Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_32;
case NVPTXISD::LDGV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
return NULL;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
break;
case MVT::i16:
Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
break;
case MVT::i32:
Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
break;
case MVT::f32:
Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
break;
}
break;
case MVT::i64:
Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_32;
break;
case MVT::f32:
Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_32;
break;
case MVT::f64:
Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_32;
case NVPTXISD::LDUV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
return NULL;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
break;
case MVT::i16:
Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
break;
case MVT::i32:
Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
break;
case MVT::f32:
Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
break;
}
break;
}
break;
case NVPTXISD::LDGV4:
switch (RetVT.getSimpleVT().SimpleTy) {
} else {
switch (N->getOpcode()) {
default:
return NULL;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_32;
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
return NULL;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
break;
case MVT::i16:
Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
break;
case MVT::i32:
Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
break;
case MVT::i64:
Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
break;
case MVT::f32:
Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
break;
case MVT::f64:
Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
break;
}
break;
case MVT::i16:
Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_32;
case NVPTXISD::LDUV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
return NULL;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
break;
case MVT::i16:
Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
break;
case MVT::i32:
Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
break;
case MVT::i64:
Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
break;
case MVT::f32:
Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
break;
case MVT::f64:
Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
break;
}
break;
case MVT::i32:
Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_32;
case NVPTXISD::LDGV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
return NULL;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
break;
case MVT::i16:
Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
break;
case MVT::i32:
Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
break;
case MVT::f32:
Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
break;
}
break;
case MVT::f32:
Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_32;
case NVPTXISD::LDUV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
return NULL;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
break;
case MVT::i16:
Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
break;
case MVT::i32:
Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
break;
case MVT::f32:
Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
break;
}
break;
}
break;
case NVPTXISD::LDUV2:
switch (RetVT.getSimpleVT().SimpleTy) {
default:
return NULL;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_32;
break;
case MVT::i16:
Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_32;
break;
case MVT::i32:
Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_32;
break;
case MVT::i64:
Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_32;
break;
case MVT::f32:
Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_32;
break;
case MVT::f64:
Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_32;
break;
}
break;
case NVPTXISD::LDUV4:
switch (RetVT.getSimpleVT().SimpleTy) {
default:
return NULL;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_32;
break;
case MVT::i16:
Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_32;
break;
case MVT::i32:
Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_32;
break;
case MVT::f32:
Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_32;
break;
}
break;
}
}
SDValue Ops[] = { Op1, Chain };
LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
SDValue Ops[] = { Op1, Chain };
LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(),
ArrayRef<SDValue>(Ops, 2));
}
MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();

View File

@ -1342,20 +1342,38 @@ int_nvvm_ldu_global_p>;
// Elementized vector ldu
multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
(ins Int32Regs:$src),
def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
(ins Int32Regs:$src),
!strconcat("ldu.global.", TyStr), []>;
def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
(ins Int64Regs:$src),
def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
(ins Int64Regs:$src),
!strconcat("ldu.global.", TyStr), []>;
def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
(ins MEMri:$src),
!strconcat("ldu.global.", TyStr), []>;
def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
(ins MEMri64:$src),
!strconcat("ldu.global.", TyStr), []>;
def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
(ins imemAny:$src),
!strconcat("ldu.global.", TyStr), []>;
}
multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
regclass:$dst4), (ins Int32Regs:$src),
multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
regclass:$dst4), (ins Int32Regs:$src),
!strconcat("ldu.global.", TyStr), []>;
def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
regclass:$dst4), (ins Int64Regs:$src),
def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
regclass:$dst4), (ins Int64Regs:$src),
!strconcat("ldu.global.", TyStr), []>;
def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
regclass:$dst4), (ins MEMri:$src),
!strconcat("ldu.global.", TyStr), []>;
def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
regclass:$dst4), (ins MEMri64:$src),
!strconcat("ldu.global.", TyStr), []>;
def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
regclass:$dst4), (ins imemAny:$src),
!strconcat("ldu.global.", TyStr), []>;
}
@ -1452,20 +1470,38 @@ defm INT_PTX_LDG_GLOBAL_p64
// Elementized vector ldg
multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
(ins Int32Regs:$src),
!strconcat("ld.global.nc.", TyStr), []>;
def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
(ins Int64Regs:$src),
!strconcat("ld.global.nc.", TyStr), []>;
def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
(ins MEMri:$src),
!strconcat("ld.global.nc.", TyStr), []>;
def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
(ins MEMri64:$src),
!strconcat("ld.global.nc.", TyStr), []>;
def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
(ins imemAny:$src),
!strconcat("ld.global.nc.", TyStr), []>;
}
multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
regclass:$dst3, regclass:$dst4), (ins Int32Regs:$src),
def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
regclass:$dst4), (ins Int32Regs:$src),
!strconcat("ld.global.nc.", TyStr), []>;
def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
regclass:$dst3, regclass:$dst4), (ins Int64Regs:$src),
def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
regclass:$dst4), (ins Int64Regs:$src),
!strconcat("ld.global.nc.", TyStr), []>;
def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
regclass:$dst4), (ins MEMri:$src),
!strconcat("ld.global.nc.", TyStr), []>;
def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
regclass:$dst4), (ins MEMri64:$src),
!strconcat("ld.global.nc.", TyStr), []>;
def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
regclass:$dst4), (ins imemAny:$src),
!strconcat("ld.global.nc.", TyStr), []>;
}

View File

@ -0,0 +1,21 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
define void @reg_plus_offset(i32* %a) {
; CHECK: ldu.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}+32];
; CHECK: ldu.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}+36];
%p2 = getelementptr i32* %a, i32 8
%t1 = call i32 @llvm.nvvm.ldu.global.i.i32(i32* %p2), !align !1
%p3 = getelementptr i32* %a, i32 9
%t2 = call i32 @llvm.nvvm.ldu.global.i.i32(i32* %p3), !align !1
%t3 = mul i32 %t1, %t2
store i32 %t3, i32* %a
ret void
}
!1 = metadata !{ i32 4 }
declare i32 @llvm.nvvm.ldu.global.i.i32(i32*)
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()