[NVPTX] Implement custom lowering of loads/stores for i1

Loads from i1 become loads from i8 followed by trunc
Stores to i1 become zext to i8 followed by store to i8

Fixes PR13291

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167948 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Justin Holewinski 2012-11-14 19:19:16 +00:00
parent 2337dd7c86
commit a20067b5d4
3 changed files with 87 additions and 2 deletions

View File

@ -174,10 +174,11 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// PTX does not support load / store predicate registers
setOperationAction(ISD::LOAD, MVT::i1, Expand);
setOperationAction(ISD::LOAD, MVT::i1, Custom);
setOperationAction(ISD::STORE, MVT::i1, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
setOperationAction(ISD::STORE, MVT::i1, Expand);
setTruncStoreAction(MVT::i64, MVT::i1, Expand);
setTruncStoreAction(MVT::i32, MVT::i1, Expand);
setTruncStoreAction(MVT::i16, MVT::i1, Expand);
@ -856,11 +857,66 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::EXTRACT_SUBVECTOR:
return Op;
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::LOAD: return LowerLOAD(Op, DAG);
default:
llvm_unreachable("Custom lowering not defined for operation");
}
}
// v = ld i1* addr
// =>
// v1 = ld i8* addr
// v = trunc v1 to i1
SDValue NVPTXTargetLowering::
LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
LoadSDNode *LD = cast<LoadSDNode>(Node);
DebugLoc dl = Node->getDebugLoc();
ISD::LoadExtType ExtType = LD->getExtensionType();
assert(ExtType == ISD::NON_EXTLOAD) ;
EVT VT = Node->getValueType(0);
assert(VT == MVT::i1 && "Custom lowering for i1 load only");
SDValue newLD = DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(),
LD->getPointerInfo(),
LD->isVolatile(), LD->isNonTemporal(),
LD->isInvariant(),
LD->getAlignment());
SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
// The legalizer (the caller) is expecting two values from the legalized
// load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
// in LegalizeDAG.cpp which also uses MergeValues.
SDValue Ops[] = {result, LD->getChain()};
return DAG.getMergeValues(Ops, 2, dl);
}
// st i1 v, addr
// =>
// v1 = zxt v to i8
// st i8, addr
SDValue NVPTXTargetLowering::
LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
DebugLoc dl = Node->getDebugLoc();
StoreSDNode *ST = cast<StoreSDNode>(Node);
SDValue Tmp1 = ST->getChain();
SDValue Tmp2 = ST->getBasePtr();
SDValue Tmp3 = ST->getValue();
EVT VT = Tmp3.getValueType();
assert(VT == MVT::i1 && "Custom lowering for i1 store only");
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl,
MVT::i8, Tmp3);
SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
ST->getPointerInfo(), isVolatile,
isNonTemporal, Alignment);
return Result;
}
SDValue
NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, int idx,
EVT v) const {

View File

@ -138,6 +138,9 @@ private:
SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx);
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
};
} // namespace llvm

View File

@ -0,0 +1,26 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
define ptx_kernel void @t1(i1* %a) {
; PTX32: mov.u16 %rc{{[0-9]+}}, 0;
; PTX32-NEXT: st.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}};
; PTX64: mov.u16 %rc{{[0-9]+}}, 0;
; PTX64-NEXT: st.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}};
store i1 false, i1* %a
ret void
}
define ptx_kernel void @t2(i1* %a, i8* %b) {
; PTX32: ld.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: and.b16 temp, %rc{{[0-9]+}}, 1;
; PTX32: setp.b16.eq %p{{[0-9]+}}, temp, 1;
; PTX64: ld.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: and.b16 temp, %rc{{[0-9]+}}, 1;
; PTX64: setp.b16.eq %p{{[0-9]+}}, temp, 1;
%t1 = load i1* %a
%t2 = select i1 %t1, i8 1, i8 2
store i8 %t2, i8* %b
ret void
}