mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-25 04:39:44 +00:00
PTX: Add support for i8 type and introduce associated .b8 registers
The i8 type is required for boolean values, but it can only be used with the ld, st, and mov instructions (plus the conversions to and from the other register types). The i1 type continues to be used for predicates.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@133814 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
ef01edf1e9
commit
b05a8a8f02
@ -92,6 +92,7 @@ static const char *getRegisterTypeName(unsigned RegNo) {
|
||||
#define TEST_REGCLS(cls, clsstr) \
|
||||
if (PTX::cls ## RegisterClass->contains(RegNo)) return # clsstr;
|
||||
TEST_REGCLS(RegPred, pred);
|
||||
TEST_REGCLS(RegI8, b8);
|
||||
TEST_REGCLS(RegI16, b16);
|
||||
TEST_REGCLS(RegI32, b32);
|
||||
TEST_REGCLS(RegI64, b64);
|
||||
@ -124,6 +125,7 @@ static const char *getTypeName(const Type* type) {
|
||||
case Type::IntegerTyID:
|
||||
switch (type->getPrimitiveSizeInBits()) {
|
||||
default: llvm_unreachable("Unknown integer bit-width");
|
||||
case 8: return ".u8";
|
||||
case 16: return ".u16";
|
||||
case 32: return ".u32";
|
||||
case 64: return ".u64";
|
||||
|
@ -15,6 +15,7 @@
|
||||
// PTX Formal Parameter Calling Convention
|
||||
def CC_PTX : CallingConv<[
|
||||
CCIfType<[i1], CCAssignToReg<[P12, P13, P14, P15, P16, P17, P18, P19, P20, P21, P22, P23, P24, P25, P26, P27, P28, P29, P30, P31, P32, P33, P34, P35, P36, P37, P38, P39, P40, P41, P42, P43, P44, P45, P46, P47, P48, P49, P50, P51, P52, P53, P54, P55, P56, P57, P58, P59, P60, P61, P62, P63, P64, P65, P66, P67, P68, P69, P70, P71, P72, P73, P74, P75, P76, P77, P78, P79, P80, P81, P82, P83, P84, P85, P86, P87, P88, P89, P90, P91, P92, P93, P94, P95, P96, P97, P98, P99, P100, P101, P102, P103, P104, P105, P106, P107, P108, P109, P110, P111, P112, P113, P114, P115, P116, P117, P118, P119, P120, P121, P122, P123, P124, P125, P126, P127]>>,
|
||||
CCIfType<[i8], CCAssignToReg<[RQ12, RQ13, RQ14, RQ15, RQ16, RQ17, RQ18, RQ19, RQ20, RQ21, RQ22, RQ23, RQ24, RQ25, RQ26, RQ27, RQ28, RQ29, RQ30, RQ31, RQ32, RQ33, RQ34, RQ35, RQ36, RQ37, RQ38, RQ39, RQ40, RQ41, RQ42, RQ43, RQ44, RQ45, RQ46, RQ47, RQ48, RQ49, RQ50, RQ51, RQ52, RQ53, RQ54, RQ55, RQ56, RQ57, RQ58, RQ59, RQ60, RQ61, RQ62, RQ63, RQ64, RQ65, RQ66, RQ67, RQ68, RQ69, RQ70, RQ71, RQ72, RQ73, RQ74, RQ75, RQ76, RQ77, RQ78, RQ79, RQ80, RQ81, RQ82, RQ83, RQ84, RQ85, RQ86, RQ87, RQ88, RQ89, RQ90, RQ91, RQ92, RQ93, RQ94, RQ95, RQ96, RQ97, RQ98, RQ99, RQ100, RQ101, RQ102, RQ103, RQ104, RQ105, RQ106, RQ107, RQ108, RQ109, RQ110, RQ111, RQ112, RQ113, RQ114, RQ115, RQ116, RQ117, RQ118, RQ119, RQ120, RQ121, RQ122, RQ123, RQ124, RQ125, RQ126, RQ127]>>,
|
||||
CCIfType<[i16], CCAssignToReg<[RH12, RH13, RH14, RH15, RH16, RH17, RH18, RH19, RH20, RH21, RH22, RH23, RH24, RH25, RH26, RH27, RH28, RH29, RH30, RH31, RH32, RH33, RH34, RH35, RH36, RH37, RH38, RH39, RH40, RH41, RH42, RH43, RH44, RH45, RH46, RH47, RH48, RH49, RH50, RH51, RH52, RH53, RH54, RH55, RH56, RH57, RH58, RH59, RH60, RH61, RH62, RH63, RH64, RH65, RH66, RH67, RH68, RH69, RH70, RH71, RH72, RH73, RH74, RH75, RH76, RH77, RH78, RH79, RH80, RH81, RH82, RH83, RH84, RH85, RH86, RH87, RH88, RH89, RH90, RH91, RH92, RH93, RH94, RH95, RH96, RH97, RH98, RH99, RH100, RH101, RH102, RH103, RH104, RH105, RH106, RH107, RH108, RH109, RH110, RH111, RH112, RH113, RH114, RH115, RH116, RH117, RH118, RH119, RH120, RH121, RH122, RH123, RH124, RH125, RH126, RH127]>>,
|
||||
CCIfType<[i32,f32], CCAssignToReg<[R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127]>>,
|
||||
CCIfType<[i64,f64], CCAssignToReg<[RD12, RD13, RD14, RD15, RD16, RD17, RD18, RD19, RD20, RD21, RD22, RD23, RD24, RD25, RD26, RD27, RD28, RD29, RD30, RD31, RD32, RD33, RD34, RD35, RD36, RD37, RD38, RD39, RD40, RD41, RD42, RD43, RD44, RD45, RD46, RD47, RD48, RD49, RD50, RD51, RD52, RD53, RD54, RD55, RD56, RD57, RD58, RD59, RD60, RD61, RD62, RD63, RD64, RD65, RD66, RD67, RD68, RD69, RD70, RD71, RD72, RD73, RD74, RD75, RD76, RD77, RD78, RD79, RD80, RD81, RD82, RD83, RD84, RD85, RD86, RD87, RD88, RD89, RD90, RD91, RD92, RD93, RD94, RD95, RD96, RD97, RD98, RD99, RD100, RD101, RD102, RD103, RD104, RD105, RD106, RD107, RD108, RD109, RD110, RD111, RD112, RD113, RD114, RD115, RD116, RD117, RD118, RD119, RD120, RD121, RD122, RD123, RD124, RD125, RD126, RD127]>>
|
||||
@ -23,6 +24,7 @@ def CC_PTX : CallingConv<[
|
||||
// PTX Return Value Calling Convention
|
||||
def RetCC_PTX : CallingConv<[
|
||||
CCIfType<[i1], CCAssignToReg<[P0, P1, P2, P3, P4, P5, P6, P7, P8, P9, P10, P11]>>,
|
||||
CCIfType<[i8], CCAssignToReg<[RQ0, RQ1, RQ2, RQ3, RQ4, RQ5, RQ6, RQ7, RQ8, RQ9, RQ10, RQ11]>>,
|
||||
CCIfType<[i16], CCAssignToReg<[RH0, RH1, RH2, RH3, RH4, RH5, RH6, RH7, RH8, RH9, RH10, RH11]>>,
|
||||
CCIfType<[i32,f32], CCAssignToReg<[R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11]>>,
|
||||
CCIfType<[i64,f64], CCAssignToReg<[RD0, RD1, RD2, RD3, RD4, RD5, RD6, RD7, RD8, RD9, RD10, RD11]>>
|
||||
|
@ -40,6 +40,7 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
|
||||
: TargetLowering(TM, new TargetLoweringObjectFileELF()) {
|
||||
// Set up the register classes.
|
||||
addRegisterClass(MVT::i1, PTX::RegPredRegisterClass);
|
||||
addRegisterClass(MVT::i8, PTX::RegI8RegisterClass);
|
||||
addRegisterClass(MVT::i16, PTX::RegI16RegisterClass);
|
||||
addRegisterClass(MVT::i32, PTX::RegI32RegisterClass);
|
||||
addRegisterClass(MVT::i64, PTX::RegI64RegisterClass);
|
||||
@ -52,10 +53,20 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
|
||||
|
||||
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
|
||||
setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
|
||||
|
||||
|
||||
// Promote i1 type
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
|
||||
setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
|
||||
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
|
||||
|
||||
setTruncStoreAction(MVT::i8, MVT::i1, Promote);
|
||||
|
||||
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
|
||||
|
||||
// Turn i16 (z)extload into load + (z)extend
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::i16, Expand);
|
||||
setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand);
|
||||
setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
|
||||
|
||||
// Turn f32 extload into load + fextend
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
|
||||
@ -176,6 +187,7 @@ struct argmap_entry {
|
||||
bool operator==(MVT::SimpleValueType _VT) const { return VT == _VT; }
|
||||
} argmap[] = {
|
||||
argmap_entry(MVT::i1, PTX::RegPredRegisterClass),
|
||||
argmap_entry(MVT::i8, PTX::RegI8RegisterClass),
|
||||
argmap_entry(MVT::i16, PTX::RegI16RegisterClass),
|
||||
argmap_entry(MVT::i32, PTX::RegI32RegisterClass),
|
||||
argmap_entry(MVT::i64, PTX::RegI64RegisterClass),
|
||||
@ -252,6 +264,9 @@ SDValue PTXTargetLowering::
|
||||
if (RegVT == MVT::i1) {
|
||||
TRC = PTX::RegPredRegisterClass;
|
||||
}
|
||||
else if (RegVT == MVT::i8) {
|
||||
TRC = PTX::RegI8RegisterClass;
|
||||
}
|
||||
else if (RegVT == MVT::i16) {
|
||||
TRC = PTX::RegI16RegisterClass;
|
||||
}
|
||||
|
@ -33,6 +33,7 @@ static const struct map_entry {
|
||||
const TargetRegisterClass *cls;
|
||||
const int opcode;
|
||||
} map[] = {
|
||||
{ &PTX::RegI8RegClass, PTX::MOVU8rr },
|
||||
{ &PTX::RegI16RegClass, PTX::MOVU16rr },
|
||||
{ &PTX::RegI32RegClass, PTX::MOVU32rr },
|
||||
{ &PTX::RegI64RegClass, PTX::MOVU64rr },
|
||||
@ -302,7 +303,9 @@ void PTXInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
|
||||
int OpCode;
|
||||
|
||||
// Select the appropriate opcode based on the register class
|
||||
if (RC == PTX::RegI16RegisterClass) {
|
||||
if (RC == PTX::RegI8RegisterClass) {
|
||||
OpCode = PTX::STACKSTOREI8;
|
||||
} else if (RC == PTX::RegI16RegisterClass) {
|
||||
OpCode = PTX::STACKSTOREI16;
|
||||
} else if (RC == PTX::RegI32RegisterClass) {
|
||||
OpCode = PTX::STACKSTOREI32;
|
||||
@ -337,7 +340,9 @@ void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
|
||||
int OpCode;
|
||||
|
||||
// Select the appropriate opcode based on the register class
|
||||
if (RC == PTX::RegI16RegisterClass) {
|
||||
if (RC == PTX::RegI8RegisterClass) {
|
||||
OpCode = PTX::STACKLOADI8;
|
||||
} else if (RC == PTX::RegI16RegisterClass) {
|
||||
OpCode = PTX::STACKLOADI16;
|
||||
} else if (RC == PTX::RegI32RegisterClass) {
|
||||
OpCode = PTX::STACKLOADI32;
|
||||
|
@ -537,6 +537,7 @@ multiclass PTX_LD<string opstr, string typestr, RegisterClass RC, PatFrag pat_lo
|
||||
}
|
||||
|
||||
multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> {
|
||||
defm u8 : PTX_LD<opstr, ".u8", RegI8, pat_load>;
|
||||
defm u16 : PTX_LD<opstr, ".u16", RegI16, pat_load>;
|
||||
defm u32 : PTX_LD<opstr, ".u32", RegI32, pat_load>;
|
||||
defm u64 : PTX_LD<opstr, ".u64", RegI64, pat_load>;
|
||||
@ -572,6 +573,7 @@ multiclass PTX_ST<string opstr, string typestr, RegisterClass RC, PatFrag pat_st
|
||||
}
|
||||
|
||||
multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> {
|
||||
defm u8 : PTX_ST<opstr, ".u8", RegI8, pat_store>;
|
||||
defm u16 : PTX_ST<opstr, ".u16", RegI16, pat_store>;
|
||||
defm u32 : PTX_ST<opstr, ".u32", RegI32, pat_store>;
|
||||
defm u64 : PTX_ST<opstr, ".u64", RegI64, pat_store>;
|
||||
@ -783,22 +785,27 @@ defm XOR : PTX_LOGIC<"xor", xor>;
|
||||
// Register-to-register moves, one per register class. They carry an empty
// selection pattern list, so instruction selection never matches them from
// the DAG; they are only emitted when referenced explicitly by opcode.
let neverHasSideEffects = 1 in {
  def MOVPREDrr
    : InstPTX<(outs RegPred:$d), (ins RegPred:$a), "mov.pred\t$d, $a", []>;
  def MOVU8rr
    : InstPTX<(outs RegI8:$d), (ins RegI8:$a), "mov.u8\t$d, $a", []>;
  def MOVU16rr
    : InstPTX<(outs RegI16:$d), (ins RegI16:$a), "mov.u16\t$d, $a", []>;
  def MOVU32rr
    : InstPTX<(outs RegI32:$d), (ins RegI32:$a), "mov.u32\t$d, $a", []>;
  def MOVU64rr
    : InstPTX<(outs RegI64:$d), (ins RegI64:$a), "mov.u64\t$d, $a", []>;
  def MOVF32rr
    : InstPTX<(outs RegF32:$d), (ins RegF32:$a), "mov.f32\t$d, $a", []>;
  def MOVF64rr
    : InstPTX<(outs RegF64:$d), (ins RegF64:$a), "mov.f64\t$d, $a", []>;
}
|
||||
|
||||
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
|
||||
def MOVPREDri
|
||||
: InstPTX<(outs RegPred:$d), (ins i1imm:$a), "mov.pred\t$d, $a",
|
||||
[(set RegPred:$d, imm:$a)]>;
|
||||
def MOVU8ri
|
||||
: InstPTX<(outs RegI8:$d), (ins i8imm:$a), "mov.u8\t$d, $a",
|
||||
[(set RegI8:$d, imm:$a)]>;
|
||||
def MOVU16ri
|
||||
: InstPTX<(outs RegI16:$d), (ins i16imm:$a), "mov.u16\t$d, $a",
|
||||
[(set RegI16:$d, imm:$a)]>;
|
||||
@ -838,6 +845,9 @@ let hasSideEffects = 1 in {
|
||||
def LDpiPred : InstPTX<(outs RegPred:$d), (ins MEMpi:$a),
|
||||
"ld.param.pred\t$d, [$a]",
|
||||
[(set RegPred:$d, (PTXloadparam timm:$a))]>;
|
||||
def LDpiU8 : InstPTX<(outs RegI8:$d), (ins MEMpi:$a),
|
||||
"ld.param.u8\t$d, [$a]",
|
||||
[(set RegI8:$d, (PTXloadparam timm:$a))]>;
|
||||
def LDpiU16 : InstPTX<(outs RegI16:$d), (ins MEMpi:$a),
|
||||
"ld.param.u16\t$d, [$a]",
|
||||
[(set RegI16:$d, (PTXloadparam timm:$a))]>;
|
||||
@ -857,6 +867,9 @@ let hasSideEffects = 1 in {
|
||||
def STpiPred : InstPTX<(outs), (ins MEMret:$d, RegPred:$a),
|
||||
"st.param.pred\t[$d], $a",
|
||||
[(PTXstoreparam timm:$d, RegPred:$a)]>;
|
||||
def STpiU8 : InstPTX<(outs), (ins MEMret:$d, RegI8:$a),
|
||||
"st.param.u8\t[$d], $a",
|
||||
[(PTXstoreparam timm:$d, RegI8:$a)]>;
|
||||
def STpiU16 : InstPTX<(outs), (ins MEMret:$d, RegI16:$a),
|
||||
"st.param.u16\t[$d], $a",
|
||||
[(PTXstoreparam timm:$d, RegI16:$a)]>;
|
||||
@ -887,6 +900,10 @@ defm STs : PTX_ST_ALL<"st.shared", store_shared>;
|
||||
// PTX does not directly support converting to a predicate type, so we fake it
|
||||
// by performing a greater-than test between the value and zero. This follows
|
||||
// the C convention that any non-zero value is equivalent to 'true'.
|
||||
def CVT_pred_u8
|
||||
: InstPTX<(outs RegPred:$d), (ins RegI8:$a), "setp.gt.b8\t$d, $a, 0",
|
||||
[(set RegPred:$d, (trunc RegI8:$a))]>;
|
||||
|
||||
def CVT_pred_u16
|
||||
: InstPTX<(outs RegPred:$d), (ins RegI16:$a), "setp.gt.b16\t$d, $a, 0",
|
||||
[(set RegPred:$d, (trunc RegI16:$a))]>;
|
||||
@ -907,6 +924,34 @@ def CVT_pred_f64
|
||||
: InstPTX<(outs RegPred:$d), (ins RegF64:$a), "setp.gt.b64\t$d, $a, 0",
|
||||
[(set RegPred:$d, (fp_to_uint RegF64:$a))]>;
|
||||
|
||||
// Conversion to u8
|
||||
// PTX does not directly support converting a predicate to a value, so we
|
||||
// use a select instruction to select either 0 or 1 (integer or fp) based
|
||||
// on the truth value of the predicate.
|
||||
def CVT_u8_pred
|
||||
: InstPTX<(outs RegI8:$d), (ins RegPred:$a), "selp.u8\t$d, 1, 0, $a",
|
||||
[(set RegI8:$d, (zext RegPred:$a))]>;
|
||||
|
||||
// Sign-extend a predicate to u8. Sign-extending a true i1 yields all-ones,
// so select -1 (not 1) when the predicate is set; the literal is truncated
// to the operand width.
def CVT_u8_preds
  : InstPTX<(outs RegI8:$d), (ins RegPred:$a), "selp.u8\t$d, -1, 0, $a",
            [(set RegI8:$d, (sext RegPred:$a))]>;
|
||||
|
||||
def CVT_u8_u32
|
||||
: InstPTX<(outs RegI8:$d), (ins RegI32:$a), "cvt.u8.u32\t$d, $a",
|
||||
[(set RegI8:$d, (trunc RegI32:$a))]>;
|
||||
|
||||
def CVT_u8_u64
|
||||
: InstPTX<(outs RegI8:$d), (ins RegI64:$a), "cvt.u8.u64\t$d, $a",
|
||||
[(set RegI8:$d, (trunc RegI64:$a))]>;
|
||||
|
||||
def CVT_u8_f32
|
||||
: InstPTX<(outs RegI8:$d), (ins RegF32:$a), "cvt.rzi.u8.f32\t$d, $a",
|
||||
[(set RegI8:$d, (fp_to_uint RegF32:$a))]>;
|
||||
|
||||
def CVT_u8_f64
|
||||
: InstPTX<(outs RegI8:$d), (ins RegF64:$a), "cvt.rzi.u8.f64\t$d, $a",
|
||||
[(set RegI8:$d, (fp_to_uint RegF64:$a))]>;
|
||||
|
||||
// Conversion to u16
|
||||
// PTX does not directly support converting a predicate to a value, so we
|
||||
// use a select instruction to select either 0 or 1 (integer or fp) based
|
||||
@ -915,6 +960,18 @@ def CVT_u16_pred
|
||||
: InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a",
|
||||
[(set RegI16:$d, (zext RegPred:$a))]>;
|
||||
|
||||
// Sign-extend a predicate to u16. Sign-extending a true i1 yields all-ones,
// so select -1 (not 1) when the predicate is set; the literal is truncated
// to the operand width.
def CVT_u16_preds
  : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, -1, 0, $a",
            [(set RegI16:$d, (sext RegPred:$a))]>;
|
||||
|
||||
def CVT_u16_u8
|
||||
: InstPTX<(outs RegI16:$d), (ins RegI8:$a), "cvt.u16.u8\t$d, $a",
|
||||
[(set RegI16:$d, (zext RegI8:$a))]>;
|
||||
|
||||
def CVT_u16_s8
|
||||
: InstPTX<(outs RegI16:$d), (ins RegI8:$a), "cvt.u16.s8\t$d, $a",
|
||||
[(set RegI16:$d, (sext RegI8:$a))]>;
|
||||
|
||||
def CVT_u16_u32
|
||||
: InstPTX<(outs RegI16:$d), (ins RegI32:$a), "cvt.u16.u32\t$d, $a",
|
||||
[(set RegI16:$d, (trunc RegI32:$a))]>;
|
||||
@ -937,10 +994,26 @@ def CVT_u32_pred
|
||||
: InstPTX<(outs RegI32:$d), (ins RegPred:$a), "selp.u32\t$d, 1, 0, $a",
|
||||
[(set RegI32:$d, (zext RegPred:$a))]>;
|
||||
|
||||
def CVT_u32_u8
|
||||
: InstPTX<(outs RegI32:$d), (ins RegI8:$a), "cvt.u32.u8\t$d, $a",
|
||||
[(set RegI32:$d, (zext RegI8:$a))]>;
|
||||
|
||||
def CVT_u32_u16
|
||||
: InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a",
|
||||
[(set RegI32:$d, (zext RegI16:$a))]>;
|
||||
|
||||
// Sign-extend a predicate to u32. Sign-extending a true i1 yields all-ones,
// so select -1 (not 1) when the predicate is set; the literal is truncated
// to the operand width.
def CVT_u32_preds
  : InstPTX<(outs RegI32:$d), (ins RegPred:$a), "selp.u32\t$d, -1, 0, $a",
            [(set RegI32:$d, (sext RegPred:$a))]>;
|
||||
|
||||
// Sign-extending i8 -> i32 conversion. The emitted cvt has a signed source
// type (.s8), so it must be selected for sext; zext of an i8 is already
// handled by CVT_u32_u8 (cvt.u32.u8).
def CVT_u32_s8
  : InstPTX<(outs RegI32:$d), (ins RegI8:$a), "cvt.u32.s8\t$d, $a",
            [(set RegI32:$d, (sext RegI8:$a))]>;
|
||||
|
||||
def CVT_u32_s16
|
||||
: InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.s16\t$d, $a",
|
||||
[(set RegI32:$d, (sext RegI16:$a))]>;
|
||||
|
||||
def CVT_u32_u64
|
||||
: InstPTX<(outs RegI32:$d), (ins RegI64:$a), "cvt.u32.u64\t$d, $a",
|
||||
[(set RegI32:$d, (trunc RegI64:$a))]>;
|
||||
@ -959,6 +1032,10 @@ def CVT_u64_pred
|
||||
: InstPTX<(outs RegI64:$d), (ins RegPred:$a), "selp.u64\t$d, 1, 0, $a",
|
||||
[(set RegI64:$d, (zext RegPred:$a))]>;
|
||||
|
||||
def CVT_u64_u8
|
||||
: InstPTX<(outs RegI64:$d), (ins RegI8:$a), "cvt.u64.u8\t$d, $a",
|
||||
[(set RegI64:$d, (zext RegI8:$a))]>;
|
||||
|
||||
def CVT_u64_u16
|
||||
: InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.u16\t$d, $a",
|
||||
[(set RegI64:$d, (zext RegI16:$a))]>;
|
||||
@ -967,6 +1044,22 @@ def CVT_u64_u32
|
||||
: InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.u32\t$d, $a",
|
||||
[(set RegI64:$d, (zext RegI32:$a))]>;
|
||||
|
||||
// Sign-extend a predicate to u64. Sign-extending a true i1 yields all-ones,
// so select -1 (not 1) when the predicate is set.
def CVT_u64_preds
  : InstPTX<(outs RegI64:$d), (ins RegPred:$a), "selp.u64\t$d, -1, 0, $a",
            [(set RegI64:$d, (sext RegPred:$a))]>;
|
||||
|
||||
// Sign-extending i8 -> i64 conversion. The emitted cvt has a signed source
// type (.s8), so it must be selected for sext; zext of an i8 is already
// handled by CVT_u64_u8 (cvt.u64.u8).
def CVT_u64_s8
  : InstPTX<(outs RegI64:$d), (ins RegI8:$a), "cvt.u64.s8\t$d, $a",
            [(set RegI64:$d, (sext RegI8:$a))]>;
|
||||
|
||||
def CVT_u64_s16
|
||||
: InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.s16\t$d, $a",
|
||||
[(set RegI64:$d, (sext RegI16:$a))]>;
|
||||
|
||||
def CVT_u64_s32
|
||||
: InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.s32\t$d, $a",
|
||||
[(set RegI64:$d, (sext RegI32:$a))]>;
|
||||
|
||||
def CVT_u64_f32
|
||||
: InstPTX<(outs RegI64:$d), (ins RegF32:$a), "cvt.rzi.u64.f32\t$d, $a",
|
||||
[(set RegI64:$d, (fp_to_uint RegF32:$a))]>;
|
||||
@ -982,6 +1075,10 @@ def CVT_f32_pred
|
||||
"selp.f32\t$d, 0F3F800000, 0F00000000, $a", // 1.0
|
||||
[(set RegF32:$d, (uint_to_fp RegPred:$a))]>;
|
||||
|
||||
def CVT_f32_u8
|
||||
: InstPTX<(outs RegF32:$d), (ins RegI8:$a), "cvt.rn.f32.u8\t$d, $a",
|
||||
[(set RegF32:$d, (uint_to_fp RegI8:$a))]>;
|
||||
|
||||
def CVT_f32_u16
|
||||
: InstPTX<(outs RegF32:$d), (ins RegI16:$a), "cvt.rn.f32.u16\t$d, $a",
|
||||
[(set RegF32:$d, (uint_to_fp RegI16:$a))]>;
|
||||
@ -1005,6 +1102,10 @@ def CVT_f64_pred
|
||||
"selp.f64\t$d, 0D3F80000000000000, 0D0000000000000000, $a", // 1.0
|
||||
[(set RegF64:$d, (uint_to_fp RegPred:$a))]>;
|
||||
|
||||
def CVT_f64_u8
|
||||
: InstPTX<(outs RegF64:$d), (ins RegI8:$a), "cvt.rn.f64.u8\t$d, $a",
|
||||
[(set RegF64:$d, (uint_to_fp RegI8:$a))]>;
|
||||
|
||||
def CVT_f64_u16
|
||||
: InstPTX<(outs RegF64:$d), (ins RegI16:$a), "cvt.rn.f64.u16\t$d, $a",
|
||||
[(set RegF64:$d, (uint_to_fp RegI16:$a))]>;
|
||||
@ -1043,6 +1144,8 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
|
||||
|
||||
///===- Spill Instructions ------------------------------------------------===//
|
||||
// Special instructions used for stack spilling
|
||||
def STACKSTOREI8 : InstPTX<(outs), (ins i32imm:$d, RegI8:$a),
|
||||
"mov.u8\ts$d, $a", []>;
|
||||
def STACKSTOREI16 : InstPTX<(outs), (ins i32imm:$d, RegI16:$a),
|
||||
"mov.u16\ts$d, $a", []>;
|
||||
def STACKSTOREI32 : InstPTX<(outs), (ins i32imm:$d, RegI32:$a),
|
||||
@ -1054,6 +1157,8 @@ def STACKSTOREF32 : InstPTX<(outs), (ins i32imm:$d, RegF32:$a),
|
||||
def STACKSTOREF64 : InstPTX<(outs), (ins i32imm:$d, RegF64:$a),
|
||||
"mov.f64\ts$d, $a", []>;
|
||||
|
||||
def STACKLOADI8 : InstPTX<(outs), (ins RegI8:$d, i32imm:$a),
|
||||
"mov.u8\t$d, s$a", []>;
|
||||
def STACKLOADI16 : InstPTX<(outs), (ins RegI16:$d, i32imm:$a),
|
||||
"mov.u16\t$d, s$a", []>;
|
||||
def STACKLOADI32 : InstPTX<(outs), (ins RegI32:$d, i32imm:$a),
|
||||
|
@ -151,6 +151,137 @@ def P125 : PTXReg<"p125">;
|
||||
def P126 : PTXReg<"p126">;
|
||||
def P127 : PTXReg<"p127">;
|
||||
|
||||
///===- 8-Bit Registers --------------------------------------------------===//
|
||||
|
||||
def RQ0 : PTXReg<"rq0">;
|
||||
def RQ1 : PTXReg<"rq1">;
|
||||
def RQ2 : PTXReg<"rq2">;
|
||||
def RQ3 : PTXReg<"rq3">;
|
||||
def RQ4 : PTXReg<"rq4">;
|
||||
def RQ5 : PTXReg<"rq5">;
|
||||
def RQ6 : PTXReg<"rq6">;
|
||||
def RQ7 : PTXReg<"rq7">;
|
||||
def RQ8 : PTXReg<"rq8">;
|
||||
def RQ9 : PTXReg<"rq9">;
|
||||
def RQ10 : PTXReg<"rq10">;
|
||||
def RQ11 : PTXReg<"rq11">;
|
||||
def RQ12 : PTXReg<"rq12">;
|
||||
def RQ13 : PTXReg<"rq13">;
|
||||
def RQ14 : PTXReg<"rq14">;
|
||||
def RQ15 : PTXReg<"rq15">;
|
||||
def RQ16 : PTXReg<"rq16">;
|
||||
def RQ17 : PTXReg<"rq17">;
|
||||
def RQ18 : PTXReg<"rq18">;
|
||||
def RQ19 : PTXReg<"rq19">;
|
||||
def RQ20 : PTXReg<"rq20">;
|
||||
def RQ21 : PTXReg<"rq21">;
|
||||
def RQ22 : PTXReg<"rq22">;
|
||||
def RQ23 : PTXReg<"rq23">;
|
||||
def RQ24 : PTXReg<"rq24">;
|
||||
def RQ25 : PTXReg<"rq25">;
|
||||
def RQ26 : PTXReg<"rq26">;
|
||||
def RQ27 : PTXReg<"rq27">;
|
||||
def RQ28 : PTXReg<"rq28">;
|
||||
def RQ29 : PTXReg<"rq29">;
|
||||
def RQ30 : PTXReg<"rq30">;
|
||||
def RQ31 : PTXReg<"rq31">;
|
||||
def RQ32 : PTXReg<"rq32">;
|
||||
def RQ33 : PTXReg<"rq33">;
|
||||
def RQ34 : PTXReg<"rq34">;
|
||||
def RQ35 : PTXReg<"rq35">;
|
||||
def RQ36 : PTXReg<"rq36">;
|
||||
def RQ37 : PTXReg<"rq37">;
|
||||
def RQ38 : PTXReg<"rq38">;
|
||||
def RQ39 : PTXReg<"rq39">;
|
||||
def RQ40 : PTXReg<"rq40">;
|
||||
def RQ41 : PTXReg<"rq41">;
|
||||
def RQ42 : PTXReg<"rq42">;
|
||||
def RQ43 : PTXReg<"rq43">;
|
||||
def RQ44 : PTXReg<"rq44">;
|
||||
def RQ45 : PTXReg<"rq45">;
|
||||
def RQ46 : PTXReg<"rq46">;
|
||||
def RQ47 : PTXReg<"rq47">;
|
||||
def RQ48 : PTXReg<"rq48">;
|
||||
def RQ49 : PTXReg<"rq49">;
|
||||
def RQ50 : PTXReg<"rq50">;
|
||||
def RQ51 : PTXReg<"rq51">;
|
||||
def RQ52 : PTXReg<"rq52">;
|
||||
def RQ53 : PTXReg<"rq53">;
|
||||
def RQ54 : PTXReg<"rq54">;
|
||||
def RQ55 : PTXReg<"rq55">;
|
||||
def RQ56 : PTXReg<"rq56">;
|
||||
def RQ57 : PTXReg<"rq57">;
|
||||
def RQ58 : PTXReg<"rq58">;
|
||||
def RQ59 : PTXReg<"rq59">;
|
||||
def RQ60 : PTXReg<"rq60">;
|
||||
def RQ61 : PTXReg<"rq61">;
|
||||
def RQ62 : PTXReg<"rq62">;
|
||||
def RQ63 : PTXReg<"rq63">;
|
||||
def RQ64 : PTXReg<"rq64">;
|
||||
def RQ65 : PTXReg<"rq65">;
|
||||
def RQ66 : PTXReg<"rq66">;
|
||||
def RQ67 : PTXReg<"rq67">;
|
||||
def RQ68 : PTXReg<"rq68">;
|
||||
def RQ69 : PTXReg<"rq69">;
|
||||
def RQ70 : PTXReg<"rq70">;
|
||||
def RQ71 : PTXReg<"rq71">;
|
||||
def RQ72 : PTXReg<"rq72">;
|
||||
def RQ73 : PTXReg<"rq73">;
|
||||
def RQ74 : PTXReg<"rq74">;
|
||||
def RQ75 : PTXReg<"rq75">;
|
||||
def RQ76 : PTXReg<"rq76">;
|
||||
def RQ77 : PTXReg<"rq77">;
|
||||
def RQ78 : PTXReg<"rq78">;
|
||||
def RQ79 : PTXReg<"rq79">;
|
||||
def RQ80 : PTXReg<"rq80">;
|
||||
def RQ81 : PTXReg<"rq81">;
|
||||
def RQ82 : PTXReg<"rq82">;
|
||||
def RQ83 : PTXReg<"rq83">;
|
||||
def RQ84 : PTXReg<"rq84">;
|
||||
def RQ85 : PTXReg<"rq85">;
|
||||
def RQ86 : PTXReg<"rq86">;
|
||||
def RQ87 : PTXReg<"rq87">;
|
||||
def RQ88 : PTXReg<"rq88">;
|
||||
def RQ89 : PTXReg<"rq89">;
|
||||
def RQ90 : PTXReg<"rq90">;
|
||||
def RQ91 : PTXReg<"rq91">;
|
||||
def RQ92 : PTXReg<"rq92">;
|
||||
def RQ93 : PTXReg<"rq93">;
|
||||
def RQ94 : PTXReg<"rq94">;
|
||||
def RQ95 : PTXReg<"rq95">;
|
||||
def RQ96 : PTXReg<"rq96">;
|
||||
def RQ97 : PTXReg<"rq97">;
|
||||
def RQ98 : PTXReg<"rq98">;
|
||||
def RQ99 : PTXReg<"rq99">;
|
||||
def RQ100 : PTXReg<"rq100">;
|
||||
def RQ101 : PTXReg<"rq101">;
|
||||
def RQ102 : PTXReg<"rq102">;
|
||||
def RQ103 : PTXReg<"rq103">;
|
||||
def RQ104 : PTXReg<"rq104">;
|
||||
def RQ105 : PTXReg<"rq105">;
|
||||
def RQ106 : PTXReg<"rq106">;
|
||||
def RQ107 : PTXReg<"rq107">;
|
||||
def RQ108 : PTXReg<"rq108">;
|
||||
def RQ109 : PTXReg<"rq109">;
|
||||
def RQ110 : PTXReg<"rq110">;
|
||||
def RQ111 : PTXReg<"rq111">;
|
||||
def RQ112 : PTXReg<"rq112">;
|
||||
def RQ113 : PTXReg<"rq113">;
|
||||
def RQ114 : PTXReg<"rq114">;
|
||||
def RQ115 : PTXReg<"rq115">;
|
||||
def RQ116 : PTXReg<"rq116">;
|
||||
def RQ117 : PTXReg<"rq117">;
|
||||
def RQ118 : PTXReg<"rq118">;
|
||||
def RQ119 : PTXReg<"rq119">;
|
||||
def RQ120 : PTXReg<"rq120">;
|
||||
def RQ121 : PTXReg<"rq121">;
|
||||
def RQ122 : PTXReg<"rq122">;
|
||||
def RQ123 : PTXReg<"rq123">;
|
||||
def RQ124 : PTXReg<"rq124">;
|
||||
def RQ125 : PTXReg<"rq125">;
|
||||
def RQ126 : PTXReg<"rq126">;
|
||||
def RQ127 : PTXReg<"rq127">;
|
||||
|
||||
///===- 16-Bit Registers --------------------------------------------------===//
|
||||
|
||||
def RH0 : PTXReg<"rh0">;
|
||||
@ -548,6 +679,7 @@ def RD127 : PTXReg<"rd127">;
|
||||
// Register classes
|
||||
//===----------------------------------------------------------------------===//
|
||||
def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%u", 0, 127)>;
|
||||
def RegI8 : RegisterClass<"PTX", [i8], 8, (sequence "RQ%u", 0, 127)>;
|
||||
def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%u", 0, 127)>;
|
||||
def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%u", 0, 127)>;
|
||||
def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%u", 0, 127)>;
|
||||
|
@ -15,15 +15,16 @@
|
||||
from sys import argv, exit, stdout
|
||||
|
||||
|
||||
# The script takes exactly five register counts; argv[0] is the script name,
# hence the expected length of six.
if len(argv) != 6:
    print('Usage: generate-register-td.py <num_preds> <num_8> <num_16> <num_32> <num_64>')
    exit(1)
|
||||
|
||||
# Parse the per-class register counts from the command line, in the order
# given by the usage string: predicates, 8-bit, 16-bit, 32-bit, 64-bit.
try:
    num_pred = int(argv[1])
    num_8bit = int(argv[2])
    num_16bit = int(argv[3])
    num_32bit = int(argv[4])
    num_64bit = int(argv[5])
except ValueError:
    # int() raises ValueError for non-numeric text; catching only that keeps
    # SystemExit / KeyboardInterrupt from being swallowed.
    print('ERROR: Invalid integer parameter')
    exit(1)
|
||||
@ -60,6 +61,11 @@ td_file.write('\n///===- Predicate Registers -----------------------------------
|
||||
for r in range(0, num_pred):
|
||||
td_file.write('def P%d : PTXReg<"p%d">;\n' % (r, r))
|
||||
|
||||
# Print 8-bit registers
|
||||
td_file.write('\n///===- 8-Bit Registers --------------------------------------------------===//\n\n')
|
||||
for r in range(0, num_8bit):
|
||||
td_file.write('def RQ%d : PTXReg<"rq%d">;\n' % (r, r))
|
||||
|
||||
# Print 16-bit registers
|
||||
td_file.write('\n///===- 16-Bit Registers --------------------------------------------------===//\n\n')
|
||||
for r in range(0, num_16bit):
|
||||
@ -86,6 +92,7 @@ td_file.write('''
|
||||
# Print register classes
|
||||
|
||||
td_file.write('def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%%u", 0, %d)>;\n' % (num_pred-1))
|
||||
td_file.write('def RegI8 : RegisterClass<"PTX", [i8], 8, (sequence "RQ%%u", 0, %d)>;\n' % (num_8bit-1))
|
||||
td_file.write('def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%%u", 0, %d)>;\n' % (num_16bit-1))
|
||||
td_file.write('def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%%u", 0, %d)>;\n' % (num_32bit-1))
|
||||
td_file.write('def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit-1))
|
||||
@ -101,16 +108,20 @@ td_file = open('PTXCallingConv.td', 'w')
|
||||
# Reserve 10% of the available registers for return values, and the other 90%
|
||||
# for parameters
|
||||
num_ret_pred = int(0.1 * num_pred)
|
||||
num_ret_8bit = int(0.1 * num_8bit)
|
||||
num_ret_16bit = int(0.1 * num_16bit)
|
||||
num_ret_32bit = int(0.1 * num_32bit)
|
||||
num_ret_64bit = int(0.1 * num_64bit)
|
||||
num_param_pred = num_pred - num_ret_pred
|
||||
num_param_8bit = num_8bit - num_ret_8bit
|
||||
num_param_16bit = num_16bit - num_ret_16bit
|
||||
num_param_32bit = num_32bit - num_ret_32bit
|
||||
num_param_64bit = num_64bit - num_ret_64bit
|
||||
|
||||
param_regs_pred = [('P%d' % (i+num_ret_pred)) for i in range(0, num_param_pred)]
|
||||
ret_regs_pred = ['P%d' % i for i in range(0, num_ret_pred)]
|
||||
param_regs_8bit = [('RQ%d' % (i+num_ret_8bit)) for i in range(0, num_param_8bit)]
|
||||
ret_regs_8bit = ['RQ%d' % i for i in range(0, num_ret_8bit)]
|
||||
param_regs_16bit = [('RH%d' % (i+num_ret_16bit)) for i in range(0, num_param_16bit)]
|
||||
ret_regs_16bit = ['RH%d' % i for i in range(0, num_ret_16bit)]
|
||||
param_regs_32bit = [('R%d' % (i+num_ret_32bit)) for i in range(0, num_param_32bit)]
|
||||
@ -120,6 +131,8 @@ ret_regs_64bit = ['RD%d' % i for i in range(0, num_ret_64bit)]
|
||||
|
||||
# Render each register list as a comma-separated string for the CallingConv
# templates. str.join yields the same text as the pairwise '%s, %s' fold for
# non-empty lists, and additionally tolerates an empty list (returning '')
# and does not depend on reduce being a builtin.
param_list_pred = ', '.join(param_regs_pred)
ret_list_pred = ', '.join(ret_regs_pred)
param_list_8bit = ', '.join(param_regs_8bit)
ret_list_8bit = ', '.join(ret_regs_8bit)
param_list_16bit = ', '.join(param_regs_16bit)
ret_list_16bit = ', '.join(ret_regs_16bit)
param_list_32bit = ', '.join(param_regs_32bit)
|
||||
@ -144,6 +157,7 @@ td_file.write('''
|
||||
// PTX Formal Parameter Calling Convention
|
||||
def CC_PTX : CallingConv<[
|
||||
CCIfType<[i1], CCAssignToReg<[%s]>>,
|
||||
CCIfType<[i8], CCAssignToReg<[%s]>>,
|
||||
CCIfType<[i16], CCAssignToReg<[%s]>>,
|
||||
CCIfType<[i32,f32], CCAssignToReg<[%s]>>,
|
||||
CCIfType<[i64,f64], CCAssignToReg<[%s]>>
|
||||
@ -152,12 +166,13 @@ def CC_PTX : CallingConv<[
|
||||
// PTX Return Value Calling Convention
|
||||
def RetCC_PTX : CallingConv<[
|
||||
CCIfType<[i1], CCAssignToReg<[%s]>>,
|
||||
CCIfType<[i8], CCAssignToReg<[%s]>>,
|
||||
CCIfType<[i16], CCAssignToReg<[%s]>>,
|
||||
CCIfType<[i32,f32], CCAssignToReg<[%s]>>,
|
||||
CCIfType<[i64,f64], CCAssignToReg<[%s]>>
|
||||
]>;
|
||||
''' % (param_list_pred, param_list_16bit, param_list_32bit, param_list_64bit,
|
||||
ret_list_pred, ret_list_16bit, ret_list_32bit, ret_list_64bit))
|
||||
''' % (param_list_pred, param_list_8bit, param_list_16bit, param_list_32bit, param_list_64bit,
|
||||
ret_list_pred, ret_list_8bit, ret_list_16bit, ret_list_32bit, ret_list_64bit))
|
||||
|
||||
|
||||
td_file.close()
|
||||
|
@ -3,6 +3,17 @@
|
||||
; preds
|
||||
; (note: we convert back to i32 to return)
|
||||
|
||||
define ptx_device i32 @cvt_pred_i8(i8 %x, i1 %y) {
|
||||
; CHECK: setp.gt.b8 p[[P0:[0-9]+]], rq{{[0-9]+}}, 0
|
||||
; CHECK-NEXT: and.pred p0, p[[P0:[0-9]+]], p{{[0-9]+}};
|
||||
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0:[0-9]+]];
|
||||
; CHECK-NEXT: ret;
|
||||
%a = trunc i8 %x to i1
|
||||
%b = and i1 %a, %y
|
||||
%c = zext i1 %b to i32
|
||||
ret i32 %c
|
||||
}
|
||||
|
||||
define ptx_device i32 @cvt_pred_i16(i16 %x, i1 %y) {
|
||||
; CHECK: setp.gt.b16 p[[P0:[0-9]+]], rh{{[0-9]+}}, 0
|
||||
; CHECK-NEXT: and.pred p0, p[[P0:[0-9]+]], p{{[0-9]+}};
|
||||
@ -58,6 +69,43 @@ define ptx_device i32 @cvt_pred_f64(double %x, i1 %y) {
|
||||
ret i32 %c
|
||||
}
|
||||
|
||||
; i8
|
||||
|
||||
define ptx_device i8 @cvt_i8_preds(i1 %x) {
|
||||
; CHECK: selp.u8 rq{{[0-9]+}}, 1, 0, p{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = zext i1 %x to i8
|
||||
ret i8 %a
|
||||
}
|
||||
|
||||
define ptx_device i8 @cvt_i8_i32(i32 %x) {
|
||||
; CHECK: cvt.u8.u32 rq{{[0-9]+}}, r{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = trunc i32 %x to i8
|
||||
ret i8 %a
|
||||
}
|
||||
|
||||
define ptx_device i8 @cvt_i8_i64(i64 %x) {
|
||||
; CHECK: cvt.u8.u64 rq{{[0-9]+}}, rd{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = trunc i64 %x to i8
|
||||
ret i8 %a
|
||||
}
|
||||
|
||||
define ptx_device i8 @cvt_i8_f32(float %x) {
|
||||
; CHECK: cvt.rzi.u8.f32 rq{{[0-9]+}}, r{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = fptoui float %x to i8
|
||||
ret i8 %a
|
||||
}
|
||||
|
||||
define ptx_device i8 @cvt_i8_f64(double %x) {
|
||||
; CHECK: cvt.rzi.u8.f64 rq{{[0-9]+}}, rd{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = fptoui double %x to i8
|
||||
ret i8 %a
|
||||
}
|
||||
|
||||
; i16
|
||||
|
||||
define ptx_device i16 @cvt_i16_preds(i1 %x) {
|
||||
@ -67,6 +115,13 @@ define ptx_device i16 @cvt_i16_preds(i1 %x) {
|
||||
ret i16 %a
|
||||
}
|
||||
|
||||
; Zero-extends an i8 argument to i16. The expected PTX is a single
; cvt.u16.u8 from an rq register into an rh register.
define ptx_device i16 @cvt_i16_i8(i8 %x) {
|
||||
; CHECK: cvt.u16.u8 rh{{[0-9]+}}, rq{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = zext i8 %x to i16
|
||||
ret i16 %a
|
||||
}
|
||||
|
||||
define ptx_device i16 @cvt_i16_i32(i32 %x) {
|
||||
; CHECK: cvt.u16.u32 rh{{[0-9]+}}, r{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
@ -104,6 +159,13 @@ define ptx_device i32 @cvt_i32_preds(i1 %x) {
|
||||
ret i32 %a
|
||||
}
|
||||
|
||||
; Zero-extends an i8 argument to i32. The expected PTX is a single
; cvt.u32.u8 from an rq register into an r register.
define ptx_device i32 @cvt_i32_i8(i8 %x) {
|
||||
; CHECK: cvt.u32.u8 r{{[0-9]+}}, rq{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = zext i8 %x to i32
|
||||
ret i32 %a
|
||||
}
|
||||
|
||||
define ptx_device i32 @cvt_i32_i16(i16 %x) {
|
||||
; CHECK: cvt.u32.u16 r{{[0-9]+}}, rh{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
@ -141,6 +203,13 @@ define ptx_device i64 @cvt_i64_preds(i1 %x) {
|
||||
ret i64 %a
|
||||
}
|
||||
|
||||
; Zero-extends an i8 argument to i64. The expected PTX is a single
; cvt.u64.u8 from an rq register into an rd register.
define ptx_device i64 @cvt_i64_i8(i8 %x) {
|
||||
; CHECK: cvt.u64.u8 rd{{[0-9]+}}, rq{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = zext i8 %x to i64
|
||||
ret i64 %a
|
||||
}
|
||||
|
||||
define ptx_device i64 @cvt_i64_i16(i16 %x) {
|
||||
; CHECK: cvt.u64.u16 rd{{[0-9]+}}, rh{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
@ -178,6 +247,13 @@ define ptx_device float @cvt_f32_preds(i1 %x) {
|
||||
ret float %a
|
||||
}
|
||||
|
||||
; Converts an unsigned i8 argument to float (uitofp). The expected PTX is
; cvt.rn.f32.u8 from an rq register into an r register.
define ptx_device float @cvt_f32_i8(i8 %x) {
|
||||
; CHECK: cvt.rn.f32.u8 r{{[0-9]+}}, rq{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = uitofp i8 %x to float
|
||||
ret float %a
|
||||
}
|
||||
|
||||
define ptx_device float @cvt_f32_i16(i16 %x) {
|
||||
; CHECK: cvt.rn.f32.u16 r{{[0-9]+}}, rh{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
@ -215,6 +291,13 @@ define ptx_device double @cvt_f64_preds(i1 %x) {
|
||||
ret double %a
|
||||
}
|
||||
|
||||
; Converts an unsigned i8 argument to double (uitofp). The expected PTX is
; cvt.rn.f64.u8 from an rq register into an rd register.
define ptx_device double @cvt_f64_i8(i8 %x) {
|
||||
; CHECK: cvt.rn.f64.u8 rd{{[0-9]+}}, rq{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = uitofp i8 %x to double
|
||||
ret double %a
|
||||
}
|
||||
|
||||
define ptx_device double @cvt_f64_i16(i16 %x) {
|
||||
; CHECK: cvt.rn.f64.u16 rd{{[0-9]+}}, rh{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
|
@ -1,5 +1,17 @@
|
||||
; RUN: llc < %s -march=ptx32 | FileCheck %s
|
||||
|
||||
; External [10 x i8] arrays: one in the default address space and one each in
; address spaces 1, 2 and 4. The patterns below expect them to be emitted as
; .b8 declarations of size 10 in the .global, .const, .local and .shared
; state spaces respectively.
;CHECK: .extern .global .b8 array_i8[10];
|
||||
@array_i8 = external global [10 x i8]
|
||||
|
||||
;CHECK: .extern .const .b8 array_constant_i8[10];
|
||||
@array_constant_i8 = external addrspace(1) constant [10 x i8]
|
||||
|
||||
;CHECK: .extern .local .b8 array_local_i8[10];
|
||||
@array_local_i8 = external addrspace(2) global [10 x i8]
|
||||
|
||||
;CHECK: .extern .shared .b8 array_shared_i8[10];
|
||||
@array_shared_i8 = external addrspace(4) global [10 x i8]
|
||||
|
||||
;CHECK: .extern .global .b8 array_i16[20];
|
||||
@array_i16 = external global [10 x i16]
|
||||
|
||||
@ -60,6 +72,13 @@
|
||||
;CHECK: .extern .shared .b8 array_shared_double[80];
|
||||
@array_shared_double = external addrspace(4) global [10 x double]
|
||||
|
||||
; Loads an i8 directly through the pointer argument. The expected PTX is a
; ld.global.u8 into an rq register with a plain [r] address operand.
define ptx_device i8 @t1_u8(i8* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.u8 rq{{[0-9]+}}, [r{{[0-9]+}}];
|
||||
;CHECK-NEXT: ret;
|
||||
%x = load i8* %p
|
||||
ret i8 %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @t1_u16(i16* %p) {
|
||||
entry:
|
||||
@ -101,6 +120,15 @@ entry:
|
||||
ret double %x
|
||||
}
|
||||
|
||||
; Loads the i8 one element past the pointer argument. The expected PTX folds
; the constant index into the address operand as [r+1].
define ptx_device i8 @t2_u8(i8* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.u8 rq{{[0-9]+}}, [r{{[0-9]+}}+1];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr i8* %p, i32 1
|
||||
%x = load i8* %i
|
||||
ret i8 %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @t2_u16(i16* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.u16 rh{{[0-9]+}}, [r{{[0-9]+}}+2];
|
||||
@ -146,6 +174,15 @@ entry:
|
||||
ret double %x
|
||||
}
|
||||
|
||||
; Loads an i8 at a run-time index %q from the pointer argument. The expected
; PTX computes the address with add.u32 and loads through that result
; register (captured as R0).
define ptx_device i8 @t3_u8(i8* %p, i32 %q) {
|
||||
entry:
|
||||
;CHECK: add.u32 r[[R0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
|
||||
;CHECK-NEXT: ld.global.u8 rq{{[0-9]+}}, [r[[R0]]];
|
||||
%i = getelementptr i8* %p, i32 %q
|
||||
%x = load i8* %i
|
||||
ret i8 %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @t3_u16(i16* %p, i32 %q) {
|
||||
entry:
|
||||
;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 1;
|
||||
@ -196,6 +233,16 @@ entry:
|
||||
ret double %x
|
||||
}
|
||||
|
||||
; Loads element 0 of the external global @array_i8. The expected PTX moves
; the symbol address into a register (captured as R0) and then performs a
; ld.global.u8 through it.
define ptx_device i8 @t4_global_u8() {
|
||||
entry:
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i8;
|
||||
;CHECK-NEXT: ld.global.u8 rq{{[0-9]+}}, [r[[R0]]];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i8]* @array_i8, i32 0, i32 0
|
||||
%x = load i8* %i
|
||||
ret i8 %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @t4_global_u16() {
|
||||
entry:
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16;
|
||||
@ -296,6 +343,16 @@ entry:
|
||||
ret double %x
|
||||
}
|
||||
|
||||
; Loads element 0 of @array_local_i8, which lives in address space 2. The
; expected PTX moves the symbol address into a register (captured as R0) and
; then performs a ld.local.u8 through it.
define ptx_device i8 @t4_local_u8() {
|
||||
entry:
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i8;
|
||||
;CHECK-NEXT: ld.local.u8 rq{{[0-9]+}}, [r[[R0]]];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i8] addrspace(2)* @array_local_i8, i32 0, i32 0
|
||||
%x = load i8 addrspace(2)* %i
|
||||
ret i8 %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @t4_local_u16() {
|
||||
entry:
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i16;
|
||||
@ -346,6 +403,16 @@ entry:
|
||||
ret double %x
|
||||
}
|
||||
|
||||
; Loads element 0 of @array_shared_i8, which lives in address space 4. The
; expected PTX moves the symbol address into a register (captured as R0) and
; then performs a ld.shared.u8 through it.
define ptx_device i8 @t4_shared_u8() {
|
||||
entry:
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i8;
|
||||
;CHECK-NEXT: ld.shared.u8 rq{{[0-9]+}}, [r[[R0]]];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i8] addrspace(4)* @array_shared_i8, i32 0, i32 0
|
||||
%x = load i8 addrspace(4)* %i
|
||||
ret i8 %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @t4_shared_u16() {
|
||||
entry:
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i16;
|
||||
@ -396,6 +463,16 @@ entry:
|
||||
ret double %x
|
||||
}
|
||||
|
||||
; Loads element 1 of the external global @array_i8. The expected PTX moves
; the symbol address into a register (captured as R0) and folds the constant
; offset into the load's address operand as [R0+1].
define ptx_device i8 @t5_u8() {
|
||||
entry:
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i8;
|
||||
;CHECK-NEXT: ld.global.u8 rq{{[0-9]+}}, [r[[R0]]+1];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i8]* @array_i8, i32 0, i32 1
|
||||
%x = load i8* %i
|
||||
ret i8 %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @t5_u16() {
|
||||
entry:
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16;
|
||||
|
@ -1,5 +1,11 @@
|
||||
; RUN: llc < %s -march=ptx32 | FileCheck %s
|
||||
|
||||
; Returns the i8 constant 0. The expected PTX materialises the immediate
; with mov.u8 into an rq register before ret.
define ptx_device i8 @t1_u8() {
|
||||
; CHECK: mov.u8 rq{{[0-9]+}}, 0;
|
||||
; CHECK: ret;
|
||||
ret i8 0
|
||||
}
|
||||
|
||||
define ptx_device i16 @t1_u16() {
|
||||
; CHECK: mov.u16 rh{{[0-9]+}}, 0;
|
||||
; CHECK: ret;
|
||||
@ -30,6 +36,12 @@ define ptx_device double @t1_f64() {
|
||||
ret double 0.0
|
||||
}
|
||||
|
||||
; Returns its i8 argument unchanged. The expected PTX is a register-to-
; register mov.u8 between two rq registers before ret.
define ptx_device i8 @t2_u8(i8 %x) {
|
||||
; CHECK: mov.u8 rq{{[0-9]+}}, rq{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
ret i8 %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @t2_u16(i16 %x) {
|
||||
; CHECK: mov.u16 rh{{[0-9]+}}, rh{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
|
@ -1,5 +1,17 @@
|
||||
; RUN: llc < %s -march=ptx32 | FileCheck %s
|
||||
|
||||
; External [10 x i8] arrays: one in the default address space and one each in
; address spaces 1, 2 and 4. The patterns below expect them to be emitted as
; .b8 declarations of size 10 in the .global, .const, .local and .shared
; state spaces respectively.
;CHECK: .extern .global .b8 array_i8[10];
|
||||
@array_i8 = external global [10 x i8]
|
||||
|
||||
;CHECK: .extern .const .b8 array_constant_i8[10];
|
||||
@array_constant_i8 = external addrspace(1) constant [10 x i8]
|
||||
|
||||
;CHECK: .extern .local .b8 array_local_i8[10];
|
||||
@array_local_i8 = external addrspace(2) global [10 x i8]
|
||||
|
||||
;CHECK: .extern .shared .b8 array_shared_i8[10];
|
||||
@array_shared_i8 = external addrspace(4) global [10 x i8]
|
||||
|
||||
;CHECK: .extern .global .b8 array_i16[20];
|
||||
@array_i16 = external global [10 x i16]
|
||||
|
||||
@ -60,6 +72,13 @@
|
||||
;CHECK: .extern .shared .b8 array_shared_double[80];
|
||||
@array_shared_double = external addrspace(4) global [10 x double]
|
||||
|
||||
; Stores the i8 argument directly through the pointer argument. The expected
; PTX is a st.global.u8 from an rq register with a plain [r] address operand.
define ptx_device void @t1_u8(i8* %p, i8 %x) {
|
||||
entry:
|
||||
;CHECK: st.global.u8 [r{{[0-9]+}}], rq{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
store i8 %x, i8* %p
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t1_u16(i16* %p, i16 %x) {
|
||||
entry:
|
||||
@ -101,6 +120,15 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Stores the i8 argument one element past the pointer argument. The expected
; PTX folds the constant index into the address operand as [r+1].
define ptx_device void @t2_u8(i8* %p, i8 %x) {
|
||||
entry:
|
||||
;CHECK: st.global.u8 [r{{[0-9]+}}+1], rq{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr i8* %p, i32 1
|
||||
store i8 %x, i8* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t2_u16(i16* %p, i16 %x) {
|
||||
entry:
|
||||
;CHECK: st.global.u16 [r{{[0-9]+}}+2], rh{{[0-9]+}};
|
||||
@ -146,6 +174,16 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Stores the i8 argument at a run-time index %q from the pointer argument.
; The expected PTX computes the address with add.u32 and stores through that
; result register (captured as R0).
define ptx_device void @t3_u8(i8* %p, i32 %q, i8 %x) {
|
||||
entry:
|
||||
;CHECK: add.u32 r[[R0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
|
||||
;CHECK-NEXT: st.global.u8 [r[[R0]]], rq{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr i8* %p, i32 %q
|
||||
store i8 %x, i8* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t3_u16(i16* %p, i32 %q, i16 %x) {
|
||||
entry:
|
||||
;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 1;
|
||||
@ -201,6 +239,16 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Stores the i8 argument to element 0 of the external global @array_i8. The
; expected PTX moves the symbol address into a register (captured as R0) and
; then performs a st.global.u8 through it.
; The getelementptr indices are written as i32, the same index type the other
; array-addressing tests use; both indices are the constant 0, so the
; addressed element is unchanged.
define ptx_device void @t4_global_u8(i8 %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i8;
|
||||
;CHECK-NEXT: st.global.u8 [r[[R0]]], rq{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i8]* @array_i8, i32 0, i32 0
|
||||
store i8 %x, i8* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t4_global_u16(i16 %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16;
|
||||
@ -251,6 +299,16 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Stores the i8 argument to element 0 of @array_local_i8, which lives in
; address space 2. The expected PTX moves the symbol address into a register
; (captured as R0) and then performs a st.local.u8 through it.
define ptx_device void @t4_local_u8(i8 %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i8;
|
||||
;CHECK-NEXT: st.local.u8 [r[[R0]]], rq{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i8] addrspace(2)* @array_local_i8, i32 0, i32 0
|
||||
store i8 %x, i8 addrspace(2)* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t4_local_u16(i16 %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i16;
|
||||
@ -301,6 +359,16 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Stores the i8 argument to element 0 of @array_shared_i8, which lives in
; address space 4. The expected PTX moves the symbol address into a register
; (captured as R0) and then performs a st.shared.u8 through it.
define ptx_device void @t4_shared_u8(i8 %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i8;
|
||||
;CHECK-NEXT: st.shared.u8 [r[[R0]]], rq{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i8] addrspace(4)* @array_shared_i8, i32 0, i32 0
|
||||
store i8 %x, i8 addrspace(4)* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t4_shared_u16(i16 %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i16;
|
||||
@ -351,6 +419,16 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Stores the i8 argument to element 1 of the external global @array_i8. The
; expected PTX moves the symbol address into a register (captured as R0) and
; folds the constant offset into the store's address operand as [R0+1].
define ptx_device void @t5_u8(i8 %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i8;
|
||||
;CHECK-NEXT: st.global.u8 [r[[R0]]+1], rq{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i8]* @array_i8, i32 0, i32 1
|
||||
store i8 %x, i8* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t5_u16(i16 %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16;
|
||||
|
Loading…
Reference in New Issue
Block a user