From d8149c1bef75e15010cce4c4510b7f68ecde80a5 Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Thu, 23 Jun 2011 18:10:13 +0000 Subject: [PATCH] PTX: Always use registers for return values, but use .param space for device parameters if SM >= 2.0 - Update test cases to be more robust against register allocation changes - Bump up the number of registers to 128 per type - Include Python script to re-generate register file with any number of registers git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@133736 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PTX/PTXAsmPrinter.cpp | 17 +- lib/Target/PTX/PTXCallingConv.td | 29 +- lib/Target/PTX/PTXISelLowering.cpp | 50 +-- lib/Target/PTX/PTXMachineFunctionInfo.h | 10 +- lib/Target/PTX/PTXRegisterInfo.td | 564 ++++++++++++++++++++++-- lib/Target/PTX/generate-register-td.py | 163 +++++++ test/CodeGen/PTX/add.ll | 20 +- test/CodeGen/PTX/aggregates.ll | 23 + test/CodeGen/PTX/bitwise.ll | 6 +- test/CodeGen/PTX/bra.ll | 6 +- test/CodeGen/PTX/cvt.ll | 80 ++-- test/CodeGen/PTX/fdiv-sm10.ll | 4 +- test/CodeGen/PTX/fdiv-sm13.ll | 4 +- test/CodeGen/PTX/fneg.ll | 4 +- test/CodeGen/PTX/ld.ll | 150 +++---- test/CodeGen/PTX/llvm-intrinsic.ll | 12 +- test/CodeGen/PTX/mad.ll | 4 +- test/CodeGen/PTX/mov.ll | 20 +- test/CodeGen/PTX/mul.ll | 8 +- test/CodeGen/PTX/parameter-order.ll | 4 +- test/CodeGen/PTX/selp.ll | 8 +- test/CodeGen/PTX/setp.ll | 92 ++-- test/CodeGen/PTX/shl.ll | 6 +- test/CodeGen/PTX/shr.ll | 12 +- test/CodeGen/PTX/st.ll | 130 +++--- test/CodeGen/PTX/sub.ll | 20 +- 26 files changed, 1043 insertions(+), 403 deletions(-) create mode 100755 lib/Target/PTX/generate-register-td.py create mode 100644 test/CodeGen/PTX/aggregates.ll diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp index 6686cdd1fe7..5d7e4c3ff79 100644 --- a/lib/Target/PTX/PTXAsmPrinter.cpp +++ b/lib/Target/PTX/PTXAsmPrinter.cpp @@ -433,25 +433,16 @@ void PTXAsmPrinter::EmitFunctionDeclaration() { if (!isKernel) { decl += 
" ("; - for (PTXMachineFunctionInfo::ret_iterator i = MFI->retRegBegin(), e = MFI->retRegEnd(), b = i; i != e; ++i) { if (i != b) { decl += ", "; } - if (ST.getShaderModel() >= PTXSubtarget::PTX_SM_2_0) { - decl += ".param .b"; - decl += utostr(*i); - decl += " "; - decl += RETURN_PREFIX; - decl += utostr(++cnt); - } else { - decl += ".reg ."; - decl += getRegisterTypeName(*i); - decl += " "; - decl += getRegisterName(*i); - } + decl += ".reg ."; + decl += getRegisterTypeName(*i); + decl += " "; + decl += getRegisterName(*i); } decl += ")"; } diff --git a/lib/Target/PTX/PTXCallingConv.td b/lib/Target/PTX/PTXCallingConv.td index 4d7759b14ee..3e3ff489662 100644 --- a/lib/Target/PTX/PTXCallingConv.td +++ b/lib/Target/PTX/PTXCallingConv.td @@ -1,3 +1,4 @@ + //===--- PTXCallingConv.td - Calling Conventions -----------*- tablegen -*-===// // // The LLVM Compiler Infrastructure @@ -11,26 +12,18 @@ // //===----------------------------------------------------------------------===// -// Currently, we reserve one register of each type for return values and let -// the rest be used for parameters. This is a dirty hack, but I am not sure -// how to tell LLVM that registers used for parameter passing cannot be used -// for return values. 
- -// PTX Calling Conventions +// PTX Formal Parameter Calling Convention def CC_PTX : CallingConv<[ - CCIfType<[i1], CCAssignToReg<[P1, P2, P3, P4, P5, P6, P7]>>, - CCIfType<[i16], CCAssignToReg<[RH1, RH2, RH3, RH4, RH5, RH6, RH7]>>, - CCIfType<[i32, f32], CCAssignToReg<[R1, R2, R3, R4, R5, R6, R7]>>, - CCIfType<[i64, f64], CCAssignToReg<[RD1, RD2, RD3, RD4, RD5, RD6, RD7]>> + CCIfType<[i1], CCAssignToReg<[P12, P13, P14, P15, P16, P17, P18, P19, P20, P21, P22, P23, P24, P25, P26, P27, P28, P29, P30, P31, P32, P33, P34, P35, P36, P37, P38, P39, P40, P41, P42, P43, P44, P45, P46, P47, P48, P49, P50, P51, P52, P53, P54, P55, P56, P57, P58, P59, P60, P61, P62, P63, P64, P65, P66, P67, P68, P69, P70, P71, P72, P73, P74, P75, P76, P77, P78, P79, P80, P81, P82, P83, P84, P85, P86, P87, P88, P89, P90, P91, P92, P93, P94, P95, P96, P97, P98, P99, P100, P101, P102, P103, P104, P105, P106, P107, P108, P109, P110, P111, P112, P113, P114, P115, P116, P117, P118, P119, P120, P121, P122, P123, P124, P125, P126, P127]>>, + CCIfType<[i16], CCAssignToReg<[RH12, RH13, RH14, RH15, RH16, RH17, RH18, RH19, RH20, RH21, RH22, RH23, RH24, RH25, RH26, RH27, RH28, RH29, RH30, RH31, RH32, RH33, RH34, RH35, RH36, RH37, RH38, RH39, RH40, RH41, RH42, RH43, RH44, RH45, RH46, RH47, RH48, RH49, RH50, RH51, RH52, RH53, RH54, RH55, RH56, RH57, RH58, RH59, RH60, RH61, RH62, RH63, RH64, RH65, RH66, RH67, RH68, RH69, RH70, RH71, RH72, RH73, RH74, RH75, RH76, RH77, RH78, RH79, RH80, RH81, RH82, RH83, RH84, RH85, RH86, RH87, RH88, RH89, RH90, RH91, RH92, RH93, RH94, RH95, RH96, RH97, RH98, RH99, RH100, RH101, RH102, RH103, RH104, RH105, RH106, RH107, RH108, RH109, RH110, RH111, RH112, RH113, RH114, RH115, RH116, RH117, RH118, RH119, RH120, RH121, RH122, RH123, RH124, RH125, RH126, RH127]>>, + CCIfType<[i32,f32], CCAssignToReg<[R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, 
R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127]>>, + CCIfType<[i64,f64], CCAssignToReg<[RD12, RD13, RD14, RD15, RD16, RD17, RD18, RD19, RD20, RD21, RD22, RD23, RD24, RD25, RD26, RD27, RD28, RD29, RD30, RD31, RD32, RD33, RD34, RD35, RD36, RD37, RD38, RD39, RD40, RD41, RD42, RD43, RD44, RD45, RD46, RD47, RD48, RD49, RD50, RD51, RD52, RD53, RD54, RD55, RD56, RD57, RD58, RD59, RD60, RD61, RD62, RD63, RD64, RD65, RD66, RD67, RD68, RD69, RD70, RD71, RD72, RD73, RD74, RD75, RD76, RD77, RD78, RD79, RD80, RD81, RD82, RD83, RD84, RD85, RD86, RD87, RD88, RD89, RD90, RD91, RD92, RD93, RD94, RD95, RD96, RD97, RD98, RD99, RD100, RD101, RD102, RD103, RD104, RD105, RD106, RD107, RD108, RD109, RD110, RD111, RD112, RD113, RD114, RD115, RD116, RD117, RD118, RD119, RD120, RD121, RD122, RD123, RD124, RD125, RD126, RD127]>> ]>; -//===----------------------------------------------------------------------===// -// Return Value Calling Conventions -//===----------------------------------------------------------------------===// - +// PTX Return Value Calling Convention def RetCC_PTX : CallingConv<[ - CCIfType<[i1], CCAssignToReg<[P0]>>, - CCIfType<[i16], CCAssignToReg<[RH0]>>, - CCIfType<[i32, f32], CCAssignToReg<[R0]>>, - CCIfType<[i64, f64], CCAssignToReg<[RD0]>> + CCIfType<[i1], CCAssignToReg<[P0, P1, P2, P3, P4, P5, P6, P7, P8, P9, P10, P11]>>, + CCIfType<[i16], CCAssignToReg<[RH0, RH1, RH2, RH3, RH4, RH5, RH6, RH7, RH8, RH9, RH10, RH11]>>, + CCIfType<[i32,f32], CCAssignToReg<[R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11]>>, + CCIfType<[i64,f64], CCAssignToReg<[RD0, RD1, RD2, RD3, RD4, RD5, RD6, RD7, RD8, 
RD9, RD10, RD11]>> ]>; diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp index cb4a5d3d924..c82149301a0 100644 --- a/lib/Target/PTX/PTXISelLowering.cpp +++ b/lib/Target/PTX/PTXISelLowering.cpp @@ -307,49 +307,35 @@ SDValue PTXTargetLowering:: MachineFunction& MF = DAG.getMachineFunction(); PTXMachineFunctionInfo *MFI = MF.getInfo(); - const PTXSubtarget& ST = getTargetMachine().getSubtarget(); SDValue Flag; - if (ST.getShaderModel() >= PTXSubtarget::PTX_SM_2_0) { - // For SM 2.0+, we return arguments in the param space - for (unsigned i = 0, e = Outs.size(); i != e; ++i) { - SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue ParamIndex = DAG.getTargetConstant(i, MVT::i32); - SDValue Ops[] = { Chain, ParamIndex, OutVals[i], Flag }; - Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, VTs, Ops, - Flag.getNode() ? 4 : 3); - Flag = Chain.getValue(1); - // Instead of storing a physical register in our argument list, we just - // store the total size of the parameter, in bits. The ASM printer - // knows how to process this. - MFI->addRetReg(Outs[i].VT.getStoreSizeInBits()); - } - } else { - // For SM < 2.0, we return arguments in registers - SmallVector RVLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); + // Even though we could use the .param space for return arguments for + // device functions if SM >= 2.0 and the number of return arguments is + // only 1, we just always use registers since this makes the codegen + // easier. 
+ SmallVector<CCValAssign, 16> RVLocs; + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), RVLocs, *DAG.getContext()); - CCInfo.AnalyzeReturn(Outs, RetCC_PTX); + CCInfo.AnalyzeReturn(Outs, RetCC_PTX); - for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { - CCValAssign& VA = RVLocs[i]; + for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { + CCValAssign& VA = RVLocs[i]; - assert(VA.isRegLoc() && "CCValAssign must be RegLoc"); + assert(VA.isRegLoc() && "CCValAssign must be RegLoc"); - unsigned Reg = VA.getLocReg(); + unsigned Reg = VA.getLocReg(); - DAG.getMachineFunction().getRegInfo().addLiveOut(Reg); + DAG.getMachineFunction().getRegInfo().addLiveOut(Reg); - Chain = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i], Flag); + Chain = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i], Flag); - // Guarantee that all emitted copies are stuck together, - // avoiding something bad - Flag = Chain.getValue(1); + // Guarantee that all emitted copies are stuck together, + // avoiding something bad + Flag = Chain.getValue(1); - MFI->addRetReg(Reg); - } + MFI->addRetReg(Reg); } if (Flag.getNode() == 0) { diff --git a/lib/Target/PTX/PTXMachineFunctionInfo.h b/lib/Target/PTX/PTXMachineFunctionInfo.h index 1da4b5defcd..9d65f5bd1ad 100644 --- a/lib/Target/PTX/PTXMachineFunctionInfo.h +++ b/lib/Target/PTX/PTXMachineFunctionInfo.h @@ -26,7 +26,7 @@ class PTXMachineFunctionInfo : public MachineFunctionInfo { private: bool is_kernel; std::vector<unsigned> reg_arg, reg_local_var; - DenseSet<unsigned> reg_ret; + std::vector<unsigned> reg_ret; bool _isDoneAddArg; public: @@ -40,7 +40,11 @@ public: void addArgReg(unsigned reg) { reg_arg.push_back(reg); } void addLocalVarReg(unsigned reg) { reg_local_var.push_back(reg); } - void addRetReg(unsigned reg) { reg_ret.insert(reg); } + void addRetReg(unsigned reg) { + if (!isRetReg(reg)) { + reg_ret.push_back(reg); + } + } void doneAddArg(void) { _isDoneAddArg = true; @@ -51,7 +55,7 @@ public: typedef std::vector<unsigned>::const_iterator reg_iterator; typedef
std::vector::const_reverse_iterator reg_reverse_iterator; - typedef DenseSet::const_iterator ret_iterator; + typedef std::vector::const_iterator ret_iterator; bool argRegEmpty() const { return reg_arg.empty(); } int getNumArg() const { return reg_arg.size(); } diff --git a/lib/Target/PTX/PTXRegisterInfo.td b/lib/Target/PTX/PTXRegisterInfo.td index 08a39a83704..1313d248325 100644 --- a/lib/Target/PTX/PTXRegisterInfo.td +++ b/lib/Target/PTX/PTXRegisterInfo.td @@ -1,3 +1,4 @@ + //===- PTXRegisterInfo.td - PTX Register defs ----------------*- tblgen -*-===// // // The LLVM Compiler Infrastructure @@ -21,55 +22,534 @@ class PTXReg : Register { ///===- Predicate Registers -----------------------------------------------===// -def P0 : PTXReg<"p0">; -def P1 : PTXReg<"p1">; -def P2 : PTXReg<"p2">; -def P3 : PTXReg<"p3">; -def P4 : PTXReg<"p4">; -def P5 : PTXReg<"p5">; -def P6 : PTXReg<"p6">; -def P7 : PTXReg<"p7">; +def P0 : PTXReg<"p0">; +def P1 : PTXReg<"p1">; +def P2 : PTXReg<"p2">; +def P3 : PTXReg<"p3">; +def P4 : PTXReg<"p4">; +def P5 : PTXReg<"p5">; +def P6 : PTXReg<"p6">; +def P7 : PTXReg<"p7">; +def P8 : PTXReg<"p8">; +def P9 : PTXReg<"p9">; +def P10 : PTXReg<"p10">; +def P11 : PTXReg<"p11">; +def P12 : PTXReg<"p12">; +def P13 : PTXReg<"p13">; +def P14 : PTXReg<"p14">; +def P15 : PTXReg<"p15">; +def P16 : PTXReg<"p16">; +def P17 : PTXReg<"p17">; +def P18 : PTXReg<"p18">; +def P19 : PTXReg<"p19">; +def P20 : PTXReg<"p20">; +def P21 : PTXReg<"p21">; +def P22 : PTXReg<"p22">; +def P23 : PTXReg<"p23">; +def P24 : PTXReg<"p24">; +def P25 : PTXReg<"p25">; +def P26 : PTXReg<"p26">; +def P27 : PTXReg<"p27">; +def P28 : PTXReg<"p28">; +def P29 : PTXReg<"p29">; +def P30 : PTXReg<"p30">; +def P31 : PTXReg<"p31">; +def P32 : PTXReg<"p32">; +def P33 : PTXReg<"p33">; +def P34 : PTXReg<"p34">; +def P35 : PTXReg<"p35">; +def P36 : PTXReg<"p36">; +def P37 : PTXReg<"p37">; +def P38 : PTXReg<"p38">; +def P39 : PTXReg<"p39">; +def P40 : PTXReg<"p40">; +def P41 : PTXReg<"p41">; +def 
P42 : PTXReg<"p42">; +def P43 : PTXReg<"p43">; +def P44 : PTXReg<"p44">; +def P45 : PTXReg<"p45">; +def P46 : PTXReg<"p46">; +def P47 : PTXReg<"p47">; +def P48 : PTXReg<"p48">; +def P49 : PTXReg<"p49">; +def P50 : PTXReg<"p50">; +def P51 : PTXReg<"p51">; +def P52 : PTXReg<"p52">; +def P53 : PTXReg<"p53">; +def P54 : PTXReg<"p54">; +def P55 : PTXReg<"p55">; +def P56 : PTXReg<"p56">; +def P57 : PTXReg<"p57">; +def P58 : PTXReg<"p58">; +def P59 : PTXReg<"p59">; +def P60 : PTXReg<"p60">; +def P61 : PTXReg<"p61">; +def P62 : PTXReg<"p62">; +def P63 : PTXReg<"p63">; +def P64 : PTXReg<"p64">; +def P65 : PTXReg<"p65">; +def P66 : PTXReg<"p66">; +def P67 : PTXReg<"p67">; +def P68 : PTXReg<"p68">; +def P69 : PTXReg<"p69">; +def P70 : PTXReg<"p70">; +def P71 : PTXReg<"p71">; +def P72 : PTXReg<"p72">; +def P73 : PTXReg<"p73">; +def P74 : PTXReg<"p74">; +def P75 : PTXReg<"p75">; +def P76 : PTXReg<"p76">; +def P77 : PTXReg<"p77">; +def P78 : PTXReg<"p78">; +def P79 : PTXReg<"p79">; +def P80 : PTXReg<"p80">; +def P81 : PTXReg<"p81">; +def P82 : PTXReg<"p82">; +def P83 : PTXReg<"p83">; +def P84 : PTXReg<"p84">; +def P85 : PTXReg<"p85">; +def P86 : PTXReg<"p86">; +def P87 : PTXReg<"p87">; +def P88 : PTXReg<"p88">; +def P89 : PTXReg<"p89">; +def P90 : PTXReg<"p90">; +def P91 : PTXReg<"p91">; +def P92 : PTXReg<"p92">; +def P93 : PTXReg<"p93">; +def P94 : PTXReg<"p94">; +def P95 : PTXReg<"p95">; +def P96 : PTXReg<"p96">; +def P97 : PTXReg<"p97">; +def P98 : PTXReg<"p98">; +def P99 : PTXReg<"p99">; +def P100 : PTXReg<"p100">; +def P101 : PTXReg<"p101">; +def P102 : PTXReg<"p102">; +def P103 : PTXReg<"p103">; +def P104 : PTXReg<"p104">; +def P105 : PTXReg<"p105">; +def P106 : PTXReg<"p106">; +def P107 : PTXReg<"p107">; +def P108 : PTXReg<"p108">; +def P109 : PTXReg<"p109">; +def P110 : PTXReg<"p110">; +def P111 : PTXReg<"p111">; +def P112 : PTXReg<"p112">; +def P113 : PTXReg<"p113">; +def P114 : PTXReg<"p114">; +def P115 : PTXReg<"p115">; +def P116 : PTXReg<"p116">; +def P117 : 
PTXReg<"p117">; +def P118 : PTXReg<"p118">; +def P119 : PTXReg<"p119">; +def P120 : PTXReg<"p120">; +def P121 : PTXReg<"p121">; +def P122 : PTXReg<"p122">; +def P123 : PTXReg<"p123">; +def P124 : PTXReg<"p124">; +def P125 : PTXReg<"p125">; +def P126 : PTXReg<"p126">; +def P127 : PTXReg<"p127">; -///===- 16-bit Integer Registers ------------------------------------------===// +///===- 16-Bit Registers --------------------------------------------------===// -def RH0 : PTXReg<"rh0">; -def RH1 : PTXReg<"rh1">; -def RH2 : PTXReg<"rh2">; -def RH3 : PTXReg<"rh3">; -def RH4 : PTXReg<"rh4">; -def RH5 : PTXReg<"rh5">; -def RH6 : PTXReg<"rh6">; -def RH7 : PTXReg<"rh7">; +def RH0 : PTXReg<"rh0">; +def RH1 : PTXReg<"rh1">; +def RH2 : PTXReg<"rh2">; +def RH3 : PTXReg<"rh3">; +def RH4 : PTXReg<"rh4">; +def RH5 : PTXReg<"rh5">; +def RH6 : PTXReg<"rh6">; +def RH7 : PTXReg<"rh7">; +def RH8 : PTXReg<"rh8">; +def RH9 : PTXReg<"rh9">; +def RH10 : PTXReg<"rh10">; +def RH11 : PTXReg<"rh11">; +def RH12 : PTXReg<"rh12">; +def RH13 : PTXReg<"rh13">; +def RH14 : PTXReg<"rh14">; +def RH15 : PTXReg<"rh15">; +def RH16 : PTXReg<"rh16">; +def RH17 : PTXReg<"rh17">; +def RH18 : PTXReg<"rh18">; +def RH19 : PTXReg<"rh19">; +def RH20 : PTXReg<"rh20">; +def RH21 : PTXReg<"rh21">; +def RH22 : PTXReg<"rh22">; +def RH23 : PTXReg<"rh23">; +def RH24 : PTXReg<"rh24">; +def RH25 : PTXReg<"rh25">; +def RH26 : PTXReg<"rh26">; +def RH27 : PTXReg<"rh27">; +def RH28 : PTXReg<"rh28">; +def RH29 : PTXReg<"rh29">; +def RH30 : PTXReg<"rh30">; +def RH31 : PTXReg<"rh31">; +def RH32 : PTXReg<"rh32">; +def RH33 : PTXReg<"rh33">; +def RH34 : PTXReg<"rh34">; +def RH35 : PTXReg<"rh35">; +def RH36 : PTXReg<"rh36">; +def RH37 : PTXReg<"rh37">; +def RH38 : PTXReg<"rh38">; +def RH39 : PTXReg<"rh39">; +def RH40 : PTXReg<"rh40">; +def RH41 : PTXReg<"rh41">; +def RH42 : PTXReg<"rh42">; +def RH43 : PTXReg<"rh43">; +def RH44 : PTXReg<"rh44">; +def RH45 : PTXReg<"rh45">; +def RH46 : PTXReg<"rh46">; +def RH47 : PTXReg<"rh47">; +def 
RH48 : PTXReg<"rh48">; +def RH49 : PTXReg<"rh49">; +def RH50 : PTXReg<"rh50">; +def RH51 : PTXReg<"rh51">; +def RH52 : PTXReg<"rh52">; +def RH53 : PTXReg<"rh53">; +def RH54 : PTXReg<"rh54">; +def RH55 : PTXReg<"rh55">; +def RH56 : PTXReg<"rh56">; +def RH57 : PTXReg<"rh57">; +def RH58 : PTXReg<"rh58">; +def RH59 : PTXReg<"rh59">; +def RH60 : PTXReg<"rh60">; +def RH61 : PTXReg<"rh61">; +def RH62 : PTXReg<"rh62">; +def RH63 : PTXReg<"rh63">; +def RH64 : PTXReg<"rh64">; +def RH65 : PTXReg<"rh65">; +def RH66 : PTXReg<"rh66">; +def RH67 : PTXReg<"rh67">; +def RH68 : PTXReg<"rh68">; +def RH69 : PTXReg<"rh69">; +def RH70 : PTXReg<"rh70">; +def RH71 : PTXReg<"rh71">; +def RH72 : PTXReg<"rh72">; +def RH73 : PTXReg<"rh73">; +def RH74 : PTXReg<"rh74">; +def RH75 : PTXReg<"rh75">; +def RH76 : PTXReg<"rh76">; +def RH77 : PTXReg<"rh77">; +def RH78 : PTXReg<"rh78">; +def RH79 : PTXReg<"rh79">; +def RH80 : PTXReg<"rh80">; +def RH81 : PTXReg<"rh81">; +def RH82 : PTXReg<"rh82">; +def RH83 : PTXReg<"rh83">; +def RH84 : PTXReg<"rh84">; +def RH85 : PTXReg<"rh85">; +def RH86 : PTXReg<"rh86">; +def RH87 : PTXReg<"rh87">; +def RH88 : PTXReg<"rh88">; +def RH89 : PTXReg<"rh89">; +def RH90 : PTXReg<"rh90">; +def RH91 : PTXReg<"rh91">; +def RH92 : PTXReg<"rh92">; +def RH93 : PTXReg<"rh93">; +def RH94 : PTXReg<"rh94">; +def RH95 : PTXReg<"rh95">; +def RH96 : PTXReg<"rh96">; +def RH97 : PTXReg<"rh97">; +def RH98 : PTXReg<"rh98">; +def RH99 : PTXReg<"rh99">; +def RH100 : PTXReg<"rh100">; +def RH101 : PTXReg<"rh101">; +def RH102 : PTXReg<"rh102">; +def RH103 : PTXReg<"rh103">; +def RH104 : PTXReg<"rh104">; +def RH105 : PTXReg<"rh105">; +def RH106 : PTXReg<"rh106">; +def RH107 : PTXReg<"rh107">; +def RH108 : PTXReg<"rh108">; +def RH109 : PTXReg<"rh109">; +def RH110 : PTXReg<"rh110">; +def RH111 : PTXReg<"rh111">; +def RH112 : PTXReg<"rh112">; +def RH113 : PTXReg<"rh113">; +def RH114 : PTXReg<"rh114">; +def RH115 : PTXReg<"rh115">; +def RH116 : PTXReg<"rh116">; +def RH117 : PTXReg<"rh117">; +def 
RH118 : PTXReg<"rh118">; +def RH119 : PTXReg<"rh119">; +def RH120 : PTXReg<"rh120">; +def RH121 : PTXReg<"rh121">; +def RH122 : PTXReg<"rh122">; +def RH123 : PTXReg<"rh123">; +def RH124 : PTXReg<"rh124">; +def RH125 : PTXReg<"rh125">; +def RH126 : PTXReg<"rh126">; +def RH127 : PTXReg<"rh127">; -///===- 32-bit Integer Registers ------------------------------------------===// +///===- 32-Bit Registers --------------------------------------------------===// -def R0 : PTXReg<"r0">; -def R1 : PTXReg<"r1">; -def R2 : PTXReg<"r2">; -def R3 : PTXReg<"r3">; -def R4 : PTXReg<"r4">; -def R5 : PTXReg<"r5">; -def R6 : PTXReg<"r6">; -def R7 : PTXReg<"r7">; +def R0 : PTXReg<"r0">; +def R1 : PTXReg<"r1">; +def R2 : PTXReg<"r2">; +def R3 : PTXReg<"r3">; +def R4 : PTXReg<"r4">; +def R5 : PTXReg<"r5">; +def R6 : PTXReg<"r6">; +def R7 : PTXReg<"r7">; +def R8 : PTXReg<"r8">; +def R9 : PTXReg<"r9">; +def R10 : PTXReg<"r10">; +def R11 : PTXReg<"r11">; +def R12 : PTXReg<"r12">; +def R13 : PTXReg<"r13">; +def R14 : PTXReg<"r14">; +def R15 : PTXReg<"r15">; +def R16 : PTXReg<"r16">; +def R17 : PTXReg<"r17">; +def R18 : PTXReg<"r18">; +def R19 : PTXReg<"r19">; +def R20 : PTXReg<"r20">; +def R21 : PTXReg<"r21">; +def R22 : PTXReg<"r22">; +def R23 : PTXReg<"r23">; +def R24 : PTXReg<"r24">; +def R25 : PTXReg<"r25">; +def R26 : PTXReg<"r26">; +def R27 : PTXReg<"r27">; +def R28 : PTXReg<"r28">; +def R29 : PTXReg<"r29">; +def R30 : PTXReg<"r30">; +def R31 : PTXReg<"r31">; +def R32 : PTXReg<"r32">; +def R33 : PTXReg<"r33">; +def R34 : PTXReg<"r34">; +def R35 : PTXReg<"r35">; +def R36 : PTXReg<"r36">; +def R37 : PTXReg<"r37">; +def R38 : PTXReg<"r38">; +def R39 : PTXReg<"r39">; +def R40 : PTXReg<"r40">; +def R41 : PTXReg<"r41">; +def R42 : PTXReg<"r42">; +def R43 : PTXReg<"r43">; +def R44 : PTXReg<"r44">; +def R45 : PTXReg<"r45">; +def R46 : PTXReg<"r46">; +def R47 : PTXReg<"r47">; +def R48 : PTXReg<"r48">; +def R49 : PTXReg<"r49">; +def R50 : PTXReg<"r50">; +def R51 : PTXReg<"r51">; +def R52 : 
PTXReg<"r52">; +def R53 : PTXReg<"r53">; +def R54 : PTXReg<"r54">; +def R55 : PTXReg<"r55">; +def R56 : PTXReg<"r56">; +def R57 : PTXReg<"r57">; +def R58 : PTXReg<"r58">; +def R59 : PTXReg<"r59">; +def R60 : PTXReg<"r60">; +def R61 : PTXReg<"r61">; +def R62 : PTXReg<"r62">; +def R63 : PTXReg<"r63">; +def R64 : PTXReg<"r64">; +def R65 : PTXReg<"r65">; +def R66 : PTXReg<"r66">; +def R67 : PTXReg<"r67">; +def R68 : PTXReg<"r68">; +def R69 : PTXReg<"r69">; +def R70 : PTXReg<"r70">; +def R71 : PTXReg<"r71">; +def R72 : PTXReg<"r72">; +def R73 : PTXReg<"r73">; +def R74 : PTXReg<"r74">; +def R75 : PTXReg<"r75">; +def R76 : PTXReg<"r76">; +def R77 : PTXReg<"r77">; +def R78 : PTXReg<"r78">; +def R79 : PTXReg<"r79">; +def R80 : PTXReg<"r80">; +def R81 : PTXReg<"r81">; +def R82 : PTXReg<"r82">; +def R83 : PTXReg<"r83">; +def R84 : PTXReg<"r84">; +def R85 : PTXReg<"r85">; +def R86 : PTXReg<"r86">; +def R87 : PTXReg<"r87">; +def R88 : PTXReg<"r88">; +def R89 : PTXReg<"r89">; +def R90 : PTXReg<"r90">; +def R91 : PTXReg<"r91">; +def R92 : PTXReg<"r92">; +def R93 : PTXReg<"r93">; +def R94 : PTXReg<"r94">; +def R95 : PTXReg<"r95">; +def R96 : PTXReg<"r96">; +def R97 : PTXReg<"r97">; +def R98 : PTXReg<"r98">; +def R99 : PTXReg<"r99">; +def R100 : PTXReg<"r100">; +def R101 : PTXReg<"r101">; +def R102 : PTXReg<"r102">; +def R103 : PTXReg<"r103">; +def R104 : PTXReg<"r104">; +def R105 : PTXReg<"r105">; +def R106 : PTXReg<"r106">; +def R107 : PTXReg<"r107">; +def R108 : PTXReg<"r108">; +def R109 : PTXReg<"r109">; +def R110 : PTXReg<"r110">; +def R111 : PTXReg<"r111">; +def R112 : PTXReg<"r112">; +def R113 : PTXReg<"r113">; +def R114 : PTXReg<"r114">; +def R115 : PTXReg<"r115">; +def R116 : PTXReg<"r116">; +def R117 : PTXReg<"r117">; +def R118 : PTXReg<"r118">; +def R119 : PTXReg<"r119">; +def R120 : PTXReg<"r120">; +def R121 : PTXReg<"r121">; +def R122 : PTXReg<"r122">; +def R123 : PTXReg<"r123">; +def R124 : PTXReg<"r124">; +def R125 : PTXReg<"r125">; +def R126 : PTXReg<"r126">; +def 
R127 : PTXReg<"r127">; -///===- 64-bit Integer Registers ------------------------------------------===// +///===- 64-Bit Registers --------------------------------------------------===// -def RD0 : PTXReg<"rd0">; -def RD1 : PTXReg<"rd1">; -def RD2 : PTXReg<"rd2">; -def RD3 : PTXReg<"rd3">; -def RD4 : PTXReg<"rd4">; -def RD5 : PTXReg<"rd5">; -def RD6 : PTXReg<"rd6">; -def RD7 : PTXReg<"rd7">; +def RD0 : PTXReg<"rd0">; +def RD1 : PTXReg<"rd1">; +def RD2 : PTXReg<"rd2">; +def RD3 : PTXReg<"rd3">; +def RD4 : PTXReg<"rd4">; +def RD5 : PTXReg<"rd5">; +def RD6 : PTXReg<"rd6">; +def RD7 : PTXReg<"rd7">; +def RD8 : PTXReg<"rd8">; +def RD9 : PTXReg<"rd9">; +def RD10 : PTXReg<"rd10">; +def RD11 : PTXReg<"rd11">; +def RD12 : PTXReg<"rd12">; +def RD13 : PTXReg<"rd13">; +def RD14 : PTXReg<"rd14">; +def RD15 : PTXReg<"rd15">; +def RD16 : PTXReg<"rd16">; +def RD17 : PTXReg<"rd17">; +def RD18 : PTXReg<"rd18">; +def RD19 : PTXReg<"rd19">; +def RD20 : PTXReg<"rd20">; +def RD21 : PTXReg<"rd21">; +def RD22 : PTXReg<"rd22">; +def RD23 : PTXReg<"rd23">; +def RD24 : PTXReg<"rd24">; +def RD25 : PTXReg<"rd25">; +def RD26 : PTXReg<"rd26">; +def RD27 : PTXReg<"rd27">; +def RD28 : PTXReg<"rd28">; +def RD29 : PTXReg<"rd29">; +def RD30 : PTXReg<"rd30">; +def RD31 : PTXReg<"rd31">; +def RD32 : PTXReg<"rd32">; +def RD33 : PTXReg<"rd33">; +def RD34 : PTXReg<"rd34">; +def RD35 : PTXReg<"rd35">; +def RD36 : PTXReg<"rd36">; +def RD37 : PTXReg<"rd37">; +def RD38 : PTXReg<"rd38">; +def RD39 : PTXReg<"rd39">; +def RD40 : PTXReg<"rd40">; +def RD41 : PTXReg<"rd41">; +def RD42 : PTXReg<"rd42">; +def RD43 : PTXReg<"rd43">; +def RD44 : PTXReg<"rd44">; +def RD45 : PTXReg<"rd45">; +def RD46 : PTXReg<"rd46">; +def RD47 : PTXReg<"rd47">; +def RD48 : PTXReg<"rd48">; +def RD49 : PTXReg<"rd49">; +def RD50 : PTXReg<"rd50">; +def RD51 : PTXReg<"rd51">; +def RD52 : PTXReg<"rd52">; +def RD53 : PTXReg<"rd53">; +def RD54 : PTXReg<"rd54">; +def RD55 : PTXReg<"rd55">; +def RD56 : PTXReg<"rd56">; +def RD57 : PTXReg<"rd57">; 
+def RD58 : PTXReg<"rd58">; +def RD59 : PTXReg<"rd59">; +def RD60 : PTXReg<"rd60">; +def RD61 : PTXReg<"rd61">; +def RD62 : PTXReg<"rd62">; +def RD63 : PTXReg<"rd63">; +def RD64 : PTXReg<"rd64">; +def RD65 : PTXReg<"rd65">; +def RD66 : PTXReg<"rd66">; +def RD67 : PTXReg<"rd67">; +def RD68 : PTXReg<"rd68">; +def RD69 : PTXReg<"rd69">; +def RD70 : PTXReg<"rd70">; +def RD71 : PTXReg<"rd71">; +def RD72 : PTXReg<"rd72">; +def RD73 : PTXReg<"rd73">; +def RD74 : PTXReg<"rd74">; +def RD75 : PTXReg<"rd75">; +def RD76 : PTXReg<"rd76">; +def RD77 : PTXReg<"rd77">; +def RD78 : PTXReg<"rd78">; +def RD79 : PTXReg<"rd79">; +def RD80 : PTXReg<"rd80">; +def RD81 : PTXReg<"rd81">; +def RD82 : PTXReg<"rd82">; +def RD83 : PTXReg<"rd83">; +def RD84 : PTXReg<"rd84">; +def RD85 : PTXReg<"rd85">; +def RD86 : PTXReg<"rd86">; +def RD87 : PTXReg<"rd87">; +def RD88 : PTXReg<"rd88">; +def RD89 : PTXReg<"rd89">; +def RD90 : PTXReg<"rd90">; +def RD91 : PTXReg<"rd91">; +def RD92 : PTXReg<"rd92">; +def RD93 : PTXReg<"rd93">; +def RD94 : PTXReg<"rd94">; +def RD95 : PTXReg<"rd95">; +def RD96 : PTXReg<"rd96">; +def RD97 : PTXReg<"rd97">; +def RD98 : PTXReg<"rd98">; +def RD99 : PTXReg<"rd99">; +def RD100 : PTXReg<"rd100">; +def RD101 : PTXReg<"rd101">; +def RD102 : PTXReg<"rd102">; +def RD103 : PTXReg<"rd103">; +def RD104 : PTXReg<"rd104">; +def RD105 : PTXReg<"rd105">; +def RD106 : PTXReg<"rd106">; +def RD107 : PTXReg<"rd107">; +def RD108 : PTXReg<"rd108">; +def RD109 : PTXReg<"rd109">; +def RD110 : PTXReg<"rd110">; +def RD111 : PTXReg<"rd111">; +def RD112 : PTXReg<"rd112">; +def RD113 : PTXReg<"rd113">; +def RD114 : PTXReg<"rd114">; +def RD115 : PTXReg<"rd115">; +def RD116 : PTXReg<"rd116">; +def RD117 : PTXReg<"rd117">; +def RD118 : PTXReg<"rd118">; +def RD119 : PTXReg<"rd119">; +def RD120 : PTXReg<"rd120">; +def RD121 : PTXReg<"rd121">; +def RD122 : PTXReg<"rd122">; +def RD123 : PTXReg<"rd123">; +def RD124 : PTXReg<"rd124">; +def RD125 : PTXReg<"rd125">; +def RD126 : PTXReg<"rd126">; +def RD127 : 
PTXReg<"rd127">; //===----------------------------------------------------------------------===// // Register classes //===----------------------------------------------------------------------===// - -def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%u", 0, 7)>; -def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%u", 0, 7)>; -def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%u", 0, 7)>; -def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%u", 0, 7)>; -def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%u", 0, 7)>; -def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%u", 0, 7)>; +def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%u", 0, 127)>; +def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%u", 0, 127)>; +def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%u", 0, 127)>; +def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%u", 0, 127)>; +def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%u", 0, 127)>; +def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%u", 0, 127)>; diff --git a/lib/Target/PTX/generate-register-td.py b/lib/Target/PTX/generate-register-td.py new file mode 100755 index 00000000000..15286908961 --- /dev/null +++ b/lib/Target/PTX/generate-register-td.py @@ -0,0 +1,163 @@ +#!/usr/bin/env python +##===- generate-register-td.py --------------------------------*-python-*--===## +## +## The LLVM Compiler Infrastructure +## +## This file is distributed under the University of Illinois Open Source +## License. See LICENSE.TXT for details. +## +##===----------------------------------------------------------------------===## +## +## This file describes the PTX register file generator. 
+## +##===----------------------------------------------------------------------===## + +from sys import argv, exit, stdout + + +if len(argv) != 5: + print('Usage: generate-register-td.py <num_pred> <num_16bit> <num_32bit> <num_64bit>') + exit(1) + +try: + num_pred = int(argv[1]) + num_16bit = int(argv[2]) + num_32bit = int(argv[3]) + num_64bit = int(argv[4]) +except: + print('ERROR: Invalid integer parameter') + exit(1) + +## Print the register definition file +td_file = open('PTXRegisterInfo.td', 'w') + +td_file.write(''' +//===- PTXRegisterInfo.td - PTX Register defs ----------------*- tblgen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Declarations that describe the PTX register file +//===----------------------------------------------------------------------===// + +class PTXReg<string n> : Register<n> { + let Namespace = "PTX"; +} + +//===----------------------------------------------------------------------===// +// Registers +//===----------------------------------------------------------------------===// +''') + + +# Print predicate registers +td_file.write('\n///===- Predicate Registers -----------------------------------------------===//\n\n') +for r in range(0, num_pred): + td_file.write('def P%d : PTXReg<"p%d">;\n' % (r, r)) + +# Print 16-bit registers +td_file.write('\n///===- 16-Bit Registers --------------------------------------------------===//\n\n') +for r in range(0, num_16bit): + td_file.write('def RH%d : PTXReg<"rh%d">;\n' % (r, r)) + +# Print 32-bit registers +td_file.write('\n///===- 32-Bit Registers --------------------------------------------------===//\n\n') +for r in range(0, num_32bit): + td_file.write('def R%d : PTXReg<"r%d">;\n' % (r, r)) + +# Print 64-bit registers +td_file.write('\n///===- 64-Bit
Registers --------------------------------------------------===//\n\n') +for r in range(0, num_64bit): + td_file.write('def RD%d : PTXReg<"rd%d">;\n' % (r, r)) + + +td_file.write(''' +//===----------------------------------------------------------------------===// +// Register classes +//===----------------------------------------------------------------------===// +''') + + +# Print register classes + +td_file.write('def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%%u", 0, %d)>;\n' % (num_pred-1)) +td_file.write('def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%%u", 0, %d)>;\n' % (num_16bit-1)) +td_file.write('def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%%u", 0, %d)>;\n' % (num_32bit-1)) +td_file.write('def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit-1)) +td_file.write('def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%%u", 0, %d)>;\n' % (num_32bit-1)) +td_file.write('def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit-1)) + + +td_file.close() + +## Now write the PTXCallingConv.td file +td_file = open('PTXCallingConv.td', 'w') + +# Reserve 10% of the available registers for return values, and the other 90% +# for parameters +num_ret_pred = int(0.1 * num_pred) +num_ret_16bit = int(0.1 * num_16bit) +num_ret_32bit = int(0.1 * num_32bit) +num_ret_64bit = int(0.1 * num_64bit) +num_param_pred = num_pred - num_ret_pred +num_param_16bit = num_16bit - num_ret_16bit +num_param_32bit = num_32bit - num_ret_32bit +num_param_64bit = num_64bit - num_ret_64bit + +param_regs_pred = [('P%d' % (i+num_ret_pred)) for i in range(0, num_param_pred)] +ret_regs_pred = ['P%d' % i for i in range(0, num_ret_pred)] +param_regs_16bit = [('RH%d' % (i+num_ret_16bit)) for i in range(0, num_param_16bit)] +ret_regs_16bit = ['RH%d' % i for i in range(0, num_ret_16bit)] +param_regs_32bit = [('R%d' % (i+num_ret_32bit)) for i in range(0, num_param_32bit)] +ret_regs_32bit = ['R%d' % i for 
i in range(0, num_ret_32bit)] +param_regs_64bit = [('RD%d' % (i+num_ret_64bit)) for i in range(0, num_param_64bit)] +ret_regs_64bit = ['RD%d' % i for i in range(0, num_ret_64bit)] + +param_list_pred = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_pred) +ret_list_pred = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_pred) +param_list_16bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_16bit) +ret_list_16bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_16bit) +param_list_32bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_32bit) +ret_list_32bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_32bit) +param_list_64bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_64bit) +ret_list_64bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_64bit) + +td_file.write(''' +//===--- PTXCallingConv.td - Calling Conventions -----------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This describes the calling conventions for the PTX architecture. 
+// +//===----------------------------------------------------------------------===// + +// PTX Formal Parameter Calling Convention +def CC_PTX : CallingConv<[ + CCIfType<[i1], CCAssignToReg<[%s]>>, + CCIfType<[i16], CCAssignToReg<[%s]>>, + CCIfType<[i32,f32], CCAssignToReg<[%s]>>, + CCIfType<[i64,f64], CCAssignToReg<[%s]>> +]>; + +// PTX Return Value Calling Convention +def RetCC_PTX : CallingConv<[ + CCIfType<[i1], CCAssignToReg<[%s]>>, + CCIfType<[i16], CCAssignToReg<[%s]>>, + CCIfType<[i32,f32], CCAssignToReg<[%s]>>, + CCIfType<[i64,f64], CCAssignToReg<[%s]>> +]>; +''' % (param_list_pred, param_list_16bit, param_list_32bit, param_list_64bit, + ret_list_pred, ret_list_16bit, ret_list_32bit, ret_list_64bit)) + + +td_file.close() diff --git a/test/CodeGen/PTX/add.ll b/test/CodeGen/PTX/add.ll index c16be4933c5..293aebe51e0 100644 --- a/test/CodeGen/PTX/add.ll +++ b/test/CodeGen/PTX/add.ll @@ -1,70 +1,70 @@ ; RUN: llc < %s -march=ptx32 | FileCheck %s define ptx_device i16 @t1_u16(i16 %x, i16 %y) { -; CHECK: add.u16 rh0, rh1, rh2; +; CHECK: add.u16 rh{{[0-9]+}}, rh{{[0-9]+}}, rh{{[0-9]+}}; ; CHECK-NEXT: ret; %z = add i16 %x, %y ret i16 %z } define ptx_device i32 @t1_u32(i32 %x, i32 %y) { -; CHECK: add.u32 r0, r1, r2; +; CHECK: add.u32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}; ; CHECK-NEXT: ret; %z = add i32 %x, %y ret i32 %z } define ptx_device i64 @t1_u64(i64 %x, i64 %y) { -; CHECK: add.u64 rd0, rd1, rd2; +; CHECK: add.u64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK-NEXT: ret; %z = add i64 %x, %y ret i64 %z } define ptx_device float @t1_f32(float %x, float %y) { -; CHECK: add.rn.f32 r0, r1, r2 +; CHECK: add.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}} ; CHECK-NEXT: ret; %z = fadd float %x, %y ret float %z } define ptx_device double @t1_f64(double %x, double %y) { -; CHECK: add.rn.f64 rd0, rd1, rd2 +; CHECK: add.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}} ; CHECK-NEXT: ret; %z = fadd double %x, %y ret double %z } define ptx_device i16 @t2_u16(i16 %x) { -; 
CHECK: add.u16 rh0, rh1, 1; +; CHECK: add.u16 rh{{[0-9]+}}, rh{{[0-9]+}}, 1; ; CHECK-NEXT: ret; %z = add i16 %x, 1 ret i16 %z } define ptx_device i32 @t2_u32(i32 %x) { -; CHECK: add.u32 r0, r1, 1; +; CHECK: add.u32 r{{[0-9]+}}, r{{[0-9]+}}, 1; ; CHECK-NEXT: ret; %z = add i32 %x, 1 ret i32 %z } define ptx_device i64 @t2_u64(i64 %x) { -; CHECK: add.u64 rd0, rd1, 1; +; CHECK: add.u64 rd{{[0-9]+}}, rd{{[0-9]+}}, 1; ; CHECK-NEXT: ret; %z = add i64 %x, 1 ret i64 %z } define ptx_device float @t2_f32(float %x) { -; CHECK: add.rn.f32 r0, r1, 0F3F800000; +; CHECK: add.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, 0F3F800000; ; CHECK-NEXT: ret; %z = fadd float %x, 1.0 ret float %z } define ptx_device double @t2_f64(double %x) { -; CHECK: add.rn.f64 rd0, rd1, 0D3FF0000000000000; +; CHECK: add.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, 0D3FF0000000000000; ; CHECK-NEXT: ret; %z = fadd double %x, 1.0 ret double %z diff --git a/test/CodeGen/PTX/aggregates.ll b/test/CodeGen/PTX/aggregates.ll new file mode 100644 index 00000000000..23f28a79c0c --- /dev/null +++ b/test/CodeGen/PTX/aggregates.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -march=ptx32 -mattr=sm20 | FileCheck %s + +%complex = type { float, float } + +define ptx_device %complex @complex_add(%complex %a, %complex %b) { +entry: +; CHECK: ld.param.f32 r[[R0:[0-9]+]], [__param_1]; +; CHECK-NEXT: ld.param.f32 r[[R2:[0-9]+]], [__param_3]; +; CHECK-NEXT: ld.param.f32 r[[R1:[0-9]+]], [__param_2]; +; CHECK-NEXT: ld.param.f32 r[[R3:[0-9]+]], [__param_4]; +; CHECK-NEXT: add.rn.f32 r[[R0]], r[[R0]], r[[R2]]; +; CHECK-NEXT: add.rn.f32 r[[R1]], r[[R1]], r[[R3]]; +; CHECK-NEXT: ret; + %a.real = extractvalue %complex %a, 0 + %a.imag = extractvalue %complex %a, 1 + %b.real = extractvalue %complex %b, 0 + %b.imag = extractvalue %complex %b, 1 + %ret.real = fadd float %a.real, %b.real + %ret.imag = fadd float %a.imag, %b.imag + %ret.0 = insertvalue %complex undef, float %ret.real, 0 + %ret.1 = insertvalue %complex %ret.0, float %ret.imag, 1 + ret %complex %ret.1 +} 
diff --git a/test/CodeGen/PTX/bitwise.ll b/test/CodeGen/PTX/bitwise.ll index dbc77e53330..38592807350 100644 --- a/test/CodeGen/PTX/bitwise.ll +++ b/test/CodeGen/PTX/bitwise.ll @@ -3,21 +3,21 @@ ; preds define ptx_device i32 @t1_and_preds(i1 %x, i1 %y) { -; CHECK: and.pred p0, p1, p2 +; CHECK: and.pred p{{[0-9]+}}, p{{[0-9]+}}, p{{[0-9]+}} %c = and i1 %x, %y %d = zext i1 %c to i32 ret i32 %d } define ptx_device i32 @t1_or_preds(i1 %x, i1 %y) { -; CHECK: or.pred p0, p1, p2 +; CHECK: or.pred p{{[0-9]+}}, p{{[0-9]+}}, p{{[0-9]+}} %a = or i1 %x, %y %b = zext i1 %a to i32 ret i32 %b } define ptx_device i32 @t1_xor_preds(i1 %x, i1 %y) { -; CHECK: xor.pred p0, p1, p2 +; CHECK: xor.pred p{{[0-9]+}}, p{{[0-9]+}}, p{{[0-9]+}} %a = xor i1 %x, %y %b = zext i1 %a to i32 ret i32 %b diff --git a/test/CodeGen/PTX/bra.ll b/test/CodeGen/PTX/bra.ll index 49383eb3cf9..7cc944466d2 100644 --- a/test/CodeGen/PTX/bra.ll +++ b/test/CodeGen/PTX/bra.ll @@ -10,15 +10,15 @@ loop: define ptx_device i32 @test_bra_cond_direct(i32 %x, i32 %y) { entry: -; CHECK: setp.le.u32 p0, r1, r2 +; CHECK: setp.le.u32 p0, r[[R0:[0-9]+]], r[[R1:[0-9]+]] %p = icmp ugt i32 %x, %y ; CHECK-NEXT: @p0 bra ; CHECK-NOT: bra br i1 %p, label %clause.if, label %clause.else clause.if: -; CHECK: mov.u32 r0, r1 +; CHECK: mov.u32 r{{[0-9]+}}, r[[R0]] ret i32 %x clause.else: -; CHECK: mov.u32 r0, r2 +; CHECK: mov.u32 r{{[0-9]+}}, r[[R1]] ret i32 %y } diff --git a/test/CodeGen/PTX/cvt.ll b/test/CodeGen/PTX/cvt.ll index f7233697c04..18f7ef365b4 100644 --- a/test/CodeGen/PTX/cvt.ll +++ b/test/CodeGen/PTX/cvt.ll @@ -4,9 +4,9 @@ ; (note: we convert back to i32 to return) define ptx_device i32 @cvt_pred_i16(i16 %x, i1 %y) { -; CHECK: setp.gt.b16 p0, rh1, 0 -; CHECK-NEXT: and.pred p0, p0, p1; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.gt.b16 p[[P0:[0-9]+]], rh{{[0-9]+}}, 0 +; CHECK-NEXT: and.pred p[[P1:[0-9]+]], p[[P0]], p{{[0-9]+}}; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P1]]; ; CHECK-NEXT: ret; %a = trunc i16 %x 
to i1 %b = and i1 %a, %y @@ -15,9 +15,9 @@ define ptx_device i32 @cvt_pred_i16(i16 %x, i1 %y) { } define ptx_device i32 @cvt_pred_i32(i32 %x, i1 %y) { -; CHECK: setp.gt.b32 p0, r1, 0 -; CHECK-NEXT: and.pred p0, p0, p1; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.gt.b32 p[[P0:[0-9]+]], r{{[0-9]+}}, 0 +; CHECK-NEXT: and.pred p[[P1:[0-9]+]], p[[P0]], p{{[0-9]+}}; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P1]]; ; CHECK-NEXT: ret; %a = trunc i32 %x to i1 %b = and i1 %a, %y @@ -26,9 +26,9 @@ define ptx_device i32 @cvt_pred_i32(i32 %x, i1 %y) { } define ptx_device i32 @cvt_pred_i64(i64 %x, i1 %y) { -; CHECK: setp.gt.b64 p0, rd1, 0 -; CHECK-NEXT: and.pred p0, p0, p1; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.gt.b64 p[[P0:[0-9]+]], rd{{[0-9]+}}, 0 +; CHECK-NEXT: and.pred p[[P1:[0-9]+]], p[[P0]], p{{[0-9]+}}; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P1]]; ; CHECK-NEXT: ret; %a = trunc i64 %x to i1 %b = and i1 %a, %y @@ -37,9 +37,9 @@ define ptx_device i32 @cvt_pred_i64(i64 %x, i1 %y) { } define ptx_device i32 @cvt_pred_f32(float %x, i1 %y) { -; CHECK: setp.gt.b32 p0, r1, 0 -; CHECK-NEXT: and.pred p0, p0, p1; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.gt.b32 p[[P0:[0-9]+]], r{{[0-9]+}}, 0 +; CHECK-NEXT: and.pred p[[P1:[0-9]+]], p[[P0]], p{{[0-9]+}}; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P1]]; ; CHECK-NEXT: ret; %a = fptoui float %x to i1 %b = and i1 %a, %y @@ -48,9 +48,9 @@ define ptx_device i32 @cvt_pred_f32(float %x, i1 %y) { } define ptx_device i32 @cvt_pred_f64(double %x, i1 %y) { -; CHECK: setp.gt.b64 p0, rd1, 0 -; CHECK-NEXT: and.pred p0, p0, p1; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.gt.b64 p[[P0:[0-9]+]], rd{{[0-9]+}}, 0 +; CHECK-NEXT: and.pred p[[P1:[0-9]+]], p[[P0]], p{{[0-9]+}}; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P1]]; ; CHECK-NEXT: ret; %a = fptoui double %x to i1 %b = and i1 %a, %y @@ -61,35 +61,35 @@ define ptx_device i32 @cvt_pred_f64(double %x, i1 %y) { ; i16 define ptx_device i16 
@cvt_i16_preds(i1 %x) { -; CHECK: selp.u16 rh0, 1, 0, p1; +; CHECK: selp.u16 rh{{[0-9]+}}, 1, 0, p{{[0-9]+}}; ; CHECK-NEXT: ret; %a = zext i1 %x to i16 ret i16 %a } define ptx_device i16 @cvt_i16_i32(i32 %x) { -; CHECK: cvt.u16.u32 rh0, r1; +; CHECK: cvt.u16.u32 rh{{[0-9]+}}, r{{[0-9]+}}; ; CHECK-NEXT: ret; %a = trunc i32 %x to i16 ret i16 %a } define ptx_device i16 @cvt_i16_i64(i64 %x) { -; CHECK: cvt.u16.u64 rh0, rd1; +; CHECK: cvt.u16.u64 rh{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK-NEXT: ret; %a = trunc i64 %x to i16 ret i16 %a } define ptx_device i16 @cvt_i16_f32(float %x) { -; CHECK: cvt.rzi.u16.f32 rh0, r1; +; CHECK: cvt.rzi.u16.f32 rh{{[0-9]+}}, r{{[0-9]+}}; ; CHECK-NEXT: ret; %a = fptoui float %x to i16 ret i16 %a } define ptx_device i16 @cvt_i16_f64(double %x) { -; CHECK: cvt.rzi.u16.f64 rh0, rd1; +; CHECK: cvt.rzi.u16.f64 rh{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK-NEXT: ret; %a = fptoui double %x to i16 ret i16 %a @@ -98,35 +98,35 @@ define ptx_device i16 @cvt_i16_f64(double %x) { ; i32 define ptx_device i32 @cvt_i32_preds(i1 %x) { -; CHECK: selp.u32 r0, 1, 0, p1; +; CHECK: selp.u32 r{{[0-9]+}}, 1, 0, p{{[0-9]+}}; ; CHECK-NEXT: ret; %a = zext i1 %x to i32 ret i32 %a } define ptx_device i32 @cvt_i32_i16(i16 %x) { -; CHECK: cvt.u32.u16 r0, rh1; +; CHECK: cvt.u32.u16 r{{[0-9]+}}, rh{{[0-9]+}}; ; CHECK-NEXT: ret; %a = zext i16 %x to i32 ret i32 %a } define ptx_device i32 @cvt_i32_i64(i64 %x) { -; CHECK: cvt.u32.u64 r0, rd1; +; CHECK: cvt.u32.u64 r{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK-NEXT: ret; %a = trunc i64 %x to i32 ret i32 %a } define ptx_device i32 @cvt_i32_f32(float %x) { -; CHECK: cvt.rzi.u32.f32 r0, r1; +; CHECK: cvt.rzi.u32.f32 r{{[0-9]+}}, r{{[0-9]+}}; ; CHECK-NEXT: ret; %a = fptoui float %x to i32 ret i32 %a } define ptx_device i32 @cvt_i32_f64(double %x) { -; CHECK: cvt.rzi.u32.f64 r0, rd1; +; CHECK: cvt.rzi.u32.f64 r{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK-NEXT: ret; %a = fptoui double %x to i32 ret i32 %a @@ -135,35 +135,35 @@ define ptx_device i32 @cvt_i32_f64(double 
%x) { ; i64 define ptx_device i64 @cvt_i64_preds(i1 %x) { -; CHECK: selp.u64 rd0, 1, 0, p1; +; CHECK: selp.u64 rd{{[0-9]+}}, 1, 0, p{{[0-9]+}}; ; CHECK-NEXT: ret; %a = zext i1 %x to i64 ret i64 %a } define ptx_device i64 @cvt_i64_i16(i16 %x) { -; CHECK: cvt.u64.u16 rd0, rh1; +; CHECK: cvt.u64.u16 rd{{[0-9]+}}, rh{{[0-9]+}}; ; CHECK-NEXT: ret; %a = zext i16 %x to i64 ret i64 %a } define ptx_device i64 @cvt_i64_i32(i32 %x) { -; CHECK: cvt.u64.u32 rd0, r1; +; CHECK: cvt.u64.u32 rd{{[0-9]+}}, r{{[0-9]+}}; ; CHECK-NEXT: ret; %a = zext i32 %x to i64 ret i64 %a } define ptx_device i64 @cvt_i64_f32(float %x) { -; CHECK: cvt.rzi.u64.f32 rd0, r1; +; CHECK: cvt.rzi.u64.f32 rd{{[0-9]+}}, r{{[0-9]+}}; ; CHECK-NEXT: ret; %a = fptoui float %x to i64 ret i64 %a } define ptx_device i64 @cvt_i64_f64(double %x) { -; CHECK: cvt.rzi.u64.f64 rd0, rd1; +; CHECK: cvt.rzi.u64.f64 rd{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK: ret; %a = fptoui double %x to i64 ret i64 %a @@ -172,35 +172,35 @@ define ptx_device i64 @cvt_i64_f64(double %x) { ; f32 define ptx_device float @cvt_f32_preds(i1 %x) { -; CHECK: selp.f32 r0, 0F3F800000, 0F00000000, p1; +; CHECK: selp.f32 r{{[0-9]+}}, 0F3F800000, 0F00000000, p{{[0-9]+}}; ; CHECK-NEXT: ret; %a = uitofp i1 %x to float ret float %a } define ptx_device float @cvt_f32_i16(i16 %x) { -; CHECK: cvt.rn.f32.u16 r0, rh1; +; CHECK: cvt.rn.f32.u16 r{{[0-9]+}}, rh{{[0-9]+}}; ; CHECK-NEXT: ret; %a = uitofp i16 %x to float ret float %a } define ptx_device float @cvt_f32_i32(i32 %x) { -; CHECK: cvt.rn.f32.u32 r0, r1; +; CHECK: cvt.rn.f32.u32 r{{[0-9]+}}, r{{[0-9]+}}; ; CHECK-NEXT: ret; %a = uitofp i32 %x to float ret float %a } define ptx_device float @cvt_f32_i64(i64 %x) { -; CHECK: cvt.rn.f32.u64 r0, rd1; +; CHECK: cvt.rn.f32.u64 r{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK-NEXT: ret; %a = uitofp i64 %x to float ret float %a } define ptx_device float @cvt_f32_f64(double %x) { -; CHECK: cvt.rn.f32.f64 r0, rd1; +; CHECK: cvt.rn.f32.f64 r{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK-NEXT: ret; %a = 
fptrunc double %x to float ret float %a @@ -209,35 +209,35 @@ define ptx_device float @cvt_f32_f64(double %x) { ; f64 define ptx_device double @cvt_f64_preds(i1 %x) { -; CHECK: selp.f64 rd0, 0D3F80000000000000, 0D0000000000000000, p1; +; CHECK: selp.f64 rd{{[0-9]+}}, 0D3F80000000000000, 0D0000000000000000, p{{[0-9]+}}; ; CHECK-NEXT: ret; %a = uitofp i1 %x to double ret double %a } define ptx_device double @cvt_f64_i16(i16 %x) { -; CHECK: cvt.rn.f64.u16 rd0, rh1; +; CHECK: cvt.rn.f64.u16 rd{{[0-9]+}}, rh{{[0-9]+}}; ; CHECK-NEXT: ret; %a = uitofp i16 %x to double ret double %a } define ptx_device double @cvt_f64_i32(i32 %x) { -; CHECK: cvt.rn.f64.u32 rd0, r1; +; CHECK: cvt.rn.f64.u32 rd{{[0-9]+}}, r{{[0-9]+}}; ; CHECK-NEXT: ret; %a = uitofp i32 %x to double ret double %a } define ptx_device double @cvt_f64_i64(i64 %x) { -; CHECK: cvt.rn.f64.u64 rd0, rd1; +; CHECK: cvt.rn.f64.u64 rd{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK-NEXT: ret; %a = uitofp i64 %x to double ret double %a } define ptx_device double @cvt_f64_f32(float %x) { -; CHECK: cvt.f64.f32 rd0, r1; +; CHECK: cvt.f64.f32 rd{{[0-9]+}}, r{{[0-9]+}}; ; CHECK-NEXT: ret; %a = fpext float %x to double ret double %a diff --git a/test/CodeGen/PTX/fdiv-sm10.ll b/test/CodeGen/PTX/fdiv-sm10.ll index eb32222f3a3..049d8913b34 100644 --- a/test/CodeGen/PTX/fdiv-sm10.ll +++ b/test/CodeGen/PTX/fdiv-sm10.ll @@ -1,14 +1,14 @@ ; RUN: llc < %s -march=ptx32 -mattr=+sm10 | FileCheck %s define ptx_device float @t1_f32(float %x, float %y) { -; CHECK: div.f32 r0, r1, r2; +; CHECK: div.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}; ; CHECK-NEXT: ret; %a = fdiv float %x, %y ret float %a } define ptx_device double @t1_f64(double %x, double %y) { -; CHECK: div.f64 rd0, rd1, rd2; +; CHECK: div.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK-NEXT: ret; %a = fdiv double %x, %y ret double %a diff --git a/test/CodeGen/PTX/fdiv-sm13.ll b/test/CodeGen/PTX/fdiv-sm13.ll index ad24f35b206..2d953397d39 100644 --- a/test/CodeGen/PTX/fdiv-sm13.ll +++ 
b/test/CodeGen/PTX/fdiv-sm13.ll @@ -1,14 +1,14 @@ ; RUN: llc < %s -march=ptx32 -mattr=+sm13 | FileCheck %s define ptx_device float @t1_f32(float %x, float %y) { -; CHECK: div.rn.f32 r0, r1, r2; +; CHECK: div.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}; ; CHECK-NEXT: ret; %a = fdiv float %x, %y ret float %a } define ptx_device double @t1_f64(double %x, double %y) { -; CHECK: div.rn.f64 rd0, rd1, rd2; +; CHECK: div.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK-NEXT: ret; %a = fdiv double %x, %y ret double %a diff --git a/test/CodeGen/PTX/fneg.ll b/test/CodeGen/PTX/fneg.ll index 185c37c879b..66ca74a6ff8 100644 --- a/test/CodeGen/PTX/fneg.ll +++ b/test/CodeGen/PTX/fneg.ll @@ -1,14 +1,14 @@ ; RUN: llc < %s -march=ptx32 | FileCheck %s define ptx_device float @t1_f32(float %x) { -; CHECK: neg.f32 r0, r1; +; CHECK: neg.f32 r{{[0-9]+}}, r{{[0-9]+}}; ; CHECK-NEXT: ret; %y = fsub float -0.000000e+00, %x ret float %y } define ptx_device double @t1_f64(double %x) { -; CHECK: neg.f64 rd0, rd1; +; CHECK: neg.f64 rd{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK-NEXT: ret; %y = fsub double -0.000000e+00, %x ret double %y diff --git a/test/CodeGen/PTX/ld.ll b/test/CodeGen/PTX/ld.ll index 9b759987f83..d184d1243ab 100644 --- a/test/CodeGen/PTX/ld.ll +++ b/test/CodeGen/PTX/ld.ll @@ -63,7 +63,7 @@ define ptx_device i16 @t1_u16(i16* %p) { entry: -;CHECK: ld.global.u16 rh0, [r1]; +;CHECK: ld.global.u16 rh{{[0-9]+}}, [r{{[0-9]+}}]; ;CHECK-NEXT: ret; %x = load i16* %p ret i16 %x @@ -71,7 +71,7 @@ entry: define ptx_device i32 @t1_u32(i32* %p) { entry: -;CHECK: ld.global.u32 r0, [r1]; +;CHECK: ld.global.u32 r{{[0-9]+}}, [r{{[0-9]+}}]; ;CHECK-NEXT: ret; %x = load i32* %p ret i32 %x @@ -79,7 +79,7 @@ entry: define ptx_device i64 @t1_u64(i64* %p) { entry: -;CHECK: ld.global.u64 rd0, [r1]; +;CHECK: ld.global.u64 rd{{[0-9]+}}, [r{{[0-9]+}}]; ;CHECK-NEXT: ret; %x = load i64* %p ret i64 %x @@ -87,7 +87,7 @@ entry: define ptx_device float @t1_f32(float* %p) { entry: -;CHECK: ld.global.f32 r0, [r1]; 
+;CHECK: ld.global.f32 r{{[0-9]+}}, [r{{[0-9]+}}]; ;CHECK-NEXT: ret; %x = load float* %p ret float %x @@ -95,7 +95,7 @@ entry: define ptx_device double @t1_f64(double* %p) { entry: -;CHECK: ld.global.f64 rd0, [r1]; +;CHECK: ld.global.f64 rd{{[0-9]+}}, [r{{[0-9]+}}]; ;CHECK-NEXT: ret; %x = load double* %p ret double %x @@ -103,7 +103,7 @@ entry: define ptx_device i16 @t2_u16(i16* %p) { entry: -;CHECK: ld.global.u16 rh0, [r1+2]; +;CHECK: ld.global.u16 rh{{[0-9]+}}, [r{{[0-9]+}}+2]; ;CHECK-NEXT: ret; %i = getelementptr i16* %p, i32 1 %x = load i16* %i @@ -112,7 +112,7 @@ entry: define ptx_device i32 @t2_u32(i32* %p) { entry: -;CHECK: ld.global.u32 r0, [r1+4]; +;CHECK: ld.global.u32 r{{[0-9]+}}, [r{{[0-9]+}}+4]; ;CHECK-NEXT: ret; %i = getelementptr i32* %p, i32 1 %x = load i32* %i @@ -121,7 +121,7 @@ entry: define ptx_device i64 @t2_u64(i64* %p) { entry: -;CHECK: ld.global.u64 rd0, [r1+8]; +;CHECK: ld.global.u64 rd{{[0-9]+}}, [r{{[0-9]+}}+8]; ;CHECK-NEXT: ret; %i = getelementptr i64* %p, i32 1 %x = load i64* %i @@ -130,7 +130,7 @@ entry: define ptx_device float @t2_f32(float* %p) { entry: -;CHECK: ld.global.f32 r0, [r1+4]; +;CHECK: ld.global.f32 r{{[0-9]+}}, [r{{[0-9]+}}+4]; ;CHECK-NEXT: ret; %i = getelementptr float* %p, i32 1 %x = load float* %i @@ -139,7 +139,7 @@ entry: define ptx_device double @t2_f64(double* %p) { entry: -;CHECK: ld.global.f64 rd0, [r1+8]; +;CHECK: ld.global.f64 rd{{[0-9]+}}, [r{{[0-9]+}}+8]; ;CHECK-NEXT: ret; %i = getelementptr double* %p, i32 1 %x = load double* %i @@ -148,9 +148,9 @@ entry: define ptx_device i16 @t3_u16(i16* %p, i32 %q) { entry: -;CHECK: shl.b32 r0, r2, 1; -;CHECK-NEXT: add.u32 r0, r1, r0; -;CHECK-NEXT: ld.global.u16 rh0, [r0]; +;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 1; +;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]]; +;CHECK-NEXT: ld.global.u16 rh{{[0-9]+}}, [r[[R0]]]; %i = getelementptr i16* %p, i32 %q %x = load i16* %i ret i16 %x @@ -158,9 +158,9 @@ entry: define ptx_device i32 @t3_u32(i32* %p, i32 %q) { entry: 
-;CHECK: shl.b32 r0, r2, 2; -;CHECK-NEXT: add.u32 r0, r1, r0; -;CHECK-NEXT: ld.global.u32 r0, [r0]; +;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 2; +;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]]; +;CHECK-NEXT: ld.global.u32 r{{[0-9]+}}, [r[[R0]]]; %i = getelementptr i32* %p, i32 %q %x = load i32* %i ret i32 %x @@ -168,9 +168,9 @@ entry: define ptx_device i64 @t3_u64(i64* %p, i32 %q) { entry: -;CHECK: shl.b32 r0, r2, 3; -;CHECK-NEXT: add.u32 r0, r1, r0; -;CHECK-NEXT: ld.global.u64 rd0, [r0]; +;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 3; +;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]]; +;CHECK-NEXT: ld.global.u64 rd{{[0-9]+}}, [r[[R0]]]; %i = getelementptr i64* %p, i32 %q %x = load i64* %i ret i64 %x @@ -178,9 +178,9 @@ entry: define ptx_device float @t3_f32(float* %p, i32 %q) { entry: -;CHECK: shl.b32 r0, r2, 2; -;CHECK-NEXT: add.u32 r0, r1, r0; -;CHECK-NEXT: ld.global.f32 r0, [r0]; +;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 2; +;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]]; +;CHECK-NEXT: ld.global.f32 r{{[0-9]+}}, [r[[R0]]]; %i = getelementptr float* %p, i32 %q %x = load float* %i ret float %x @@ -188,9 +188,9 @@ entry: define ptx_device double @t3_f64(double* %p, i32 %q) { entry: -;CHECK: shl.b32 r0, r2, 3; -;CHECK-NEXT: add.u32 r0, r1, r0; -;CHECK-NEXT: ld.global.f64 rd0, [r0]; +;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 3; +;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]]; +;CHECK-NEXT: ld.global.f64 rd{{[0-9]+}}, [r[[R0]]]; %i = getelementptr double* %p, i32 %q %x = load double* %i ret double %x @@ -198,8 +198,8 @@ entry: define ptx_device i16 @t4_global_u16() { entry: -;CHECK: mov.u32 r0, array_i16; -;CHECK-NEXT: ld.global.u16 rh0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16; +;CHECK-NEXT: ld.global.u16 rh{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 0 %x = load i16* %i @@ -208,8 +208,8 @@ entry: define ptx_device i32 @t4_global_u32() { entry: -;CHECK: mov.u32 r0, array_i32; 
-;CHECK-NEXT: ld.global.u32 r0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_i32; +;CHECK-NEXT: ld.global.u32 r{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0 %x = load i32* %i @@ -218,8 +218,8 @@ entry: define ptx_device i64 @t4_global_u64() { entry: -;CHECK: mov.u32 r0, array_i64; -;CHECK-NEXT: ld.global.u64 rd0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_i64; +;CHECK-NEXT: ld.global.u64 rd{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0 %x = load i64* %i @@ -228,8 +228,8 @@ entry: define ptx_device float @t4_global_f32() { entry: -;CHECK: mov.u32 r0, array_float; -;CHECK-NEXT: ld.global.f32 r0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_float; +;CHECK-NEXT: ld.global.f32 r{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x float]* @array_float, i32 0, i32 0 %x = load float* %i @@ -238,8 +238,8 @@ entry: define ptx_device double @t4_global_f64() { entry: -;CHECK: mov.u32 r0, array_double; -;CHECK-NEXT: ld.global.f64 rd0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_double; +;CHECK-NEXT: ld.global.f64 rd{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x double]* @array_double, i32 0, i32 0 %x = load double* %i @@ -248,8 +248,8 @@ entry: define ptx_device i16 @t4_const_u16() { entry: -;CHECK: mov.u32 r0, array_constant_i16; -;CHECK-NEXT: ld.const.u16 rh0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_i16; +;CHECK-NEXT: ld.const.u16 rh{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x i16] addrspace(1)* @array_constant_i16, i32 0, i32 0 %x = load i16 addrspace(1)* %i @@ -258,8 +258,8 @@ entry: define ptx_device i32 @t4_const_u32() { entry: -;CHECK: mov.u32 r0, array_constant_i32; -;CHECK-NEXT: ld.const.u32 r0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_i32; +;CHECK-NEXT: ld.const.u32 r{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x i32] addrspace(1)* @array_constant_i32, i32 0, i32 0 %x 
= load i32 addrspace(1)* %i @@ -268,8 +268,8 @@ entry: define ptx_device i64 @t4_const_u64() { entry: -;CHECK: mov.u32 r0, array_constant_i64; -;CHECK-NEXT: ld.const.u64 rd0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_i64; +;CHECK-NEXT: ld.const.u64 rd{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x i64] addrspace(1)* @array_constant_i64, i32 0, i32 0 %x = load i64 addrspace(1)* %i @@ -278,8 +278,8 @@ entry: define ptx_device float @t4_const_f32() { entry: -;CHECK: mov.u32 r0, array_constant_float; -;CHECK-NEXT: ld.const.f32 r0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_float; +;CHECK-NEXT: ld.const.f32 r{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x float] addrspace(1)* @array_constant_float, i32 0, i32 0 %x = load float addrspace(1)* %i @@ -288,8 +288,8 @@ entry: define ptx_device double @t4_const_f64() { entry: -;CHECK: mov.u32 r0, array_constant_double; -;CHECK-NEXT: ld.const.f64 rd0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_double; +;CHECK-NEXT: ld.const.f64 rd{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x double] addrspace(1)* @array_constant_double, i32 0, i32 0 %x = load double addrspace(1)* %i @@ -298,8 +298,8 @@ entry: define ptx_device i16 @t4_local_u16() { entry: -;CHECK: mov.u32 r0, array_local_i16; -;CHECK-NEXT: ld.local.u16 rh0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i16; +;CHECK-NEXT: ld.local.u16 rh{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x i16] addrspace(2)* @array_local_i16, i32 0, i32 0 %x = load i16 addrspace(2)* %i @@ -308,8 +308,8 @@ entry: define ptx_device i32 @t4_local_u32() { entry: -;CHECK: mov.u32 r0, array_local_i32; -;CHECK-NEXT: ld.local.u32 r0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i32; +;CHECK-NEXT: ld.local.u32 r{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x i32] addrspace(2)* @array_local_i32, i32 0, i32 0 %x = load i32 addrspace(2)* %i @@ -318,8 +318,8 @@ entry: define 
ptx_device i64 @t4_local_u64() { entry: -;CHECK: mov.u32 r0, array_local_i64; -;CHECK-NEXT: ld.local.u64 rd0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i64; +;CHECK-NEXT: ld.local.u64 rd{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x i64] addrspace(2)* @array_local_i64, i32 0, i32 0 %x = load i64 addrspace(2)* %i @@ -328,8 +328,8 @@ entry: define ptx_device float @t4_local_f32() { entry: -;CHECK: mov.u32 r0, array_local_float; -;CHECK-NEXT: ld.local.f32 r0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_float; +;CHECK-NEXT: ld.local.f32 r{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x float] addrspace(2)* @array_local_float, i32 0, i32 0 %x = load float addrspace(2)* %i @@ -338,8 +338,8 @@ entry: define ptx_device double @t4_local_f64() { entry: -;CHECK: mov.u32 r0, array_local_double; -;CHECK-NEXT: ld.local.f64 rd0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_double; +;CHECK-NEXT: ld.local.f64 rd{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x double] addrspace(2)* @array_local_double, i32 0, i32 0 %x = load double addrspace(2)* %i @@ -348,8 +348,8 @@ entry: define ptx_device i16 @t4_shared_u16() { entry: -;CHECK: mov.u32 r0, array_shared_i16; -;CHECK-NEXT: ld.shared.u16 rh0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i16; +;CHECK-NEXT: ld.shared.u16 rh{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0 %x = load i16 addrspace(4)* %i @@ -358,8 +358,8 @@ entry: define ptx_device i32 @t4_shared_u32() { entry: -;CHECK: mov.u32 r0, array_shared_i32; -;CHECK-NEXT: ld.shared.u32 r0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i32; +;CHECK-NEXT: ld.shared.u32 r{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0 %x = load i32 addrspace(4)* %i @@ -368,8 +368,8 @@ entry: define ptx_device i64 @t4_shared_u64() { entry: -;CHECK: mov.u32 r0, array_shared_i64; 
-;CHECK-NEXT: ld.shared.u64 rd0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i64; +;CHECK-NEXT: ld.shared.u64 rd{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0 %x = load i64 addrspace(4)* %i @@ -378,8 +378,8 @@ entry: define ptx_device float @t4_shared_f32() { entry: -;CHECK: mov.u32 r0, array_shared_float; -;CHECK-NEXT: ld.shared.f32 r0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_float; +;CHECK-NEXT: ld.shared.f32 r{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0 %x = load float addrspace(4)* %i @@ -388,8 +388,8 @@ entry: define ptx_device double @t4_shared_f64() { entry: -;CHECK: mov.u32 r0, array_shared_double; -;CHECK-NEXT: ld.shared.f64 rd0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_double; +;CHECK-NEXT: ld.shared.f64 rd{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0 %x = load double addrspace(4)* %i @@ -398,8 +398,8 @@ entry: define ptx_device i16 @t5_u16() { entry: -;CHECK: mov.u32 r0, array_i16; -;CHECK-NEXT: ld.global.u16 rh0, [r0+2]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16; +;CHECK-NEXT: ld.global.u16 rh{{[0-9]+}}, [r[[R0]]+2]; ;CHECK-NEXT: ret; %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1 %x = load i16* %i @@ -408,8 +408,8 @@ entry: define ptx_device i32 @t5_u32() { entry: -;CHECK: mov.u32 r0, array_i32; -;CHECK-NEXT: ld.global.u32 r0, [r0+4]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_i32; +;CHECK-NEXT: ld.global.u32 r{{[0-9]+}}, [r[[R0]]+4]; ;CHECK-NEXT: ret; %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1 %x = load i32* %i @@ -418,8 +418,8 @@ entry: define ptx_device i64 @t5_u64() { entry: -;CHECK: mov.u32 r0, array_i64; -;CHECK-NEXT: ld.global.u64 rd0, [r0+8]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_i64; +;CHECK-NEXT: ld.global.u64 rd{{[0-9]+}}, [r[[R0]]+8]; ;CHECK-NEXT: ret; %i = getelementptr 
[10 x i64]* @array_i64, i32 0, i32 1 %x = load i64* %i @@ -428,8 +428,8 @@ entry: define ptx_device float @t5_f32() { entry: -;CHECK: mov.u32 r0, array_float; -;CHECK-NEXT: ld.global.f32 r0, [r0+4]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_float; +;CHECK-NEXT: ld.global.f32 r{{[0-9]+}}, [r[[R0]]+4]; ;CHECK-NEXT: ret; %i = getelementptr [10 x float]* @array_float, i32 0, i32 1 %x = load float* %i @@ -438,8 +438,8 @@ entry: define ptx_device double @t5_f64() { entry: -;CHECK: mov.u32 r0, array_double; -;CHECK-NEXT: ld.global.f64 rd0, [r0+8]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_double; +;CHECK-NEXT: ld.global.f64 rd{{[0-9]+}}, [r[[R0]]+8]; ;CHECK-NEXT: ret; %i = getelementptr [10 x double]* @array_double, i32 0, i32 1 %x = load double* %i diff --git a/test/CodeGen/PTX/llvm-intrinsic.ll b/test/CodeGen/PTX/llvm-intrinsic.ll index 9c532c1a3c1..4611c54be87 100644 --- a/test/CodeGen/PTX/llvm-intrinsic.ll +++ b/test/CodeGen/PTX/llvm-intrinsic.ll @@ -2,7 +2,7 @@ define ptx_device float @test_sqrt_f32(float %x) { entry: -; CHECK: sqrt.rn.f32 r0, r1; +; CHECK: sqrt.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}; ; CHECK-NEXT: ret; %y = call float @llvm.sqrt.f32(float %x) ret float %y @@ -10,7 +10,7 @@ entry: define ptx_device double @test_sqrt_f64(double %x) { entry: -; CHECK: sqrt.rn.f64 rd0, rd1; +; CHECK: sqrt.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK-NEXT: ret; %y = call double @llvm.sqrt.f64(double %x) ret double %y @@ -18,7 +18,7 @@ entry: define ptx_device float @test_sin_f32(float %x) { entry: -; CHECK: sin.approx.f32 r0, r1; +; CHECK: sin.approx.f32 r{{[0-9]+}}, r{{[0-9]+}}; ; CHECK-NEXT: ret; %y = call float @llvm.sin.f32(float %x) ret float %y @@ -26,7 +26,7 @@ entry: define ptx_device double @test_sin_f64(double %x) { entry: -; CHECK: sin.approx.f64 rd0, rd1; +; CHECK: sin.approx.f64 rd{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK-NEXT: ret; %y = call double @llvm.sin.f64(double %x) ret double %y @@ -34,7 +34,7 @@ entry: define ptx_device float @test_cos_f32(float %x) { entry: -; CHECK: 
cos.approx.f32 r0, r1; +; CHECK: cos.approx.f32 r{{[0-9]+}}, r{{[0-9]+}}; ; CHECK-NEXT: ret; %y = call float @llvm.cos.f32(float %x) ret float %y @@ -42,7 +42,7 @@ entry: define ptx_device double @test_cos_f64(double %x) { entry: -; CHECK: cos.approx.f64 rd0, rd1; +; CHECK: cos.approx.f64 rd{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK-NEXT: ret; %y = call double @llvm.cos.f64(double %x) ret double %y diff --git a/test/CodeGen/PTX/mad.ll b/test/CodeGen/PTX/mad.ll index 56d3811aa7c..0e4d3f99538 100644 --- a/test/CodeGen/PTX/mad.ll +++ b/test/CodeGen/PTX/mad.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=ptx32 -mattr=+sm13 | FileCheck %s define ptx_device float @t1_f32(float %x, float %y, float %z) { -; CHECK: mad.rn.f32 r0, r1, r2, r3; +; CHECK: mad.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}; ; CHECK-NEXT: ret; %a = fmul float %x, %y %b = fadd float %a, %z @@ -9,7 +9,7 @@ define ptx_device float @t1_f32(float %x, float %y, float %z) { } define ptx_device double @t1_f64(double %x, double %y, double %z) { -; CHECK: mad.rn.f64 rd0, rd1, rd2, rd3; +; CHECK: mad.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK-NEXT: ret; %a = fmul double %x, %y %b = fadd double %a, %z diff --git a/test/CodeGen/PTX/mov.ll b/test/CodeGen/PTX/mov.ll index 05ce4c0a887..cce6a5b8976 100644 --- a/test/CodeGen/PTX/mov.ll +++ b/test/CodeGen/PTX/mov.ll @@ -1,61 +1,61 @@ ; RUN: llc < %s -march=ptx32 | FileCheck %s define ptx_device i16 @t1_u16() { -; CHECK: mov.u16 rh0, 0; +; CHECK: mov.u16 rh{{[0-9]+}}, 0; ; CHECK: ret; ret i16 0 } define ptx_device i32 @t1_u32() { -; CHECK: mov.u32 r0, 0; +; CHECK: mov.u32 r{{[0-9]+}}, 0; ; CHECK: ret; ret i32 0 } define ptx_device i64 @t1_u64() { -; CHECK: mov.u64 rd0, 0; +; CHECK: mov.u64 rd{{[0-9]+}}, 0; ; CHECK: ret; ret i64 0 } define ptx_device float @t1_f32() { -; CHECK: mov.f32 r0, 0F00000000; +; CHECK: mov.f32 r{{[0-9]+}}, 0F00000000; ; CHECK: ret; ret float 0.0 } define ptx_device double @t1_f64() { -; CHECK: mov.f64 rd0, 
0D0000000000000000; +; CHECK: mov.f64 rd{{[0-9]+}}, 0D0000000000000000; ; CHECK: ret; ret double 0.0 } define ptx_device i16 @t2_u16(i16 %x) { -; CHECK: mov.u16 rh0, rh1; +; CHECK: mov.u16 rh{{[0-9]+}}, rh{{[0-9]+}}; ; CHECK: ret; ret i16 %x } define ptx_device i32 @t2_u32(i32 %x) { -; CHECK: mov.u32 r0, r1; +; CHECK: mov.u32 r{{[0-9]+}}, r{{[0-9]+}}; ; CHECK: ret; ret i32 %x } define ptx_device i64 @t2_u64(i64 %x) { -; CHECK: mov.u64 rd0, rd1; +; CHECK: mov.u64 rd{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK: ret; ret i64 %x } define ptx_device float @t3_f32(float %x) { -; CHECK: mov.u32 r0, r1; +; CHECK: mov.u32 r{{[0-9]+}}, r{{[0-9]+}}; ; CHECK-NEXT: ret; ret float %x } define ptx_device double @t3_f64(double %x) { -; CHECK: mov.u64 rd0, rd1; +; CHECK: mov.u64 rd{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK-NEXT: ret; ret double %x } diff --git a/test/CodeGen/PTX/mul.ll b/test/CodeGen/PTX/mul.ll index 2093556dac4..491cc747afd 100644 --- a/test/CodeGen/PTX/mul.ll +++ b/test/CodeGen/PTX/mul.ll @@ -11,28 +11,28 @@ ;} define ptx_device float @t1_f32(float %x, float %y) { -; CHECK: mul.rn.f32 r0, r1, r2 +; CHECK: mul.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}} ; CHECK-NEXT: ret; %z = fmul float %x, %y ret float %z } define ptx_device double @t1_f64(double %x, double %y) { -; CHECK: mul.rn.f64 rd0, rd1, rd2 +; CHECK: mul.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}} ; CHECK-NEXT: ret; %z = fmul double %x, %y ret double %z } define ptx_device float @t2_f32(float %x) { -; CHECK: mul.rn.f32 r0, r1, 0F40A00000; +; CHECK: mul.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, 0F40A00000; ; CHECK-NEXT: ret; %z = fmul float %x, 5.0 ret float %z } define ptx_device double @t2_f64(double %x) { -; CHECK: mul.rn.f64 rd0, rd1, 0D4014000000000000; +; CHECK: mul.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, 0D4014000000000000; ; CHECK-NEXT: ret; %z = fmul double %x, 5.0 ret double %z diff --git a/test/CodeGen/PTX/parameter-order.ll b/test/CodeGen/PTX/parameter-order.ll index 54864720992..b16556e0661 100644 --- 
a/test/CodeGen/PTX/parameter-order.ll +++ b/test/CodeGen/PTX/parameter-order.ll @@ -1,8 +1,8 @@ ; RUN: llc < %s -march=ptx32 | FileCheck %s -; CHECK: .func (.reg .b32 r0) test_parameter_order (.reg .b32 r1, .reg .b32 r2, .reg .b32 r3, .reg .b32 r4) +; CHECK: .func (.reg .b32 r{{[0-9]+}}) test_parameter_order (.reg .b32 r{{[0-9]+}}, .reg .b32 r{{[0-9]+}}, .reg .b32 r{{[0-9]+}}, .reg .b32 r{{[0-9]+}}) define ptx_device i32 @test_parameter_order(float %a, i32 %b, i32 %c, float %d) { -; CHECK: sub.u32 r0, r2, r3 +; CHECK: sub.u32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}} %result = sub i32 %b, %c ret i32 %result } diff --git a/test/CodeGen/PTX/selp.ll b/test/CodeGen/PTX/selp.ll index 19cfa5399d0..e705fbea27c 100644 --- a/test/CodeGen/PTX/selp.ll +++ b/test/CodeGen/PTX/selp.ll @@ -1,25 +1,25 @@ ; RUN: llc < %s -march=ptx32 | FileCheck %s define ptx_device i32 @test_selp_i32(i1 %x, i32 %y, i32 %z) { -; CHECK: selp.u32 r0, r1, r2, p1; +; CHECK: selp.u32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, p{{[0-9]+}}; %a = select i1 %x, i32 %y, i32 %z ret i32 %a } define ptx_device i64 @test_selp_i64(i1 %x, i64 %y, i64 %z) { -; CHECK: selp.u64 rd0, rd1, rd2, p1; +; CHECK: selp.u64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}, p{{[0-9]+}}; %a = select i1 %x, i64 %y, i64 %z ret i64 %a } define ptx_device float @test_selp_f32(i1 %x, float %y, float %z) { -; CHECK: selp.f32 r0, r1, r2, p1; +; CHECK: selp.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, p{{[0-9]+}}; %a = select i1 %x, float %y, float %z ret float %a } define ptx_device double @test_selp_f64(i1 %x, double %y, double %z) { -; CHECK: selp.f64 rd0, rd1, rd2, p1; +; CHECK: selp.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}, p{{[0-9]+}}; %a = select i1 %x, double %y, double %z ret double %a } diff --git a/test/CodeGen/PTX/setp.ll b/test/CodeGen/PTX/setp.ll index 3e01a75a709..b8cb364550b 100644 --- a/test/CodeGen/PTX/setp.ll +++ b/test/CodeGen/PTX/setp.ll @@ -1,8 +1,8 @@ ; RUN: llc < %s -march=ptx32 | FileCheck %s define ptx_device i32 
@test_setp_eq_u32_rr(i32 %x, i32 %y) { -; CHECK: setp.eq.u32 p0, r1, r2; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.eq.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp eq i32 %x, %y %z = zext i1 %p to i32 @@ -10,8 +10,8 @@ define ptx_device i32 @test_setp_eq_u32_rr(i32 %x, i32 %y) { } define ptx_device i32 @test_setp_ne_u32_rr(i32 %x, i32 %y) { -; CHECK: setp.ne.u32 p0, r1, r2; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.ne.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp ne i32 %x, %y %z = zext i1 %p to i32 @@ -19,8 +19,8 @@ define ptx_device i32 @test_setp_ne_u32_rr(i32 %x, i32 %y) { } define ptx_device i32 @test_setp_lt_u32_rr(i32 %x, i32 %y) { -; CHECK: setp.lt.u32 p0, r1, r2; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.lt.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp ult i32 %x, %y %z = zext i1 %p to i32 @@ -28,8 +28,8 @@ define ptx_device i32 @test_setp_lt_u32_rr(i32 %x, i32 %y) { } define ptx_device i32 @test_setp_le_u32_rr(i32 %x, i32 %y) { -; CHECK: setp.le.u32 p0, r1, r2; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.le.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp ule i32 %x, %y %z = zext i1 %p to i32 @@ -37,8 +37,8 @@ define ptx_device i32 @test_setp_le_u32_rr(i32 %x, i32 %y) { } define ptx_device i32 @test_setp_gt_u32_rr(i32 %x, i32 %y) { -; CHECK: setp.gt.u32 p0, r1, r2; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.gt.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp ugt i32 %x, %y %z = zext i1 %p to i32 @@ -46,8 +46,8 @@ define ptx_device i32 @test_setp_gt_u32_rr(i32 %x, i32 %y) { } define ptx_device i32 
@test_setp_ge_u32_rr(i32 %x, i32 %y) { -; CHECK: setp.ge.u32 p0, r1, r2; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.ge.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp uge i32 %x, %y %z = zext i1 %p to i32 @@ -55,8 +55,8 @@ define ptx_device i32 @test_setp_ge_u32_rr(i32 %x, i32 %y) { } define ptx_device i32 @test_setp_lt_s32_rr(i32 %x, i32 %y) { -; CHECK: setp.lt.s32 p0, r1, r2; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.lt.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp slt i32 %x, %y %z = zext i1 %p to i32 @@ -64,8 +64,8 @@ define ptx_device i32 @test_setp_lt_s32_rr(i32 %x, i32 %y) { } define ptx_device i32 @test_setp_le_s32_rr(i32 %x, i32 %y) { -; CHECK: setp.le.s32 p0, r1, r2; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.le.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp sle i32 %x, %y %z = zext i1 %p to i32 @@ -73,8 +73,8 @@ define ptx_device i32 @test_setp_le_s32_rr(i32 %x, i32 %y) { } define ptx_device i32 @test_setp_gt_s32_rr(i32 %x, i32 %y) { -; CHECK: setp.gt.s32 p0, r1, r2; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.gt.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp sgt i32 %x, %y %z = zext i1 %p to i32 @@ -82,8 +82,8 @@ define ptx_device i32 @test_setp_gt_s32_rr(i32 %x, i32 %y) { } define ptx_device i32 @test_setp_ge_s32_rr(i32 %x, i32 %y) { -; CHECK: setp.ge.s32 p0, r1, r2; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.ge.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp sge i32 %x, %y %z = zext i1 %p to i32 @@ -91,8 +91,8 @@ define ptx_device i32 @test_setp_ge_s32_rr(i32 %x, i32 %y) { } define ptx_device i32 
@test_setp_eq_u32_ri(i32 %x) { -; CHECK: setp.eq.u32 p0, r1, 1; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.eq.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 1; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp eq i32 %x, 1 %z = zext i1 %p to i32 @@ -100,8 +100,8 @@ define ptx_device i32 @test_setp_eq_u32_ri(i32 %x) { } define ptx_device i32 @test_setp_ne_u32_ri(i32 %x) { -; CHECK: setp.ne.u32 p0, r1, 1; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.ne.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 1; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp ne i32 %x, 1 %z = zext i1 %p to i32 @@ -109,8 +109,8 @@ define ptx_device i32 @test_setp_ne_u32_ri(i32 %x) { } define ptx_device i32 @test_setp_lt_u32_ri(i32 %x) { -; CHECK: setp.eq.u32 p0, r1, 0; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.eq.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 0; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp ult i32 %x, 1 %z = zext i1 %p to i32 @@ -118,8 +118,8 @@ define ptx_device i32 @test_setp_lt_u32_ri(i32 %x) { } define ptx_device i32 @test_setp_le_u32_ri(i32 %x) { -; CHECK: setp.lt.u32 p0, r1, 2; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.lt.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 2; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp ule i32 %x, 1 %z = zext i1 %p to i32 @@ -127,8 +127,8 @@ define ptx_device i32 @test_setp_le_u32_ri(i32 %x) { } define ptx_device i32 @test_setp_gt_u32_ri(i32 %x) { -; CHECK: setp.gt.u32 p0, r1, 1; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.gt.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 1; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp ugt i32 %x, 1 %z = zext i1 %p to i32 @@ -136,8 +136,8 @@ define ptx_device i32 @test_setp_gt_u32_ri(i32 %x) { } define ptx_device i32 @test_setp_ge_u32_ri(i32 %x) { -; CHECK: setp.ne.u32 p0, r1, 0; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.ne.u32 p[[P0:[0-9]+]], 
r{{[0-9]+}}, 0; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp uge i32 %x, 1 %z = zext i1 %p to i32 @@ -145,8 +145,8 @@ define ptx_device i32 @test_setp_ge_u32_ri(i32 %x) { } define ptx_device i32 @test_setp_lt_s32_ri(i32 %x) { -; CHECK: setp.lt.s32 p0, r1, 1; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.lt.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, 1; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp slt i32 %x, 1 %z = zext i1 %p to i32 @@ -154,8 +154,8 @@ define ptx_device i32 @test_setp_lt_s32_ri(i32 %x) { } define ptx_device i32 @test_setp_le_s32_ri(i32 %x) { -; CHECK: setp.lt.s32 p0, r1, 2; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.lt.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, 2; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp sle i32 %x, 1 %z = zext i1 %p to i32 @@ -163,8 +163,8 @@ define ptx_device i32 @test_setp_le_s32_ri(i32 %x) { } define ptx_device i32 @test_setp_gt_s32_ri(i32 %x) { -; CHECK: setp.gt.s32 p0, r1, 1; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.gt.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, 1; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp sgt i32 %x, 1 %z = zext i1 %p to i32 @@ -172,8 +172,8 @@ define ptx_device i32 @test_setp_gt_s32_ri(i32 %x) { } define ptx_device i32 @test_setp_ge_s32_ri(i32 %x) { -; CHECK: setp.gt.s32 p0, r1, 0; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.gt.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, 0; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp sge i32 %x, 1 %z = zext i1 %p to i32 @@ -181,9 +181,9 @@ define ptx_device i32 @test_setp_ge_s32_ri(i32 %x) { } define ptx_device i32 @test_setp_4_op_format_1(i32 %x, i32 %y, i32 %u, i32 %v) { -; CHECK: setp.gt.u32 p0, r3, r4; -; CHECK-NEXT: setp.eq.and.u32 p0, r1, r2, p0; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.gt.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}; +; CHECK-NEXT: setp.eq.and.u32 p[[P0]], 
r{{[0-9]+}}, r{{[0-9]+}}, p[[P0]]; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %c = icmp eq i32 %x, %y %d = icmp ugt i32 %u, %v @@ -193,9 +193,9 @@ define ptx_device i32 @test_setp_4_op_format_1(i32 %x, i32 %y, i32 %u, i32 %v) { } define ptx_device i32 @test_setp_4_op_format_2(i32 %x, i32 %y, i32 %w) { -; CHECK: setp.gt.b32 p0, r3, 0; -; CHECK-NEXT: setp.eq.and.u32 p0, r1, r2, !p0; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.gt.b32 p[[P0:[0-9]+]], r{{[0-9]+}}, 0; +; CHECK-NEXT: setp.eq.and.u32 p[[P0]], r{{[0-9]+}}, r{{[0-9]+}}, !p[[P0]]; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %c = trunc i32 %w to i1 %d = icmp eq i32 %x, %y diff --git a/test/CodeGen/PTX/shl.ll b/test/CodeGen/PTX/shl.ll index 6e72c922132..b3818e1e76c 100644 --- a/test/CodeGen/PTX/shl.ll +++ b/test/CodeGen/PTX/shl.ll @@ -1,21 +1,21 @@ ; RUN: llc < %s -march=ptx32 | FileCheck %s define ptx_device i32 @t1(i32 %x, i32 %y) { -; CHECK: shl.b32 r0, r1, r2 +; CHECK: shl.b32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}} %z = shl i32 %x, %y ; CHECK: ret; ret i32 %z } define ptx_device i32 @t2(i32 %x) { -; CHECK: shl.b32 r0, r1, 3 +; CHECK: shl.b32 r{{[0-9]+}}, r{{[0-9]+}}, 3 %z = shl i32 %x, 3 ; CHECK: ret; ret i32 %z } define ptx_device i32 @t3(i32 %x) { -; CHECK: shl.b32 r0, 3, r1 +; CHECK: shl.b32 r{{[0-9]+}}, 3, r{{[0-9]+}} %z = shl i32 3, %x ; CHECK: ret; ret i32 %z diff --git a/test/CodeGen/PTX/shr.ll b/test/CodeGen/PTX/shr.ll index 8693e0ecf49..cb57546dca1 100644 --- a/test/CodeGen/PTX/shr.ll +++ b/test/CodeGen/PTX/shr.ll @@ -1,42 +1,42 @@ ; RUN: llc < %s -march=ptx32 | FileCheck %s define ptx_device i32 @t1(i32 %x, i32 %y) { -; CHECK: shr.u32 r0, r1, r2 +; CHECK: shr.u32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}} %z = lshr i32 %x, %y ; CHECK: ret; ret i32 %z } define ptx_device i32 @t2(i32 %x) { -; CHECK: shr.u32 r0, r1, 3 +; CHECK: shr.u32 r{{[0-9]+}}, r{{[0-9]+}}, 3 %z = lshr i32 %x, 3 ; CHECK: ret; ret i32 %z } define ptx_device i32 @t3(i32 %x) 
{ -; CHECK: shr.u32 r0, 3, r1 +; CHECK: shr.u32 r{{[0-9]+}}, 3, r{{[0-9]+}} %z = lshr i32 3, %x ; CHECK: ret; ret i32 %z } define ptx_device i32 @t4(i32 %x, i32 %y) { -; CHECK: shr.s32 r0, r1, r2 +; CHECK: shr.s32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}} %z = ashr i32 %x, %y ; CHECK: ret; ret i32 %z } define ptx_device i32 @t5(i32 %x) { -; CHECK: shr.s32 r0, r1, 3 +; CHECK: shr.s32 r{{[0-9]+}}, r{{[0-9]+}}, 3 %z = ashr i32 %x, 3 ; CHECK: ret; ret i32 %z } define ptx_device i32 @t6(i32 %x) { -; CHECK: shr.s32 r0, -3, r1 +; CHECK: shr.s32 r{{[0-9]+}}, -3, r{{[0-9]+}} %z = ashr i32 -3, %x ; CHECK: ret; ret i32 %z diff --git a/test/CodeGen/PTX/st.ll b/test/CodeGen/PTX/st.ll index 612967ac830..b08528e1c3c 100644 --- a/test/CodeGen/PTX/st.ll +++ b/test/CodeGen/PTX/st.ll @@ -63,7 +63,7 @@ define ptx_device void @t1_u16(i16* %p, i16 %x) { entry: -;CHECK: st.global.u16 [r1], rh1; +;CHECK: st.global.u16 [r{{[0-9]+}}], rh{{[0-9]+}}; ;CHECK-NEXT: ret; store i16 %x, i16* %p ret void @@ -71,7 +71,7 @@ entry: define ptx_device void @t1_u32(i32* %p, i32 %x) { entry: -;CHECK: st.global.u32 [r1], r2; +;CHECK: st.global.u32 [r{{[0-9]+}}], r{{[0-9]+}}; ;CHECK-NEXT: ret; store i32 %x, i32* %p ret void @@ -79,7 +79,7 @@ entry: define ptx_device void @t1_u64(i64* %p, i64 %x) { entry: -;CHECK: st.global.u64 [r1], rd1; +;CHECK: st.global.u64 [r{{[0-9]+}}], rd{{[0-9]+}}; ;CHECK-NEXT: ret; store i64 %x, i64* %p ret void @@ -87,7 +87,7 @@ entry: define ptx_device void @t1_f32(float* %p, float %x) { entry: -;CHECK: st.global.f32 [r1], r2; +;CHECK: st.global.f32 [r{{[0-9]+}}], r{{[0-9]+}}; ;CHECK-NEXT: ret; store float %x, float* %p ret void @@ -95,7 +95,7 @@ entry: define ptx_device void @t1_f64(double* %p, double %x) { entry: -;CHECK: st.global.f64 [r1], rd1; +;CHECK: st.global.f64 [r{{[0-9]+}}], rd{{[0-9]+}}; ;CHECK-NEXT: ret; store double %x, double* %p ret void @@ -103,7 +103,7 @@ entry: define ptx_device void @t2_u16(i16* %p, i16 %x) { entry: -;CHECK: st.global.u16 [r1+2], rh1; +;CHECK: 
st.global.u16 [r{{[0-9]+}}+2], rh{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr i16* %p, i32 1 store i16 %x, i16* %i @@ -112,7 +112,7 @@ entry: define ptx_device void @t2_u32(i32* %p, i32 %x) { entry: -;CHECK: st.global.u32 [r1+4], r2; +;CHECK: st.global.u32 [r{{[0-9]+}}+4], r{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr i32* %p, i32 1 store i32 %x, i32* %i @@ -121,7 +121,7 @@ entry: define ptx_device void @t2_u64(i64* %p, i64 %x) { entry: -;CHECK: st.global.u64 [r1+8], rd1; +;CHECK: st.global.u64 [r{{[0-9]+}}+8], rd{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr i64* %p, i32 1 store i64 %x, i64* %i @@ -130,7 +130,7 @@ entry: define ptx_device void @t2_f32(float* %p, float %x) { entry: -;CHECK: st.global.f32 [r1+4], r2; +;CHECK: st.global.f32 [r{{[0-9]+}}+4], r{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr float* %p, i32 1 store float %x, float* %i @@ -139,7 +139,7 @@ entry: define ptx_device void @t2_f64(double* %p, double %x) { entry: -;CHECK: st.global.f64 [r1+8], rd1; +;CHECK: st.global.f64 [r{{[0-9]+}}+8], rd{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr double* %p, i32 1 store double %x, double* %i @@ -148,9 +148,9 @@ entry: define ptx_device void @t3_u16(i16* %p, i32 %q, i16 %x) { entry: -;CHECK: shl.b32 r0, r2, 1; -;CHECK-NEXT: add.u32 r0, r1, r0; -;CHECK-NEXT: st.global.u16 [r0], rh1; +;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 1; +;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]]; +;CHECK-NEXT: st.global.u16 [r[[R0]]], rh{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr i16* %p, i32 %q store i16 %x, i16* %i @@ -159,9 +159,9 @@ entry: define ptx_device void @t3_u32(i32* %p, i32 %q, i32 %x) { entry: -;CHECK: shl.b32 r0, r2, 2; -;CHECK-NEXT: add.u32 r0, r1, r0; -;CHECK-NEXT: st.global.u32 [r0], r3; +;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 2; +;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]]; +;CHECK-NEXT: st.global.u32 [r[[R0]]], r{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr i32* %p, i32 %q store i32 %x, i32* %i @@ -170,9 +170,9 @@ 
entry: define ptx_device void @t3_u64(i64* %p, i32 %q, i64 %x) { entry: -;CHECK: shl.b32 r0, r2, 3; -;CHECK-NEXT: add.u32 r0, r1, r0; -;CHECK-NEXT: st.global.u64 [r0], rd1; +;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 3; +;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]]; +;CHECK-NEXT: st.global.u64 [r[[R0]]], rd{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr i64* %p, i32 %q store i64 %x, i64* %i @@ -181,9 +181,9 @@ entry: define ptx_device void @t3_f32(float* %p, i32 %q, float %x) { entry: -;CHECK: shl.b32 r0, r2, 2; -;CHECK-NEXT: add.u32 r0, r1, r0; -;CHECK-NEXT: st.global.f32 [r0], r3; +;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 2; +;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]]; +;CHECK-NEXT: st.global.f32 [r[[R0]]], r{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr float* %p, i32 %q store float %x, float* %i @@ -192,9 +192,9 @@ entry: define ptx_device void @t3_f64(double* %p, i32 %q, double %x) { entry: -;CHECK: shl.b32 r0, r2, 3; -;CHECK-NEXT: add.u32 r0, r1, r0; -;CHECK-NEXT: st.global.f64 [r0], rd1; +;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 3; +;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]]; +;CHECK-NEXT: st.global.f64 [r[[R0]]], rd{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr double* %p, i32 %q store double %x, double* %i @@ -203,8 +203,8 @@ entry: define ptx_device void @t4_global_u16(i16 %x) { entry: -;CHECK: mov.u32 r0, array_i16; -;CHECK-NEXT: st.global.u16 [r0], rh1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16; +;CHECK-NEXT: st.global.u16 [r[[R0]]], rh{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x i16]* @array_i16, i16 0, i16 0 store i16 %x, i16* %i @@ -213,8 +213,8 @@ entry: define ptx_device void @t4_global_u32(i32 %x) { entry: -;CHECK: mov.u32 r0, array_i32; -;CHECK-NEXT: st.global.u32 [r0], r1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_i32; +;CHECK-NEXT: st.global.u32 [r[[R0]]], r{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0 store i32 %x, i32* %i @@ -223,8 +223,8 @@ entry: define 
ptx_device void @t4_global_u64(i64 %x) { entry: -;CHECK: mov.u32 r0, array_i64; -;CHECK-NEXT: st.global.u64 [r0], rd1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_i64; +;CHECK-NEXT: st.global.u64 [r[[R0]]], rd{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0 store i64 %x, i64* %i @@ -233,8 +233,8 @@ entry: define ptx_device void @t4_global_f32(float %x) { entry: -;CHECK: mov.u32 r0, array_float; -;CHECK-NEXT: st.global.f32 [r0], r1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_float; +;CHECK-NEXT: st.global.f32 [r[[R0]]], r{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x float]* @array_float, i32 0, i32 0 store float %x, float* %i @@ -243,8 +243,8 @@ entry: define ptx_device void @t4_global_f64(double %x) { entry: -;CHECK: mov.u32 r0, array_double; -;CHECK-NEXT: st.global.f64 [r0], rd1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_double; +;CHECK-NEXT: st.global.f64 [r[[R0]]], rd{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x double]* @array_double, i32 0, i32 0 store double %x, double* %i @@ -253,8 +253,8 @@ entry: define ptx_device void @t4_local_u16(i16 %x) { entry: -;CHECK: mov.u32 r0, array_local_i16; -;CHECK-NEXT: st.local.u16 [r0], rh1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i16; +;CHECK-NEXT: st.local.u16 [r[[R0]]], rh{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x i16] addrspace(2)* @array_local_i16, i32 0, i32 0 store i16 %x, i16 addrspace(2)* %i @@ -263,8 +263,8 @@ entry: define ptx_device void @t4_local_u32(i32 %x) { entry: -;CHECK: mov.u32 r0, array_local_i32; -;CHECK-NEXT: st.local.u32 [r0], r1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i32; +;CHECK-NEXT: st.local.u32 [r[[R0]]], r{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x i32] addrspace(2)* @array_local_i32, i32 0, i32 0 store i32 %x, i32 addrspace(2)* %i @@ -273,8 +273,8 @@ entry: define ptx_device void @t4_local_u64(i64 %x) { entry: -;CHECK: mov.u32 r0, array_local_i64; -;CHECK-NEXT: st.local.u64 [r0], rd1; +;CHECK: mov.u32 r[[R0:[0-9]+]], 
array_local_i64; +;CHECK-NEXT: st.local.u64 [r[[R0]]], rd{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x i64] addrspace(2)* @array_local_i64, i32 0, i32 0 store i64 %x, i64 addrspace(2)* %i @@ -283,8 +283,8 @@ entry: define ptx_device void @t4_local_f32(float %x) { entry: -;CHECK: mov.u32 r0, array_local_float; -;CHECK-NEXT: st.local.f32 [r0], r1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_float; +;CHECK-NEXT: st.local.f32 [r[[R0]]], r{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x float] addrspace(2)* @array_local_float, i32 0, i32 0 store float %x, float addrspace(2)* %i @@ -293,8 +293,8 @@ entry: define ptx_device void @t4_local_f64(double %x) { entry: -;CHECK: mov.u32 r0, array_local_double; -;CHECK-NEXT: st.local.f64 [r0], rd1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_double; +;CHECK-NEXT: st.local.f64 [r[[R0]]], rd{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x double] addrspace(2)* @array_local_double, i32 0, i32 0 store double %x, double addrspace(2)* %i @@ -303,8 +303,8 @@ entry: define ptx_device void @t4_shared_u16(i16 %x) { entry: -;CHECK: mov.u32 r0, array_shared_i16; -;CHECK-NEXT: st.shared.u16 [r0], rh1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i16; +;CHECK-NEXT: st.shared.u16 [r[[R0]]], rh{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0 store i16 %x, i16 addrspace(4)* %i @@ -313,8 +313,8 @@ entry: define ptx_device void @t4_shared_u32(i32 %x) { entry: -;CHECK: mov.u32 r0, array_shared_i32; -;CHECK-NEXT: st.shared.u32 [r0], r1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i32; +;CHECK-NEXT: st.shared.u32 [r[[R0]]], r{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0 store i32 %x, i32 addrspace(4)* %i @@ -323,8 +323,8 @@ entry: define ptx_device void @t4_shared_u64(i64 %x) { entry: -;CHECK: mov.u32 r0, array_shared_i64; -;CHECK-NEXT: st.shared.u64 [r0], rd1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i64; 
+;CHECK-NEXT: st.shared.u64 [r[[R0]]], rd{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0 store i64 %x, i64 addrspace(4)* %i @@ -333,8 +333,8 @@ entry: define ptx_device void @t4_shared_f32(float %x) { entry: -;CHECK: mov.u32 r0, array_shared_float; -;CHECK-NEXT: st.shared.f32 [r0], r1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_float; +;CHECK-NEXT: st.shared.f32 [r[[R0]]], r{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0 store float %x, float addrspace(4)* %i @@ -343,8 +343,8 @@ entry: define ptx_device void @t4_shared_f64(double %x) { entry: -;CHECK: mov.u32 r0, array_shared_double; -;CHECK-NEXT: st.shared.f64 [r0], rd1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_double; +;CHECK-NEXT: st.shared.f64 [r[[R0]]], rd{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0 store double %x, double addrspace(4)* %i @@ -353,8 +353,8 @@ entry: define ptx_device void @t5_u16(i16 %x) { entry: -;CHECK: mov.u32 r0, array_i16; -;CHECK-NEXT: st.global.u16 [r0+2], rh1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16; +;CHECK-NEXT: st.global.u16 [r[[R0]]+2], rh{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1 store i16 %x, i16* %i @@ -363,8 +363,8 @@ entry: define ptx_device void @t5_u32(i32 %x) { entry: -;CHECK: mov.u32 r0, array_i32; -;CHECK-NEXT: st.global.u32 [r0+4], r1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_i32; +;CHECK-NEXT: st.global.u32 [r[[R0]]+4], r{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1 store i32 %x, i32* %i @@ -373,8 +373,8 @@ entry: define ptx_device void @t5_u64(i64 %x) { entry: -;CHECK: mov.u32 r0, array_i64; -;CHECK-NEXT: st.global.u64 [r0+8], rd1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_i64; +;CHECK-NEXT: st.global.u64 [r[[R0]]+8], rd{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x i64]* @array_i64, i32 0, 
i32 1 store i64 %x, i64* %i @@ -383,8 +383,8 @@ entry: define ptx_device void @t5_f32(float %x) { entry: -;CHECK: mov.u32 r0, array_float; -;CHECK-NEXT: st.global.f32 [r0+4], r1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_float; +;CHECK-NEXT: st.global.f32 [r[[R0]]+4], r{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x float]* @array_float, i32 0, i32 1 store float %x, float* %i @@ -393,8 +393,8 @@ entry: define ptx_device void @t5_f64(double %x) { entry: -;CHECK: mov.u32 r0, array_double; -;CHECK-NEXT: st.global.f64 [r0+8], rd1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_double; +;CHECK-NEXT: st.global.f64 [r[[R0]]+8], rd{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x double]* @array_double, i32 0, i32 1 store double %x, double* %i diff --git a/test/CodeGen/PTX/sub.ll b/test/CodeGen/PTX/sub.ll index 4d552801a47..acef3961bfa 100644 --- a/test/CodeGen/PTX/sub.ll +++ b/test/CodeGen/PTX/sub.ll @@ -1,70 +1,70 @@ ; RUN: llc < %s -march=ptx32 | FileCheck %s define ptx_device i16 @t1_u16(i16 %x, i16 %y) { -; CHECK: sub.u16 rh0, rh1, rh2; +; CHECK: sub.u16 rh{{[0-9]+}}, rh{{[0-9]+}}, rh{{[0-9]+}}; ; CHECK-NEXT: ret; %z = sub i16 %x, %y ret i16 %z } define ptx_device i32 @t1_u32(i32 %x, i32 %y) { -; CHECK: sub.u32 r0, r1, r2; +; CHECK: sub.u32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}; ; CHECK-NEXT: ret; %z = sub i32 %x, %y ret i32 %z } define ptx_device i64 @t1_u64(i64 %x, i64 %y) { -; CHECK: sub.u64 rd0, rd1, rd2; +; CHECK: sub.u64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK-NEXT: ret; %z = sub i64 %x, %y ret i64 %z } define ptx_device float @t1_f32(float %x, float %y) { -; CHECK: sub.rn.f32 r0, r1, r2 +; CHECK: sub.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}} ; CHECK-NEXT: ret; %z = fsub float %x, %y ret float %z } define ptx_device double @t1_f64(double %x, double %y) { -; CHECK: sub.rn.f64 rd0, rd1, rd2 +; CHECK: sub.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}} ; CHECK-NEXT: ret; %z = fsub double %x, %y ret double %z } define ptx_device i16 @t2_u16(i16 %x) { 
-; CHECK: add.u16 rh0, rh1, -1; +; CHECK: add.u16 rh{{[0-9]+}}, rh{{[0-9]+}}, -1; ; CHECK-NEXT: ret; %z = sub i16 %x, 1 ret i16 %z } define ptx_device i32 @t2_u32(i32 %x) { -; CHECK: add.u32 r0, r1, -1; +; CHECK: add.u32 r{{[0-9]+}}, r{{[0-9]+}}, -1; ; CHECK-NEXT: ret; %z = sub i32 %x, 1 ret i32 %z } define ptx_device i64 @t2_u64(i64 %x) { -; CHECK: add.u64 rd0, rd1, -1; +; CHECK: add.u64 rd{{[0-9]+}}, rd{{[0-9]+}}, -1; ; CHECK-NEXT: ret; %z = sub i64 %x, 1 ret i64 %z } define ptx_device float @t2_f32(float %x) { -; CHECK: add.rn.f32 r0, r1, 0FBF800000; +; CHECK: add.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, 0FBF800000; ; CHECK-NEXT: ret; %z = fsub float %x, 1.0 ret float %z } define ptx_device double @t2_f64(double %x) { -; CHECK: add.rn.f64 rd0, rd1, 0DBFF0000000000000; +; CHECK: add.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, 0DBFF0000000000000; ; CHECK-NEXT: ret; %z = fsub double %x, 1.0 ret double %z