Add lowering of arguments and return values for device functions

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@116805 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Che-Liang Chiou 2010-10-19 13:14:40 +00:00
parent cf2561d111
commit b48f2c2e1d
7 changed files with 275 additions and 9 deletions

View File

@ -32,6 +32,8 @@ namespace {
virtual void EmitInstruction(const MachineInstr *MI);
void printOperand(const MachineInstr *MI, int opNum, raw_ostream &OS);
// autogen'd.
void printInstruction(const MachineInstr *MI, raw_ostream &OS);
static const char *getRegisterName(unsigned RegNo);
@ -40,10 +42,27 @@ namespace {
/// Emit one machine instruction as a line of PTX text.
///
/// The instruction is rendered by the autogenerated printInstruction into a
/// local buffer, terminated with ';', and passed to the streamer as raw text.
/// The text is built and emitted exactly once per instruction.
void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
  SmallString<128> str;
  raw_svector_ostream OS(str);
  printInstruction(MI, OS);
  OS << ';';
  OutStreamer.EmitRawText(OS.str());
}
void PTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
raw_ostream &OS) {
const MachineOperand &MO = MI->getOperand(opNum);
switch (MO.getType()) {
default:
llvm_unreachable("<unknown operand type>");
break;
case MachineOperand::MO_Register:
OS << getRegisterName(MO.getReg());
break;
case MachineOperand::MO_Immediate:
OS << (int) MO.getImm();
break;
}
}
#include "PTXGenAsmWriter.inc"

View File

@ -11,9 +11,12 @@
//
//===----------------------------------------------------------------------===//
#include "PTX.h"
#include "PTXISelLowering.h"
#include "PTXRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
@ -22,7 +25,8 @@ using namespace llvm;
PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
: TargetLowering(TM, new TargetLoweringObjectFileELF()) {
// Set up the register classes.
addRegisterClass(MVT::i1, PTX::PredsRegisterClass);
addRegisterClass(MVT::i1, PTX::PredsRegisterClass);
addRegisterClass(MVT::i32, PTX::RRegs32RegisterClass);
// Compute derived properties from the register classes
computeRegisterProperties();
@ -40,6 +44,57 @@ const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
static struct argmap_entry {
MVT::SimpleValueType VT;
TargetRegisterClass *RC;
TargetRegisterClass::iterator loc;
argmap_entry(MVT::SimpleValueType _VT, TargetRegisterClass *_RC)
: VT(_VT), RC(_RC), loc(_RC->begin()) {}
void reset(void) { loc = RC->begin(); }
bool operator==(MVT::SimpleValueType _VT) { return VT == _VT; }
} argmap[] = {
argmap_entry(MVT::i1, PTX::PredsRegisterClass),
argmap_entry(MVT::i32, PTX::RRegs32RegisterClass)
};
/// Lower one formal argument of a PTX kernel function.
///
/// Placeholder: kernel argument lowering is not implemented, so any call
/// aborts through llvm_unreachable. The signature matches
/// lower_device_argument so both can be used through one function pointer.
static SDValue lower_kernel_argument(int i, SDValue Chain, DebugLoc dl,
                                     MVT::SimpleValueType VT,
                                     argmap_entry *entry, SelectionDAG &DAG,
                                     unsigned *argreg) {
  // TODO: implement kernel argument lowering.
  llvm_unreachable("Not implemented yet");
}
/// Lower one formal argument of a PTX device function.
///
/// Takes the next physical register from \p entry's register class, marks it
/// live-in to the function paired with a fresh virtual register, and returns
/// a CopyFromReg of that virtual register.
///
/// \param i       argument index (currently unused).
/// \param Chain   incoming DAG chain for the copy.
/// \param dl      debug location attached to the copy node.
/// \param VT      value type of the argument.
/// \param entry   allocation state for VT's register class; its cursor is
///                advanced by this call.
/// \param DAG     selection DAG being built.
/// \param argreg  out: the physical register assigned to the argument.
/// \returns the SDValue that reads the argument.
static SDValue lower_device_argument(int i, SDValue Chain, DebugLoc dl,
                                     MVT::SimpleValueType VT,
                                     argmap_entry *entry, SelectionDAG &DAG,
                                     unsigned *argreg) {
  MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();

  // Allocation starts from register 1: the cursor is advanced before it is
  // read, so the first register of the class is never handed out. Stop
  // instead of reading past the end of the class when registers run out.
  const TargetRegisterClass::iterator end = entry->RC->end();
  if (entry->loc == end || ++(entry->loc) == end)
    llvm_unreachable("Too many arguments for the available registers");

  unsigned preg = *entry->loc;
  unsigned vreg = RegInfo.createVirtualRegister(entry->RC);
  RegInfo.addLiveIn(preg, vreg);

  *argreg = preg;
  return DAG.getCopyFromReg(Chain, dl, vreg, VT);
}
/// Pointer to a per-argument lowering routine; lower_kernel_argument and
/// lower_device_argument both have this signature.
typedef SDValue (*lower_argument_func)(int i, SDValue Chain, DebugLoc dl,
                                       MVT::SimpleValueType VT,
                                       argmap_entry *entry, SelectionDAG &DAG,
                                       unsigned *argreg);
SDValue PTXTargetLowering::
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv,
@ -48,6 +103,40 @@ SDValue PTXTargetLowering::
DebugLoc dl,
SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
if (isVarArg) llvm_unreachable("PTX does not support varargs");
lower_argument_func lower_argument;
switch (CallConv) {
default:
llvm_unreachable("Unsupported calling convention");
break;
case CallingConv::PTX_Kernel:
lower_argument = lower_kernel_argument;
break;
case CallingConv::PTX_Device:
lower_argument = lower_device_argument;
break;
}
// Reset argmap before allocation
for (struct argmap_entry *i = argmap, *e = argmap + array_lengthof(argmap);
i != e; ++ i)
i->reset();
for (int i = 0, e = Ins.size(); i != e; ++ i) {
MVT::SimpleValueType VT = Ins[i].VT.getSimpleVT().SimpleTy;
struct argmap_entry *entry = std::find(argmap,
argmap + array_lengthof(argmap), VT);
if (entry == argmap + array_lengthof(argmap))
llvm_unreachable("Type of argument is not supported");
unsigned reg;
SDValue arg = lower_argument(i, Chain, dl, VT, entry, DAG, &reg);
InVals.push_back(arg);
}
return Chain;
}
@ -59,7 +148,7 @@ SDValue PTXTargetLowering::
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl,
SelectionDAG &DAG) const {
assert(!isVarArg && "PTX does not support var args.");
if (isVarArg) llvm_unreachable("PTX does not support varargs");
switch (CallConv) {
default:
@ -74,10 +163,26 @@ SDValue PTXTargetLowering::
// PTX_Device
// return void
if (Outs.size() == 0)
return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain);
// TODO: allocate return register
assert(Outs[0].VT == MVT::i32 && "Can return only basic types");
SDValue Flag;
unsigned reg = PTX::R0;
// If this is the first return lowered for this function, add the regs to the
// liveout set for the function
if (DAG.getMachineFunction().getRegInfo().liveout_empty())
DAG.getMachineFunction().getRegInfo().addLiveOut(reg);
// Copy the result values into the output registers
Chain = DAG.getCopyToReg(Chain, dl, reg, OutVals[0], Flag);
// Guarantee that all emitted copies are stuck together,
// avoiding something bad
Flag = Chain.getValue(1);
return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain, Flag);
}

View File

@ -11,7 +11,9 @@
//
//===----------------------------------------------------------------------===//
#include "PTX.h"
#include "PTXInstrInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
using namespace llvm;
@ -20,3 +22,66 @@ using namespace llvm;
/// Construct the instruction info from the PTXInsts descriptor table and
/// initialise the register info and target machine members from \p _TM.
PTXInstrInfo::PTXInstrInfo(PTXTargetMachine &_TM)
  : TargetInstrInfoImpl(PTXInsts, array_lengthof(PTXInsts)),
    RI(_TM, *this),
    TM(_TM) {
}
/// Pairs a register class with the opcode of the register-to-register move
/// that copies values within that class.
struct map_entry {
  const TargetRegisterClass *cls;  // register class being copied
  const int opcode;                // move opcode for that class
};

/// Lookup table consulted when emitting register copies.
static const map_entry map[] = {
  { &PTX::RRegs32RegClass, PTX::MOVrr },
  { &PTX::PredsRegClass,   PTX::MOVpp }
};
/// Emit a move that copies physical register \p SrcReg into \p DstReg.
///
/// The move table is searched for a register class that contains both
/// registers, and that class's move opcode is built at \p I in \p MBB.
/// \p KillSrc is forwarded as the kill state of the source operand. If no
/// table entry contains both registers the copy is rejected with
/// llvm_unreachable.
void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I, DebugLoc DL,
                               unsigned DstReg, unsigned SrcReg,
                               bool KillSrc) const {
  for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++i)
    // Use the table entry itself so every listed class (not only the
    // 32-bit registers) gets its own move opcode.
    if (map[i].cls->contains(DstReg, SrcReg)) {
      BuildMI(MBB, I, DL, get(map[i].opcode), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
      return;
    }

  llvm_unreachable("Impossible reg-to-reg copy");
}
/// Emit a move from \p SrcReg to \p DstReg when both belong to the same
/// register class and that class has an entry in the move table.
///
/// \returns true after the move has been built; false when the classes
/// differ or the class has no known move opcode.
bool PTXInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator I,
                                unsigned DstReg, unsigned SrcReg,
                                const TargetRegisterClass *DstRC,
                                const TargetRegisterClass *SrcRC,
                                DebugLoc DL) const {
  // Copies between different register classes are not handled.
  if (DstRC != SrcRC)
    return false;

  const int NumEntries = sizeof(map)/sizeof(map[0]);
  for (int Idx = 0; Idx != NumEntries; ++Idx) {
    const map_entry &Entry = map[Idx];
    if (Entry.cls != DstRC)
      continue;

    MachineInstr *Move =
      BuildMI(MBB, I, DL, get(Entry.opcode), DstReg).addReg(SrcReg);

    // When the built instruction reports no predicate operand, append a
    // register operand (reg 0) and an immediate flag operand.
    if (Move->findFirstPredOperandIdx() == -1) {
      Move->addOperand(MachineOperand::CreateReg(0, false));
      Move->addOperand(MachineOperand::CreateImm(/*IsInv=*/0));
    }
    return true;
  }

  return false;
}
/// Report whether \p MI is a register-to-register move (MOVpp or MOVrr).
///
/// On success the destination is read from operand 0 and the source from
/// operand 1; both sub-register indices are set to 0. The out-parameters are
/// left untouched when the function returns false.
bool PTXInstrInfo::isMoveInstr(const MachineInstr& MI,
                               unsigned &SrcReg, unsigned &DstReg,
                               unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
  const bool IsRegMove =
    MI.getOpcode() == PTX::MOVpp || MI.getOpcode() == PTX::MOVrr;
  if (!IsRegMove)
    return false;

  assert(MI.getNumOperands() >= 2 &&
         MI.getOperand(0).isReg() && MI.getOperand(1).isReg() &&
         "Invalid register-register move instruction");

  // No sub-registers.
  DstSubIdx = 0;
  SrcSubIdx = 0;

  DstReg = MI.getOperand(0).getReg();
  SrcReg = MI.getOperand(1).getReg();
  return true;
}

View File

@ -29,6 +29,22 @@ class PTXInstrInfo : public TargetInstrInfoImpl {
explicit PTXInstrInfo(PTXTargetMachine &_TM);
virtual const PTXRegisterInfo &getRegisterInfo() const { return RI; }
/// Emit a register-to-register move of physical register SrcReg into DstReg
/// at position I in MBB. KillSrc is forwarded as the kill state of the source
/// operand. A register pair for which no move is available is reported
/// through llvm_unreachable.
virtual void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DstReg, unsigned SrcReg,
bool KillSrc) const;
/// Emit a move from SrcReg to DstReg at position I in MBB when DstRC and
/// SrcRC are the same class and a move opcode is known for it.
/// Returns true if a move was built, false otherwise.
virtual bool copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DstReg, unsigned SrcReg,
const TargetRegisterClass *DstRC,
const TargetRegisterClass *SrcRC,
DebugLoc DL) const;
/// Return true if MI is a register-to-register move (MOVpp or MOVrr). On
/// success SrcReg/DstReg receive the source and destination registers and
/// both sub-register indices are set to 0.
virtual bool isMoveInstr(const MachineInstr& MI,
unsigned &SrcReg, unsigned &DstReg,
unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
}; // class PTXInstrInfo
} // namespace llvm

View File

@ -30,6 +30,27 @@ def PTXret
// Instructions
//===----------------------------------------------------------------------===//
///===- Data Movement and Conversion Instructions -------------------------===//
// Register-to-register moves, one per register class (Preds and RRegs32).
// Both have an empty selection pattern list and are flagged as having no
// side effects.
let neverHasSideEffects = 1 in {
// rely on isMoveInstr to separate MOVpp, MOVrr, etc.
// MOVpp: predicate register copy, printed as "mov.pred".
def MOVpp
: InstPTX<(outs Preds:$d), (ins Preds:$a), "mov.pred\t$d, $a", []>;
// MOVrr: 32-bit register copy, printed as "mov.s32".
def MOVrr
: InstPTX<(outs RRegs32:$d), (ins RRegs32:$a), "mov.s32\t$d, $a", []>;
}
// Immediate-to-register moves. Each matches an `imm` pattern for its register
// class and is flagged rematerializable and as cheap as a move.
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
// MOVpi: load an i1 immediate into a predicate register.
def MOVpi
: InstPTX<(outs Preds:$d), (ins i1imm:$a), "mov.pred\t$d, $a",
[(set Preds:$d, imm:$a)]>;
// MOVri: load an i32 immediate into a 32-bit register.
def MOVri
: InstPTX<(outs RRegs32:$d), (ins i32imm:$a), "mov.s32\t$d, $a",
[(set RRegs32:$d, imm:$a)]>;
}
///===- Control Flow Instructions -----------------------------------------===//
let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
def EXIT : InstPTX<(outs), (ins), "exit", [(PTXexit)]>;
def RET : InstPTX<(outs), (ins), "ret", [(PTXret)]>;

View File

@ -40,10 +40,11 @@ struct PTXRegisterInfo : public PTXGenRegisterInfo {
virtual bool hasFP(const MachineFunction &MF) const { return false; }
// FIXME: Given that PTX does not support stack frame, what should we do here?
// Frame-index elimination is not supported by this target: any call aborts
// through llvm_unreachable. All parameters are unused.
virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                 int SPAdj,
                                 RegScavenger *RS = NULL) const {
  llvm_unreachable("PTX does not support general function call");
}
virtual void emitPrologue(MachineFunction &MF) const {}
virtual void emitEpilogue(MachineFunction &MF,

View File

@ -52,6 +52,39 @@ def P29 : PTXReg<"p29">;
def P30 : PTXReg<"p30">;
def P31 : PTXReg<"p31">;
// Registers r0 through r31. They are the members of the RRegs32 register
// class (value type i32) defined further down in this file.
def R0 : PTXReg<"r0">;
def R1 : PTXReg<"r1">;
def R2 : PTXReg<"r2">;
def R3 : PTXReg<"r3">;
def R4 : PTXReg<"r4">;
def R5 : PTXReg<"r5">;
def R6 : PTXReg<"r6">;
def R7 : PTXReg<"r7">;
def R8 : PTXReg<"r8">;
def R9 : PTXReg<"r9">;
def R10 : PTXReg<"r10">;
def R11 : PTXReg<"r11">;
def R12 : PTXReg<"r12">;
def R13 : PTXReg<"r13">;
def R14 : PTXReg<"r14">;
def R15 : PTXReg<"r15">;
def R16 : PTXReg<"r16">;
def R17 : PTXReg<"r17">;
def R18 : PTXReg<"r18">;
def R19 : PTXReg<"r19">;
def R20 : PTXReg<"r20">;
def R21 : PTXReg<"r21">;
def R22 : PTXReg<"r22">;
def R23 : PTXReg<"r23">;
def R24 : PTXReg<"r24">;
def R25 : PTXReg<"r25">;
def R26 : PTXReg<"r26">;
def R27 : PTXReg<"r27">;
def R28 : PTXReg<"r28">;
def R29 : PTXReg<"r29">;
def R30 : PTXReg<"r30">;
def R31 : PTXReg<"r31">;
//===----------------------------------------------------------------------===//
// Register classes
//===----------------------------------------------------------------------===//
@ -61,3 +94,9 @@ def Preds : RegisterClass<"PTX", [i1], 8,
P8, P9, P10, P11, P12, P13, P14, P15,
P16, P17, P18, P19, P20, P21, P22, P23,
P24, P25, P26, P27, P28, P29, P30, P31]>;
// Register class for i32 values, made up of R0..R31 in ascending order.
// NOTE(review): the third argument (32) is presumably the alignment in
// bits -- confirm against the RegisterClass definition in Target.td.
def RRegs32 : RegisterClass<"PTX", [i32], 32,
[R0, R1, R2, R3, R4, R5, R6, R7,
R8, R9, R10, R11, R12, R13, R14, R15,
R16, R17, R18, R19, R20, R21, R22, R23,
R24, R25, R26, R27, R28, R29, R30, R31]>;