mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-09 05:13:01 +00:00
PTX: Handle FrameIndex nodes
llvm-svn: 140532
This commit is contained in:
parent
90ed5fdd4f
commit
e79db83e87
@ -295,9 +295,9 @@ void PTXAsmPrinter::EmitFunctionBodyStart() {
|
||||
for (unsigned i = 0, e = FrameInfo->getNumObjects(); i != e; ++i) {
|
||||
DEBUG(dbgs() << "Size of object: " << FrameInfo->getObjectSize(i) << "\n");
|
||||
if (FrameInfo->getObjectSize(i) > 0) {
|
||||
std::string def = "\t.reg .b";
|
||||
std::string def = "\t.local .b";
|
||||
def += utostr(FrameInfo->getObjectSize(i)*8); // Convert to bits
|
||||
def += " s";
|
||||
def += " __local_";
|
||||
def += utostr(i);
|
||||
def += ";";
|
||||
OutStreamer.EmitRawText(Twine(def));
|
||||
|
@ -37,6 +37,7 @@ class PTXDAGToDAGISel : public SelectionDAGISel {
|
||||
bool SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2);
|
||||
bool SelectADDRri(SDValue &Addr, SDValue &Base, SDValue &Offset);
|
||||
bool SelectADDRii(SDValue &Addr, SDValue &Base, SDValue &Offset);
|
||||
bool SelectADDRlocal(SDValue &Addr, SDValue &Base, SDValue &Offset);
|
||||
|
||||
// Include the pieces auto'gened from the target description
|
||||
#include "PTXGenDAGISel.inc"
|
||||
@ -48,6 +49,7 @@ class PTXDAGToDAGISel : public SelectionDAGISel {
|
||||
|
||||
SDNode *SelectREADPARAM(SDNode *Node);
|
||||
SDNode *SelectWRITEPARAM(SDNode *Node);
|
||||
SDNode *SelectFrameIndex(SDNode *Node);
|
||||
|
||||
bool isImm(const SDValue &operand);
|
||||
bool SelectImm(const SDValue &operand, SDValue &imm);
|
||||
@ -75,6 +77,8 @@ SDNode *PTXDAGToDAGISel::Select(SDNode *Node) {
|
||||
return SelectREADPARAM(Node);
|
||||
case PTXISD::WRITE_PARAM:
|
||||
return SelectWRITEPARAM(Node);
|
||||
case ISD::FrameIndex:
|
||||
return SelectFrameIndex(Node);
|
||||
default:
|
||||
return SelectCode(Node);
|
||||
}
|
||||
@ -173,6 +177,25 @@ SDNode *PTXDAGToDAGISel::SelectWRITEPARAM(SDNode *Node) {
|
||||
return Ret;
|
||||
}
|
||||
|
||||
// Selection routine for ISD::FrameIndex nodes.
// Reads the frame index carried by Node and builds a target frame index value
// of the node's result type. The code that would wrap that value in a machine
// node is commented out, so Node is returned unchanged.
SDNode *PTXDAGToDAGISel::SelectFrameIndex(SDNode *Node) {
|
||||
int FI = cast<FrameIndexSDNode>(Node)->getIndex();
|
||||
//dbgs() << "Selecting FrameIndex at index " << FI << "\n";
|
||||
// NOTE(review): TFI is only referenced by the disabled getMachineNode call at
// the end of this function.
SDValue TFI = CurDAG->getTargetFrameIndex(FI, Node->getValueType(0));
|
||||
|
||||
//unsigned OpCode = PTX::LOAD_LOCAL_F32;
|
||||
|
||||
//for (SDNode::use_iterator i = Node->use_begin(), e = Node->use_end();
|
||||
// i != e; ++i) {
|
||||
// SDNode *Use = *i;
|
||||
// dbgs() << "USE: ";
|
||||
// Use->dumpr(CurDAG);
|
||||
//}
|
||||
|
||||
// The incoming node is handed back as is; the replacement below is disabled.
return Node;
|
||||
//return CurDAG->getMachineNode(OpCode, Node->getDebugLoc(),
|
||||
// Node->getValueType(0), TFI);
|
||||
}
|
||||
|
||||
// Match memory operand of the form [reg+reg]
|
||||
bool PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2) {
|
||||
if (Addr.getOpcode() != ISD::ADD || Addr.getNumOperands() < 2 ||
|
||||
@ -243,6 +266,41 @@ bool PTXDAGToDAGISel::SelectADDRii(SDValue &Addr, SDValue &Base,
|
||||
return false;
|
||||
}
|
||||
|
||||
// Match memory operand of the form [reg], [imm+reg], and [reg+imm]
// On a match, Base and Offset are filled in and true is returned.
// Returns false for a bare immediate, for an ADD with fewer than two operands,
// for an ADD of two immediates, and for an ADD where SelectImm accepts neither
// operand.
bool PTXDAGToDAGISel::SelectADDRlocal(SDValue &Addr, SDValue &Base,
|
||||
SDValue &Offset) {
|
||||
if (Addr.getOpcode() != ISD::ADD) {
|
||||
// let SelectADDRii handle the [imm] case
|
||||
if (isImm(Addr))
|
||||
return false;
|
||||
// it is [reg]
|
||||
|
||||
assert(Addr.getValueType().isSimple() && "Type must be simple");
|
||||
|
||||
Base = Addr;
|
||||
// The whole address becomes the base; the offset is a zero constant of the
// address's own value type.
Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
if (Addr.getNumOperands() < 2)
|
||||
return false;
|
||||
|
||||
// let SelectADDRii handle the [imm+imm] case
|
||||
if (isImm(Addr.getOperand(0)) && isImm(Addr.getOperand(1)))
|
||||
return false;
|
||||
|
||||
// try [reg+imm] and [imm+reg]
|
||||
// i == 0 tries operand 1 as the offset with operand 0 as the base; i == 1
// swaps the roles. Presumably SelectImm writes Offset only when it returns
// true -- its definition is not visible here, TODO confirm.
for (int i = 0; i < 2; i ++)
|
||||
if (SelectImm(Addr.getOperand(1-i), Offset)) {
|
||||
Base = Addr.getOperand(i);
|
||||
return true;
|
||||
}
|
||||
|
||||
// neither [reg+imm] nor [imm+reg]
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns true when the node behind operand is accepted by
// ConstantSDNode::classof, i.e. the operand is a constant node.
bool PTXDAGToDAGISel::isImm(const SDValue &operand) {
|
||||
return ConstantSDNode::classof(operand.getNode());
|
||||
}
|
||||
|
@ -50,7 +50,9 @@ def DoesNotSupportFMA : Predicate<"!getSubtarget().supportsFMA()">;
|
||||
def load_global : PatFrag<(ops node:$ptr), (load node:$ptr), [{
|
||||
const Value *Src;
|
||||
const PointerType *PT;
|
||||
if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
|
||||
const SDValue &MemOp = N->getOperand(1);
|
||||
if ((MemOp.getOpcode() != ISD::FrameIndex) &&
|
||||
(Src = cast<LoadSDNode>(N)->getSrcValue()) &&
|
||||
(PT = dyn_cast<PointerType>(Src->getType())))
|
||||
return PT->getAddressSpace() == PTX::GLOBAL;
|
||||
return false;
|
||||
@ -66,12 +68,8 @@ def load_constant : PatFrag<(ops node:$ptr), (load node:$ptr), [{
|
||||
}]>;
|
||||
|
||||
def load_local : PatFrag<(ops node:$ptr), (load node:$ptr), [{
|
||||
const Value *Src;
|
||||
const PointerType *PT;
|
||||
if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
|
||||
(PT = dyn_cast<PointerType>(Src->getType())))
|
||||
return PT->getAddressSpace() == PTX::LOCAL;
|
||||
return false;
|
||||
const SDValue &MemOp = N->getOperand(1);
|
||||
return MemOp.getOpcode() == ISD::FrameIndex;
|
||||
}]>;
|
||||
|
||||
def load_parameter : PatFrag<(ops node:$ptr), (load node:$ptr), [{
|
||||
@ -96,7 +94,9 @@ def store_global
|
||||
: PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
|
||||
const Value *Src;
|
||||
const PointerType *PT;
|
||||
if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
|
||||
const SDValue &MemOp = N->getOperand(2);
|
||||
if ((MemOp.getOpcode() != ISD::FrameIndex) &&
|
||||
(Src = cast<StoreSDNode>(N)->getSrcValue()) &&
|
||||
(PT = dyn_cast<PointerType>(Src->getType())))
|
||||
return PT->getAddressSpace() == PTX::GLOBAL;
|
||||
return false;
|
||||
@ -104,12 +104,8 @@ def store_global
|
||||
|
||||
def store_local
|
||||
: PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
|
||||
const Value *Src;
|
||||
const PointerType *PT;
|
||||
if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
|
||||
(PT = dyn_cast<PointerType>(Src->getType())))
|
||||
return PT->getAddressSpace() == PTX::LOCAL;
|
||||
return false;
|
||||
const SDValue &MemOp = N->getOperand(2);
|
||||
return MemOp.getOpcode() == ISD::FrameIndex;
|
||||
}]>;
|
||||
|
||||
def store_parameter
|
||||
@ -133,12 +129,14 @@ def store_shared
|
||||
}]>;
|
||||
|
||||
// Addressing modes.
|
||||
def ADDRrr32 : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
|
||||
def ADDRrr64 : ComplexPattern<i64, 2, "SelectADDRrr", [], []>;
|
||||
def ADDRri32 : ComplexPattern<i32, 2, "SelectADDRri", [], []>;
|
||||
def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri", [], []>;
|
||||
def ADDRii32 : ComplexPattern<i32, 2, "SelectADDRii", [], []>;
|
||||
def ADDRii64 : ComplexPattern<i64, 2, "SelectADDRii", [], []>;
|
||||
def ADDRrr32 : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
|
||||
def ADDRrr64 : ComplexPattern<i64, 2, "SelectADDRrr", [], []>;
|
||||
def ADDRri32 : ComplexPattern<i32, 2, "SelectADDRri", [], []>;
|
||||
def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri", [], []>;
|
||||
def ADDRii32 : ComplexPattern<i32, 2, "SelectADDRii", [], []>;
|
||||
def ADDRii64 : ComplexPattern<i64, 2, "SelectADDRii", [], []>;
|
||||
def ADDRlocal32 : ComplexPattern<i32, 2, "SelectADDRlocal", [], []>;
|
||||
def ADDRlocal64 : ComplexPattern<i64, 2, "SelectADDRlocal", [], []>;
|
||||
|
||||
// Address operands
|
||||
def MEMri32 : Operand<i32> {
|
||||
@ -903,7 +901,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
|
||||
// Loads
|
||||
defm LDg : PTX_LD_ALL<"ld.global", load_global>;
|
||||
defm LDc : PTX_LD_ALL<"ld.const", load_constant>;
|
||||
defm LDl : PTX_LD_ALL<"ld.local", load_local>;
|
||||
//defm LDl : PTX_LD_ALL<"ld.local", load_local>;
|
||||
defm LDs : PTX_LD_ALL<"ld.shared", load_shared>;
|
||||
|
||||
// These instructions are used to load/store from the .param space for
|
||||
@ -949,11 +947,101 @@ let hasSideEffects = 1 in {
|
||||
[(PTXstoreparam timm:$d, RegF64:$a)]>;
|
||||
}
|
||||
|
||||
/*
|
||||
def ri64 : InstPTX<(outs RC:$d),
|
||||
(ins MEMri64:$a),
|
||||
!strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
|
||||
[(set RC:$d, (pat_load ADDRri64:$a))]>,
|
||||
Requires<[Use64BitAddresses]>;
|
||||
|
||||
def ri64 : InstPTX<(outs),
|
||||
(ins RC:$d, MEMri64:$a),
|
||||
!strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
|
||||
[(pat_store RC:$d, ADDRri64:$a)]>,
|
||||
Requires<[Use64BitAddresses]>;
|
||||
*/
|
||||
let hasSideEffects = 1 in {
|
||||
def LDLOCALpiPred : InstPTX<(outs RegPred:$d), (ins MEMri32:$a),
|
||||
"ld.local.pred\t$d, [__local_$a]",
|
||||
[(set RegPred:$d, (load_local ADDRlocal32:$a))]>;
|
||||
def LDLOCALpiU16 : InstPTX<(outs RegI16:$d), (ins MEMri32:$a),
|
||||
"ld.local.u16\t$d, [__local_$a]",
|
||||
[(set RegI16:$d, (load_local ADDRlocal32:$a))]>;
|
||||
def LDLOCALpiU32 : InstPTX<(outs RegI32:$d), (ins MEMri32:$a),
|
||||
"ld.local.u32\t$d, [__local_$a]",
|
||||
[(set RegI32:$d, (load_local ADDRlocal32:$a))]>;
|
||||
def LDLOCALpiU64 : InstPTX<(outs RegI64:$d), (ins MEMri32:$a),
|
||||
"ld.local.u64\t$d, [__local_$a]",
|
||||
[(set RegI64:$d, (load_local ADDRlocal32:$a))]>;
|
||||
def LDLOCALpiF32 : InstPTX<(outs RegF32:$d), (ins MEMri32:$a),
|
||||
"ld.local.f32\t$d, [__local_$a]",
|
||||
[(set RegF32:$d, (load_local ADDRlocal32:$a))]>;
|
||||
def LDLOCALpiF64 : InstPTX<(outs RegF64:$d), (ins MEMri32:$a),
|
||||
"ld.local.f64\t$d, [__local_$a]",
|
||||
[(set RegF64:$d, (load_local ADDRlocal32:$a))]>;
|
||||
|
||||
def STLOCALpiPred : InstPTX<(outs), (ins RegPred:$d, MEMri32:$a),
|
||||
"st.local.pred\t[__local_$a], $d",
|
||||
[(store_local RegPred:$d, ADDRlocal32:$a)]>;
|
||||
def STLOCALpiU16 : InstPTX<(outs), (ins RegI16:$d, MEMri32:$a),
|
||||
"st.local.u16\t[__local_$a], $d",
|
||||
[(store_local RegI16:$d, ADDRlocal32:$a)]>;
|
||||
def STLOCALpiU32 : InstPTX<(outs), (ins RegI32:$d, MEMri32:$a),
|
||||
"st.local.u32\t[__local_$a], $d",
|
||||
[(store_local RegI32:$d, ADDRlocal32:$a)]>;
|
||||
def STLOCALpiU64 : InstPTX<(outs), (ins RegI64:$d, MEMri32:$a),
|
||||
"st.local.u64\t[__local_$a], $d",
|
||||
[(store_local RegI64:$d, ADDRlocal32:$a)]>;
|
||||
def STLOCALpiF32 : InstPTX<(outs), (ins RegF32:$d, MEMri32:$a),
|
||||
"st.local.f32\t[__local_$a], $d",
|
||||
[(store_local RegF32:$d, ADDRlocal32:$a)]>;
|
||||
def STLOCALpiF64 : InstPTX<(outs), (ins RegF64:$d, MEMri32:$a),
|
||||
"st.local.f64\t[__local_$a], $d",
|
||||
[(store_local RegF64:$d, ADDRlocal32:$a)]>;
|
||||
|
||||
/*def LDLOCALpiU16 : InstPTX<(outs RegI16:$d), (ins MEMpi:$a),
|
||||
"ld.param.u16\t$d, [$a]",
|
||||
[(set RegI16:$d, (PTXloadparam timm:$a))]>;
|
||||
def LDLOCALpiU32 : InstPTX<(outs RegI32:$d), (ins MEMpi:$a),
|
||||
"ld.param.u32\t$d, [$a]",
|
||||
[(set RegI32:$d, (PTXloadparam timm:$a))]>;
|
||||
def LDLOCALpiU64 : InstPTX<(outs RegI64:$d), (ins MEMpi:$a),
|
||||
"ld.param.u64\t$d, [$a]",
|
||||
[(set RegI64:$d, (PTXloadparam timm:$a))]>;
|
||||
def LDLOCALpiF32 : InstPTX<(outs RegF32:$d), (ins MEMpi:$a),
|
||||
"ld.param.f32\t$d, [$a]",
|
||||
[(set RegF32:$d, (PTXloadparam timm:$a))]>;
|
||||
def LDLOCALpiF64 : InstPTX<(outs RegF64:$d), (ins MEMpi:$a),
|
||||
"ld.param.f64\t$d, [$a]",
|
||||
[(set RegF64:$d, (PTXloadparam timm:$a))]>;
|
||||
|
||||
def STLOCALpiPred : InstPTX<(outs), (ins MEMpi:$d, RegPred:$a),
|
||||
"st.param.pred\t[$d], $a",
|
||||
[(PTXstoreparam timm:$d, RegPred:$a)]>;
|
||||
def STLOCALpiU16 : InstPTX<(outs), (ins MEMpi:$d, RegI16:$a),
|
||||
"st.param.u16\t[$d], $a",
|
||||
[(PTXstoreparam timm:$d, RegI16:$a)]>;
|
||||
def STLOCALpiU32 : InstPTX<(outs), (ins MEMpi:$d, RegI32:$a),
|
||||
"st.param.u32\t[$d], $a",
|
||||
[(PTXstoreparam timm:$d, RegI32:$a)]>;
|
||||
def STLOCALpiU64 : InstPTX<(outs), (ins MEMpi:$d, RegI64:$a),
|
||||
"st.param.u64\t[$d], $a",
|
||||
[(PTXstoreparam timm:$d, RegI64:$a)]>;
|
||||
def STLOCALpiF32 : InstPTX<(outs), (ins MEMpi:$d, RegF32:$a),
|
||||
"st.param.f32\t[$d], $a",
|
||||
[(PTXstoreparam timm:$d, RegF32:$a)]>;
|
||||
def STLOCALpiF64 : InstPTX<(outs), (ins MEMpi:$d, RegF64:$a),
|
||||
"st.param.f64\t[$d], $a",
|
||||
[(PTXstoreparam timm:$d, RegF64:$a)]>;*/
|
||||
}
|
||||
|
||||
// Stores
|
||||
defm STg : PTX_ST_ALL<"st.global", store_global>;
|
||||
defm STl : PTX_ST_ALL<"st.local", store_local>;
|
||||
//defm STl : PTX_ST_ALL<"st.local", store_local>;
|
||||
defm STs : PTX_ST_ALL<"st.shared", store_shared>;
|
||||
|
||||
|
||||
|
||||
// defm STp : PTX_ST_ALL<"st.param", store_parameter>;
|
||||
// defm LDp : PTX_LD_ALL<"ld.param", load_parameter>;
|
||||
// TODO: Do something with st.param if/when it is needed.
|
||||
@ -1199,6 +1287,11 @@ def WRITEPARAMI64 : InstPTX<(outs), (ins RegI64:$a), "//w", []>;
|
||||
def WRITEPARAMF32 : InstPTX<(outs), (ins RegF32:$a), "//w", []>;
|
||||
def WRITEPARAMF64 : InstPTX<(outs), (ins RegF64:$a), "//w", []>;
|
||||
|
||||
///===- Stack Variable Loads/Stores ---------------------------------------===//
|
||||
|
||||
// f32 load from the .local state space. The record carries no selection
// pattern. The address operand is referenced as $a in the asm string, matching
// the $name operand references used by the other instruction records.
def LOAD_LOCAL_F32 : InstPTX<(outs RegF32:$d), (ins MEMpi:$a),
|
||||
"ld.local.f32\t$d, [$a]", []>;
|
||||
|
||||
// Call handling
|
||||
// def ADJCALLSTACKUP :
|
||||
// InstPTX<(outs), (ins i32imm:$amt1, i32imm:$amt2), "",
|
||||
|
@ -14,6 +14,9 @@
|
||||
#include "PTX.h"
|
||||
#include "PTXRegisterInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/Target/TargetInstrInfo.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
@ -23,16 +26,21 @@
|
||||
using namespace llvm;
|
||||
|
||||
PTXRegisterInfo::PTXRegisterInfo(PTXTargetMachine &TM,
|
||||
const TargetInstrInfo &TII)
|
||||
const TargetInstrInfo &tii)
|
||||
// PTX does not have a return address register.
|
||||
: PTXGenRegisterInfo(0) {
|
||||
: PTXGenRegisterInfo(0), TII(tii) {
|
||||
}
|
||||
|
||||
void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
||||
int SPAdj,
|
||||
RegScavenger *RS) const {
|
||||
unsigned Index;
|
||||
MachineInstr& MI = *II;
|
||||
MachineInstr &MI = *II;
|
||||
//MachineBasicBlock &MBB = *MI.getParent();
|
||||
DebugLoc dl = MI.getDebugLoc();
|
||||
//MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
|
||||
|
||||
//unsigned Reg = MRI.createVirtualRegister(PTX::RegF32RegisterClass);
|
||||
|
||||
Index = 0;
|
||||
while (!MI.getOperand(Index).isFI()) {
|
||||
@ -47,6 +55,15 @@ void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
||||
DEBUG(dbgs() << "- SPAdj: " << SPAdj << "\n");
|
||||
DEBUG(dbgs() << "- FrameIndex: " << FrameIndex << "\n");
|
||||
|
||||
//MachineInstr* MI2 = BuildMI(MBB, II, dl, TII.get(PTX::LOAD_LOCAL_F32))
|
||||
//.addReg(Reg, RegState::Define).addImm(FrameIndex);
|
||||
//if (MI2->findFirstPredOperandIdx() == -1) {
|
||||
// MI2->addOperand(MachineOperand::CreateReg(PTX::NoRegister, /*IsDef=*/false));
|
||||
// MI2->addOperand(MachineOperand::CreateImm(PTX::PRED_NORMAL));
|
||||
//}
|
||||
//MI2->dump();
|
||||
|
||||
// This frame index is post stack slot re-use assignments
|
||||
MI.getOperand(Index).ChangeToImmediate(FrameIndex);
|
||||
//MI.getOperand(Index).ChangeToRegister(Reg, false);
|
||||
MI.getOperand(Index).ChangeToImmediate(0);
|
||||
}
|
||||
|
@ -25,8 +25,12 @@ class PTXTargetMachine;
|
||||
class MachineFunction;
|
||||
|
||||
struct PTXRegisterInfo : public PTXGenRegisterInfo {
|
||||
private:
|
||||
const TargetInstrInfo &TII;
|
||||
|
||||
public:
|
||||
PTXRegisterInfo(PTXTargetMachine &TM,
|
||||
const TargetInstrInfo &TII);
|
||||
const TargetInstrInfo &tii);
|
||||
|
||||
virtual const unsigned
|
||||
*getCalleeSavedRegs(const MachineFunction *MF = 0) const {
|
||||
|
@ -118,7 +118,7 @@ bool PTXTargetMachine::addInstSelector(PassManagerBase &PM,
|
||||
bool PTXTargetMachine::addPostRegAlloc(PassManagerBase &PM,
|
||||
CodeGenOpt::Level OptLevel) {
|
||||
// PTXMFInfoExtract must run after register allocation!
|
||||
PM.add(createPTXMFInfoExtract(*this, OptLevel));
|
||||
//PM.add(createPTXMFInfoExtract(*this, OptLevel));
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -365,5 +365,7 @@ bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
|
||||
if (addPreEmitPass(PM, OptLevel))
|
||||
printNoVerify(PM, "After PreEmit passes");
|
||||
|
||||
PM.add(createPTXMFInfoExtract(*this, OptLevel));
|
||||
|
||||
return false;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user