R600: rework handling of the constants

Remove Cxxx registers, add new special register - "ALU_CONST" and new
operand for each alu src - "sel". ALU_CONST is used to designate that the
new operand contains the value to override src.sel, src.kc_bank, src.chan
for constants in the driver.

Patch by: Vadim Girlin

Vincent Lejeune:
  - Use pointers for constants
  - Fold CONST_ADDRESS when possible

Tom Stellard:
  - Give CONSTANT_BUFFER_0 its own address space
  - Use integer types for constant loads

Reviewed-by: Tom Stellard <thomas.stellard@amd.com>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173222 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Tom Stellard 2013-01-23 02:09:06 +00:00
parent c7e1888d93
commit 9f7818d9bd
16 changed files with 483 additions and 104 deletions

View File

@ -23,6 +23,7 @@ class AMDGPUTargetMachine;
// R600 Passes
FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
FunctionPass *createR600LowerConstCopy(TargetMachine &tm);
// SI Passes
FunctionPass *createSIAnnotateControlFlowPass();

View File

@ -136,6 +136,7 @@ bool AMDGPUPassConfig::addPreEmitPass() {
addPass(createAMDGPUCFGPreparationPass(*TM));
addPass(createAMDGPUCFGStructurizerPass(*TM));
addPass(createR600ExpandSpecialInstrsPass(*TM));
addPass(createR600LowerConstCopy(*TM));
addPass(&FinalizeMachineBundlesID);
} else {
addPass(createSILowerLiteralConstantsPass(*TM));

View File

@ -90,14 +90,30 @@ namespace AMDGPUAS {
enum AddressSpaces {
PRIVATE_ADDRESS = 0, ///< Address space for private memory.
GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
CONSTANT_ADDRESS = 2, ///< Address space for constant memory.
CONSTANT_ADDRESS = 2, ///< Address space for constant memory
LOCAL_ADDRESS = 3, ///< Address space for local memory.
REGION_ADDRESS = 4, ///< Address space for region memory.
ADDRESS_NONE = 5, ///< Address space for unknown memory.
PARAM_D_ADDRESS = 6, ///< Address space for direct addressible parameter memory (CONST0)
PARAM_I_ADDRESS = 7, ///< Address space for indirect addressible parameter memory (VTX1)
USER_SGPR_ADDRESS = 8, ///< Address space for USER_SGPRS on SI
LAST_ADDRESS = 9
CONSTANT_BUFFER_0 = 9,
CONSTANT_BUFFER_1 = 10,
CONSTANT_BUFFER_2 = 11,
CONSTANT_BUFFER_3 = 12,
CONSTANT_BUFFER_4 = 13,
CONSTANT_BUFFER_5 = 14,
CONSTANT_BUFFER_6 = 15,
CONSTANT_BUFFER_7 = 16,
CONSTANT_BUFFER_8 = 17,
CONSTANT_BUFFER_9 = 18,
CONSTANT_BUFFER_10 = 19,
CONSTANT_BUFFER_11 = 20,
CONSTANT_BUFFER_12 = 21,
CONSTANT_BUFFER_13 = 22,
CONSTANT_BUFFER_14 = 23,
CONSTANT_BUFFER_15 = 24,
LAST_ADDRESS = 25
};
} // namespace AMDGPUAS

View File

@ -20,6 +20,7 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Support/Compiler.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include <list>
#include <queue>
@ -45,6 +46,7 @@ public:
private:
inline SDValue getSmallIPtrImm(unsigned Imm);
bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
// Complex pattern selectors
bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
@ -67,6 +69,9 @@ private:
static bool isLocalLoad(const LoadSDNode *N);
static bool isRegionLoad(const LoadSDNode *N);
bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
bool SelectGlobalValueVariableOffset(SDValue Addr,
SDValue &BaseReg, SDValue& Offset);
bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset);
bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset);
bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
@ -259,7 +264,65 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
break;
}
}
return SelectCode(N);
SDNode *Result = SelectCode(N);
// Fold operands of selected node
const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
const R600InstrInfo *TII =
static_cast<const R600InstrInfo*>(TM.getInstrInfo());
if (Result && TII->isALUInstr(Result->getMachineOpcode())) {
bool IsModified = false;
do {
std::vector<SDValue> Ops;
for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
I != E; ++I)
Ops.push_back(*I);
IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops);
if (IsModified) {
Result = CurDAG->MorphNodeTo(Result, Result->getOpcode(),
Result->getVTList(), Ops.data(), Ops.size());
}
} while (IsModified);
}
}
return Result;
}
bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
int OperandIdx[] = {
TII->getOperandIdx(Opcode, R600Operands::SRC0),
TII->getOperandIdx(Opcode, R600Operands::SRC1),
TII->getOperandIdx(Opcode, R600Operands::SRC2)
};
int SelIdx[] = {
TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL),
TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL),
TII->getOperandIdx(Opcode, R600Operands::SRC2_SEL)
};
for (unsigned i = 0; i < 3; i++) {
if (OperandIdx[i] < 0)
return false;
SDValue Operand = Ops[OperandIdx[i] - 1];
switch (Operand.getOpcode()) {
case AMDGPUISD::CONST_ADDRESS: {
SDValue CstOffset;
if (!Operand.getValueType().isVector() &&
SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset)) {
Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
Ops[SelIdx[i] - 1] = CstOffset;
return true;
}
}
break;
default:
break;
}
}
return false;
}
bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
@ -406,6 +469,25 @@ const char *AMDGPUDAGToDAGISel::getPassName() const {
///==== AMDGPU Functions ====///
bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
SDValue& IntPtr) {
if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, true);
return true;
}
return false;
}
bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
SDValue& BaseReg, SDValue &Offset) {
if (!dyn_cast<ConstantSDNode>(Addr)) {
BaseReg = Addr;
Offset = CurDAG->getIntPtrConstant(0, true);
return true;
}
return false;
}
bool AMDGPUDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base,
SDValue& Offset) {
if (Addr.getOpcode() == ISD::TargetExternalSymbol ||

View File

@ -36,6 +36,7 @@ add_llvm_target(R600CodeGen
R600ExpandSpecialInstrs.cpp
R600InstrInfo.cpp
R600ISelLowering.cpp
R600LowerConstCopy.cpp
R600MachineFunctionInfo.cpp
R600RegisterInfo.cpp
SIAnnotateControlFlow.cpp

View File

@ -129,4 +129,28 @@ void AMDGPUInstPrinter::printWrite(const MCInst *MI, unsigned OpNo,
}
}
void AMDGPUInstPrinter::printSel(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const char * chans = "XYZW";
int sel = MI->getOperand(OpNo).getImm();
int chan = sel & 3;
sel >>= 2;
if (sel >= 512) {
sel -= 512;
int cb = sel >> 12;
sel &= 4095;
O << cb << "[" << sel << "]";
} else if (sel >= 448) {
sel -= 448;
O << sel;
} else if (sel >= 0){
O << sel;
}
if (sel >= 0)
O << "." << chans[chan];
}
#include "AMDGPUGenAsmWriter.inc"

View File

@ -45,6 +45,7 @@ private:
void printUpdateExecMask(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printUpdatePred(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
};
} // End namespace llvm

View File

@ -63,8 +63,8 @@ private:
void EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
raw_ostream &OS) const;
void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const;
void EmitSrcISA(const MCInst &MI, unsigned OpIdx, uint64_t &Value,
raw_ostream &OS) const;
void EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, unsigned SelOpIdx,
raw_ostream &OS) const;
void EmitDst(const MCInst &MI, raw_ostream &OS) const;
void EmitTexInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
raw_ostream &OS) const;
@ -163,7 +163,8 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case AMDGPU::VTX_READ_PARAM_32_eg:
case AMDGPU::VTX_READ_GLOBAL_8_eg:
case AMDGPU::VTX_READ_GLOBAL_32_eg:
case AMDGPU::VTX_READ_GLOBAL_128_eg: {
case AMDGPU::VTX_READ_GLOBAL_128_eg:
case AMDGPU::TEX_VTX_CONSTBUF: {
uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
@ -193,7 +194,6 @@ void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI,
SmallVectorImpl<MCFixup> &Fixups,
raw_ostream &OS) const {
const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
unsigned NumOperands = MI.getNumOperands();
// Emit instruction type
EmitByte(INSTR_ALU, OS);
@ -209,19 +209,21 @@ void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI,
InstWord01 |= ISAOpCode << 1;
}
unsigned SrcIdx = 0;
for (unsigned int OpIdx = 1; OpIdx < NumOperands; ++OpIdx) {
if (MI.getOperand(OpIdx).isImm() || MI.getOperand(OpIdx).isFPImm() ||
OpIdx == (unsigned)MCDesc.findFirstPredOperandIdx()) {
continue;
}
EmitSrcISA(MI, OpIdx, InstWord01, OS);
SrcIdx++;
}
unsigned SrcNum = MCDesc.TSFlags & R600_InstFlag::OP3 ? 3 :
MCDesc.TSFlags & R600_InstFlag::OP2 ? 2 : 1;
// Emit zeros for unused sources
for ( ; SrcIdx < 3; SrcIdx++) {
EmitNullBytes(SRC_BYTE_COUNT - 6, OS);
EmitByte(SrcNum, OS);
const unsigned SrcOps[3][2] = {
{R600Operands::SRC0, R600Operands::SRC0_SEL},
{R600Operands::SRC1, R600Operands::SRC1_SEL},
{R600Operands::SRC2, R600Operands::SRC2_SEL}
};
for (unsigned SrcIdx = 0; SrcIdx < SrcNum; ++SrcIdx) {
unsigned RegOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][0]];
unsigned SelOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][1]];
EmitSrcISA(MI, RegOpIdx, SelOpIdx, OS);
}
Emit(InstWord01, OS);
@ -292,34 +294,37 @@ void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx,
}
void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned OpIdx,
uint64_t &Value, raw_ostream &OS) const {
const MCOperand &MO = MI.getOperand(OpIdx);
void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned RegOpIdx,
unsigned SelOpIdx, raw_ostream &OS) const {
const MCOperand &RegMO = MI.getOperand(RegOpIdx);
const MCOperand &SelMO = MI.getOperand(SelOpIdx);
union {
float f;
uint32_t i;
} InlineConstant;
InlineConstant.i = 0;
// Emit the source select (2 bytes). For GPRs, this is the register index.
// For other potential instruction operands, (e.g. constant registers) the
// value of the source select is defined in the r600isa docs.
if (MO.isReg()) {
unsigned Reg = MO.getReg();
if (AMDGPUMCRegisterClasses[AMDGPU::R600_CReg32RegClassID].contains(Reg)) {
EmitByte(1, OS);
} else {
EmitByte(0, OS);
}
// Emit source type (1 byte) and source select (4 bytes). For GPRs type is 0
// and select is 0 (GPR index is encoded in the instr encoding. For constants
// type is 1 and select is the original const select passed from the driver.
unsigned Reg = RegMO.getReg();
if (Reg == AMDGPU::ALU_CONST) {
EmitByte(1, OS);
uint32_t Sel = SelMO.getImm();
Emit(Sel, OS);
} else {
EmitByte(0, OS);
Emit((uint32_t)0, OS);
}
if (Reg == AMDGPU::ALU_LITERAL_X) {
unsigned ImmOpIndex = MI.getNumOperands() - 1;
MCOperand ImmOp = MI.getOperand(ImmOpIndex);
if (ImmOp.isFPImm()) {
InlineConstant.f = ImmOp.getFPImm();
} else {
assert(ImmOp.isImm());
InlineConstant.i = ImmOp.getImm();
}
if (Reg == AMDGPU::ALU_LITERAL_X) {
unsigned ImmOpIndex = MI.getNumOperands() - 1;
MCOperand ImmOp = MI.getOperand(ImmOpIndex);
if (ImmOp.isFPImm()) {
InlineConstant.f = ImmOp.getFPImm();
} else {
assert(ImmOp.isImm());
InlineConstant.i = ImmOp.getImm();
}
}

View File

@ -62,18 +62,33 @@ namespace R600Operands {
SRC0_NEG,
SRC0_REL,
SRC0_ABS,
SRC0_SEL,
SRC1,
SRC1_NEG,
SRC1_REL,
SRC1_ABS,
SRC1_SEL,
SRC2,
SRC2_NEG,
SRC2_REL,
SRC2_SEL,
LAST,
PRED_SEL,
IMM,
COUNT
};
const static int ALUOpTable[3][R600Operands::COUNT] = {
// W C S S S S S S S S S S S
// R O D L S R R R R S R R R R S R R R L P
// D U I M R A R C C C C R C C C C R C C C A R I
// S E U T O E M C 0 0 0 0 C 1 1 1 1 C 2 2 2 S E M
// T M P E D L P 0 N R A S 1 N R A S 2 N R S T D M
{0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,10,11,12},
{0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,13,14,15,16,-1,-1,-1,-1,17,18,19},
{0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8, 9,-1,10,11,12,13,14,15,16,17}
};
}
#endif // R600DEFINES_H_

View File

@ -74,7 +74,10 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setOperationAction(ISD::STORE, MVT::i32, Custom);
setOperationAction(ISD::STORE, MVT::v4i32, Custom);
setOperationAction(ISD::LOAD, MVT::i32, Custom);
setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
setTargetDAGCombine(ISD::FP_ROUND);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setSchedulingPreference(Sched::VLIW);
}
@ -115,15 +118,6 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
break;
}
case AMDGPU::R600_LOAD_CONST: {
int64_t RegIndex = MI->getOperand(1).getImm();
unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
.addOperand(MI->getOperand(0))
.addReg(ConstantReg);
break;
}
case AMDGPU::MASK_WRITE: {
unsigned maskedRegister = MI->getOperand(0).getReg();
assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
@ -364,6 +358,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::LOAD: return LowerLOAD(Op, DAG);
case ISD::FPOW: return LowerFPOW(Op, DAG);
case ISD::INTRINSIC_VOID: {
SDValue Chain = Op.getOperand(0);
@ -527,6 +522,16 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
switch (N->getOpcode()) {
default: return;
case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
return;
case ISD::LOAD: {
SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
Results.push_back(SDValue(Node, 0));
Results.push_back(SDValue(Node, 1));
// XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
// function
DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
return;
}
}
}
@ -832,6 +837,94 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
// return (512 + (kc_bank << 12)
static int
ConstantAddressBlock(unsigned AddressSpace) {
switch (AddressSpace) {
case AMDGPUAS::CONSTANT_BUFFER_0:
return 512;
case AMDGPUAS::CONSTANT_BUFFER_1:
return 512 + 4096;
case AMDGPUAS::CONSTANT_BUFFER_2:
return 512 + 4096 * 2;
case AMDGPUAS::CONSTANT_BUFFER_3:
return 512 + 4096 * 3;
case AMDGPUAS::CONSTANT_BUFFER_4:
return 512 + 4096 * 4;
case AMDGPUAS::CONSTANT_BUFFER_5:
return 512 + 4096 * 5;
case AMDGPUAS::CONSTANT_BUFFER_6:
return 512 + 4096 * 6;
case AMDGPUAS::CONSTANT_BUFFER_7:
return 512 + 4096 * 7;
case AMDGPUAS::CONSTANT_BUFFER_8:
return 512 + 4096 * 8;
case AMDGPUAS::CONSTANT_BUFFER_9:
return 512 + 4096 * 9;
case AMDGPUAS::CONSTANT_BUFFER_10:
return 512 + 4096 * 10;
case AMDGPUAS::CONSTANT_BUFFER_11:
return 512 + 4096 * 11;
case AMDGPUAS::CONSTANT_BUFFER_12:
return 512 + 4096 * 12;
case AMDGPUAS::CONSTANT_BUFFER_13:
return 512 + 4096 * 13;
case AMDGPUAS::CONSTANT_BUFFER_14:
return 512 + 4096 * 14;
case AMDGPUAS::CONSTANT_BUFFER_15:
return 512 + 4096 * 15;
default:
return -1;
}
}
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
EVT VT = Op.getValueType();
DebugLoc DL = Op.getDebugLoc();
LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
SDValue Chain = Op.getOperand(0);
SDValue Ptr = Op.getOperand(1);
SDValue LoweredLoad;
int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
if (ConstantBlock > -1) {
SDValue Result;
if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
dyn_cast<Constant>(LoadNode->getSrcValue())) {
SDValue Slots[4];
for (unsigned i = 0; i < 4; i++) {
// We want Const position encoded with the following formula :
// (((512 + (kc_bank << 12) + const_index) << 2) + chan)
// const_index is Ptr computed by llvm using an alignment of 16.
// Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
// then div by 4 at the ISel step
SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
}
Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4);
} else {
// non constant ptr cant be folded, keeps it as a v4f32 load
Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32))
);
}
if (!VT.isVector()) {
Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
DAG.getConstant(0, MVT::i32));
}
SDValue MergedValues[2] = {
Result,
Chain
};
return DAG.getMergeValues(MergedValues, 2, DL);
}
return SDValue();
}
SDValue R600TargetLowering::LowerFPOW(SDValue Op,
SelectionDAG &DAG) const {
@ -904,6 +997,17 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
}
break;
}
// Extract_vec (Build_vector) generated by custom lowering
// also needs to be customly combined
case ISD::EXTRACT_VECTOR_ELT: {
SDValue Arg = N->getOperand(0);
if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
unsigned Element = Const->getZExtValue();
return Arg->getOperand(Element);
}
}
}
}
return SDValue();
}

View File

@ -63,6 +63,7 @@ private:
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
bool isZero(SDValue Op) const;
};

View File

@ -486,13 +486,15 @@ MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MB
.addReg(Src0Reg) // $src0
.addImm(0) // $src0_neg
.addImm(0) // $src0_rel
.addImm(0); // $src0_abs
.addImm(0) // $src0_abs
.addImm(-1); // $src0_sel
if (Src1Reg) {
MIB.addReg(Src1Reg) // $src1
.addImm(0) // $src1_neg
.addImm(0) // $src1_rel
.addImm(0); // $src1_abs
.addImm(0) // $src1_abs
.addImm(-1); // $src1_sel
}
//XXX: The r600g finalizer expects this to be 1, once we've moved the
@ -521,16 +523,6 @@ int R600InstrInfo::getOperandIdx(const MachineInstr &MI,
int R600InstrInfo::getOperandIdx(unsigned Opcode,
R600Operands::Ops Op) const {
const static int OpTable[3][R600Operands::COUNT] = {
// W C S S S S S S S S
// R O D L S R R R S R R R S R R L P
// D U I M R A R C C C C C C C R C C A R I
// S E U T O E M C 0 0 0 C 1 1 1 C 2 2 S E M
// T M P E D L P 0 N R A 1 N R A 2 N R T D M
{0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8,-1,-1,-1,-1,-1,-1,-1, 9,10,11},
{0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,-1,-1,-1,13,14,15,16,17},
{0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8,-1, 9,10,11,12,13,14}
};
unsigned TargetFlags = get(Opcode).TSFlags;
unsigned OpTableIdx;
@ -556,7 +548,7 @@ int R600InstrInfo::getOperandIdx(unsigned Opcode,
OpTableIdx = 2;
}
return OpTable[OpTableIdx][Op];
return R600Operands::ALUOpTable[OpTableIdx][Op];
}
void R600InstrInfo::setImmOperand(MachineInstr *MI, R600Operands::Ops Op,

View File

@ -70,6 +70,11 @@ class InstFlag<string PM = "printOperand", int Default = 0>
let PrintMethod = PM;
}
// src_sel for ALU src operands, see also ALU_CONST, ALU_PARAM registers
def SEL : OperandWithDefaultOps <i32, (ops (i32 -1))> {
let PrintMethod = "printSel";
}
def LITERAL : InstFlag<"printLiteral">;
def WRITE : InstFlag <"printWrite", 1>;
@ -89,6 +94,8 @@ def LAST : InstFlag<"printLast", 1>;
def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>;
def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>;
class R600ALU_Word0 {
field bits<32> Word0;
@ -263,11 +270,11 @@ class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
InstR600 <0,
(outs R600_Reg32:$dst),
(ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs,
R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
!strconcat(opName,
"$clamp $dst$write$dst_rel$omod, "
"$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
"$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, "
"$literal $pred_sel$last"),
pattern,
itin>,
@ -303,13 +310,13 @@ class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
(outs R600_Reg32:$dst),
(ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write,
OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs,
R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs,
R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel,
LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
!strconcat(opName,
"$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
"$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
"$src1_neg$src1_abs$src1$src1_abs$src1_rel, "
"$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, "
"$src1_neg$src1_abs$src1$src1_sel$src1_abs$src1_rel, "
"$literal $pred_sel$last"),
pattern,
itin>,
@ -340,14 +347,14 @@ class R600_3OP <bits<5> inst, string opName, list<dag> pattern,
InstR600 <0,
(outs R600_Reg32:$dst),
(ins REL:$dst_rel, CLAMP:$clamp,
R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel,
R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel,
R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel,
R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel,
R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel,
R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel,
LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
!strconcat(opName, "$clamp $dst$dst_rel, "
"$src0_neg$src0$src0_rel, "
"$src1_neg$src1$src1_rel, "
"$src2_neg$src2$src2_rel, "
"$src0_neg$src0$src0_sel$src0_rel, "
"$src1_neg$src1$src1_sel$src1_rel, "
"$src2_neg$src2$src2_sel$src2_rel, "
"$literal $pred_sel$last"),
pattern,
itin>,
@ -482,7 +489,7 @@ def INTERP_P0: SDNode<"AMDGPUISD::INTERP_P0",
>;
def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",
SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisPtrTy<1>]>,
SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>,
[SDNPMayLoad]
>;
@ -1538,12 +1545,6 @@ def MASK_WRITE : AMDGPUShaderInst <
} // End mayLoad = 0, mayStore = 0, hasSideEffects = 1
def R600_LOAD_CONST : AMDGPUShaderInst <
(outs R600_Reg32:$dst),
(ins i32imm:$src0),
"R600_LOAD_CONST $dst, $src0",
[(set R600_Reg32:$dst, (int_AMDGPU_load_const imm:$src0))]
>;
def RESERVE_REG : AMDGPUShaderInst <
(outs),
@ -1551,7 +1552,6 @@ def RESERVE_REG : AMDGPUShaderInst <
"RESERVE_REG $src",
[(int_AMDGPU_reserve_reg imm:$src)]
>;
def TXD: AMDGPUShaderInst <
(outs R600_Reg128:$dst),
(ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
@ -1581,6 +1581,78 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in {
"RETURN", [(IL_retflag)]>;
}
//===----------------------------------------------------------------------===//
// Constant Buffer Addressing Support
//===----------------------------------------------------------------------===//
let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in {
def CONST_COPY : Instruction {
let OutOperandList = (outs R600_Reg32:$dst);
let InOperandList = (ins i32imm:$src);
let Pattern = [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))];
let AsmString = "CONST_COPY";
let neverHasSideEffects = 1;
let isAsCheapAsAMove = 1;
let Itinerary = NullALU;
}
} // end isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"
def TEX_VTX_CONSTBUF :
InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr), "VTX_READ_eg $dst, $ptr",
[(set R600_Reg128:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr))]>,
VTX_WORD1_GPR, VTX_WORD0 {
let VC_INST = 0;
let FETCH_TYPE = 2;
let FETCH_WHOLE_QUAD = 0;
let BUFFER_ID = 0;
let SRC_REL = 0;
let SRC_SEL_X = 0;
let DST_REL = 0;
let USE_CONST_FIELDS = 0;
let NUM_FORMAT_ALL = 2;
let FORMAT_COMP_ALL = 1;
let SRF_MODE_ALL = 1;
let MEGA_FETCH_COUNT = 16;
let DST_SEL_X = 0;
let DST_SEL_Y = 1;
let DST_SEL_Z = 2;
let DST_SEL_W = 3;
let DATA_FORMAT = 35;
let Inst{31-0} = Word0;
let Inst{63-32} = Word1;
// LLVM can only encode 64-bit instructions, so these fields are manually
// encoded in R600CodeEmitter
//
// bits<16> OFFSET;
// bits<2> ENDIAN_SWAP = 0;
// bits<1> CONST_BUF_NO_STRIDE = 0;
// bits<1> MEGA_FETCH = 0;
// bits<1> ALT_CONST = 0;
// bits<2> BUFFER_INDEX_MODE = 0;
// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
// is done in R600CodeEmitter
//
// Inst{79-64} = OFFSET;
// Inst{81-80} = ENDIAN_SWAP;
// Inst{82} = CONST_BUF_NO_STRIDE;
// Inst{83} = MEGA_FETCH;
// Inst{84} = ALT_CONST;
// Inst{86-85} = BUFFER_INDEX_MODE;
// Inst{95-86} = 0; Reserved
// VTX_WORD3 (Padding)
//
// Inst{127-96} = 0;
}
//===--------------------------------------------------------------------===//
// Instructions support
//===--------------------------------------------------------------------===//

View File

@ -0,0 +1,74 @@
//===-- R600LowerConstCopy.cpp - Propagate ConstCopy / lower them to MOV---===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass is intended to handle remaining ConstCopy pseudo MachineInstr.
/// ISel will fold each Const Buffer read inside scalar ALU. However it cannot
/// fold them inside vector instruction, like DOT4 or Cube ; ISel emits
/// ConstCopy instead. This pass (executed after ExpandingSpecialInstr) will try
/// to fold them if possible or replace them by MOV otherwise.
/// TODO : Implement the folding part, using Copy Propagation algorithm.
//
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "R600InstrInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/IR/GlobalValue.h"
namespace llvm {
class R600LowerConstCopy : public MachineFunctionPass {
private:
static char ID;
const R600InstrInfo *TII;
public:
R600LowerConstCopy(TargetMachine &tm);
virtual bool runOnMachineFunction(MachineFunction &MF);
const char *getPassName() const { return "R600 Eliminate Symbolic Operand"; }
};
char R600LowerConstCopy::ID = 0;
R600LowerConstCopy::R600LowerConstCopy(TargetMachine &tm) :
MachineFunctionPass(ID),
TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo()))
{
}
bool R600LowerConstCopy::runOnMachineFunction(MachineFunction &MF) {
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
BB != BB_E; ++BB) {
MachineBasicBlock &MBB = *BB;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E;) {
MachineInstr &MI = *I;
I = llvm::next(I);
if (MI.getOpcode() != AMDGPU::CONST_COPY)
continue;
MachineInstr *NewMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::MOV,
MI.getOperand(0).getReg(), AMDGPU::ALU_CONST);
NewMI->getOperand(9).setImm(MI.getOperand(1).getImm());
MI.eraseFromParent();
}
}
return false;
}
FunctionPass *createR600LowerConstCopy(TargetMachine &tm) {
return new R600LowerConstCopy(tm);
}
}

View File

@ -38,16 +38,12 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(AMDGPU::NEG_ONE);
Reserved.set(AMDGPU::PV_X);
Reserved.set(AMDGPU::ALU_LITERAL_X);
Reserved.set(AMDGPU::ALU_CONST);
Reserved.set(AMDGPU::PREDICATE_BIT);
Reserved.set(AMDGPU::PRED_SEL_OFF);
Reserved.set(AMDGPU::PRED_SEL_ZERO);
Reserved.set(AMDGPU::PRED_SEL_ONE);
for (TargetRegisterClass::iterator I = AMDGPU::R600_CReg32RegClass.begin(),
E = AMDGPU::R600_CReg32RegClass.end(); I != E; ++I) {
Reserved.set(*I);
}
for (std::vector<unsigned>::const_iterator I = MFI->ReservedRegs.begin(),
E = MFI->ReservedRegs.end(); I != E; ++I) {
Reserved.set(*I);

View File

@ -27,10 +27,6 @@ foreach Index = 0-127 in {
foreach Chan = [ "X", "Y", "Z", "W" ] in {
// 32-bit Temporary Registers
def T#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index, Chan>;
// 32-bit Constant Registers (There are more than 128, this the number
// that is currently supported.
def C#Index#_#Chan : R600RegWithChan <"C"#Index#"."#Chan, Index, Chan>;
}
// 128-bit Temporary Registers
def T#Index#_XYZW : R600Reg_128 <"T"#Index#".XYZW",
@ -64,13 +60,11 @@ def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>;
def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32,
(add (sequence "ArrayBase%u", 448, 464))>;
def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
(add (interleave
(interleave (sequence "C%u_X", 0, 127),
(sequence "C%u_Z", 0, 127)),
(interleave (sequence "C%u_Y", 0, 127),
(sequence "C%u_W", 0, 127))))>;
// special registers for ALU src operands
// const buffer reference, SRCx_SEL contains index
def ALU_CONST : R600Reg<"CBuf", 0>;
// interpolation param reference, SRCx_SEL contains index
def ALU_PARAM : R600Reg<"Param", 0>;
def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32,
(add (sequence "T%u_X", 0, 127))>;
@ -85,15 +79,15 @@ def R600_TReg32_W : RegisterClass <"AMDGPU", [f32, i32], 32,
(add (sequence "T%u_W", 0, 127))>;
def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
(add (interleave
(interleave R600_TReg32_X, R600_TReg32_Z),
(interleave R600_TReg32_Y, R600_TReg32_W)))>;
(interleave R600_TReg32_X, R600_TReg32_Y,
R600_TReg32_Z, R600_TReg32_W)>;
def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
R600_TReg32,
R600_CReg32,
R600_ArrayBase,
ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>;
ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF,
ALU_CONST, ALU_PARAM
)>;
def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add
PRED_SEL_OFF, PRED_SEL_ZERO, PRED_SEL_ONE)>;