mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-21 09:25:07 +00:00
R600: rework handling of the constants
Remove Cxxx registers; add a new special register, "ALU_CONST", and a new operand, "sel", for each ALU src. ALU_CONST designates that the new operand contains the value that overrides src.sel, src.kc_bank and src.chan for constants in the driver. Patch by: Vadim Girlin. Vincent Lejeune: - Use pointers for constants - Fold CONST_ADDRESS when possible. Tom Stellard: - Give CONSTANT_BUFFER_0 its own address space - Use integer types for constant loads. Reviewed-by: Tom Stellard <thomas.stellard@amd.com> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173222 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
c7e1888d93
commit
9f7818d9bd
@ -23,6 +23,7 @@ class AMDGPUTargetMachine;
|
||||
// R600 Passes
|
||||
FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
|
||||
FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
|
||||
FunctionPass *createR600LowerConstCopy(TargetMachine &tm);
|
||||
|
||||
// SI Passes
|
||||
FunctionPass *createSIAnnotateControlFlowPass();
|
||||
|
@ -136,6 +136,7 @@ bool AMDGPUPassConfig::addPreEmitPass() {
|
||||
addPass(createAMDGPUCFGPreparationPass(*TM));
|
||||
addPass(createAMDGPUCFGStructurizerPass(*TM));
|
||||
addPass(createR600ExpandSpecialInstrsPass(*TM));
|
||||
addPass(createR600LowerConstCopy(*TM));
|
||||
addPass(&FinalizeMachineBundlesID);
|
||||
} else {
|
||||
addPass(createSILowerLiteralConstantsPass(*TM));
|
||||
|
@ -90,14 +90,30 @@ namespace AMDGPUAS {
|
||||
enum AddressSpaces {
|
||||
PRIVATE_ADDRESS = 0, ///< Address space for private memory.
|
||||
GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
|
||||
CONSTANT_ADDRESS = 2, ///< Address space for constant memory.
|
||||
CONSTANT_ADDRESS = 2, ///< Address space for constant memory
|
||||
LOCAL_ADDRESS = 3, ///< Address space for local memory.
|
||||
REGION_ADDRESS = 4, ///< Address space for region memory.
|
||||
ADDRESS_NONE = 5, ///< Address space for unknown memory.
|
||||
PARAM_D_ADDRESS = 6, ///< Address space for direct addressible parameter memory (CONST0)
|
||||
PARAM_I_ADDRESS = 7, ///< Address space for indirect addressible parameter memory (VTX1)
|
||||
USER_SGPR_ADDRESS = 8, ///< Address space for USER_SGPRS on SI
|
||||
LAST_ADDRESS = 9
|
||||
CONSTANT_BUFFER_0 = 9,
|
||||
CONSTANT_BUFFER_1 = 10,
|
||||
CONSTANT_BUFFER_2 = 11,
|
||||
CONSTANT_BUFFER_3 = 12,
|
||||
CONSTANT_BUFFER_4 = 13,
|
||||
CONSTANT_BUFFER_5 = 14,
|
||||
CONSTANT_BUFFER_6 = 15,
|
||||
CONSTANT_BUFFER_7 = 16,
|
||||
CONSTANT_BUFFER_8 = 17,
|
||||
CONSTANT_BUFFER_9 = 18,
|
||||
CONSTANT_BUFFER_10 = 19,
|
||||
CONSTANT_BUFFER_11 = 20,
|
||||
CONSTANT_BUFFER_12 = 21,
|
||||
CONSTANT_BUFFER_13 = 22,
|
||||
CONSTANT_BUFFER_14 = 23,
|
||||
CONSTANT_BUFFER_15 = 24,
|
||||
LAST_ADDRESS = 25
|
||||
};
|
||||
|
||||
} // namespace AMDGPUAS
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "llvm/CodeGen/PseudoSourceValue.h"
|
||||
#include "llvm/CodeGen/SelectionDAGISel.h"
|
||||
#include "llvm/Support/Compiler.h"
|
||||
#include "llvm/CodeGen/SelectionDAG.h"
|
||||
#include <list>
|
||||
#include <queue>
|
||||
|
||||
@ -45,6 +46,7 @@ public:
|
||||
|
||||
private:
|
||||
inline SDValue getSmallIPtrImm(unsigned Imm);
|
||||
bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
|
||||
|
||||
// Complex pattern selectors
|
||||
bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
|
||||
@ -67,6 +69,9 @@ private:
|
||||
static bool isLocalLoad(const LoadSDNode *N);
|
||||
static bool isRegionLoad(const LoadSDNode *N);
|
||||
|
||||
bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
|
||||
bool SelectGlobalValueVariableOffset(SDValue Addr,
|
||||
SDValue &BaseReg, SDValue& Offset);
|
||||
bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset);
|
||||
bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset);
|
||||
bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
|
||||
@ -259,7 +264,65 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return SelectCode(N);
|
||||
SDNode *Result = SelectCode(N);
|
||||
|
||||
// Fold operands of selected node
|
||||
|
||||
const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
|
||||
if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
|
||||
const R600InstrInfo *TII =
|
||||
static_cast<const R600InstrInfo*>(TM.getInstrInfo());
|
||||
if (Result && TII->isALUInstr(Result->getMachineOpcode())) {
|
||||
bool IsModified = false;
|
||||
do {
|
||||
std::vector<SDValue> Ops;
|
||||
for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
|
||||
I != E; ++I)
|
||||
Ops.push_back(*I);
|
||||
IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops);
|
||||
if (IsModified) {
|
||||
Result = CurDAG->MorphNodeTo(Result, Result->getOpcode(),
|
||||
Result->getVTList(), Ops.data(), Ops.size());
|
||||
}
|
||||
} while (IsModified);
|
||||
}
|
||||
}
|
||||
|
||||
return Result;
|
||||
}
|
||||
|
||||
bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
|
||||
const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
|
||||
int OperandIdx[] = {
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC0),
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC1),
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC2)
|
||||
};
|
||||
int SelIdx[] = {
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL),
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL),
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC2_SEL)
|
||||
};
|
||||
for (unsigned i = 0; i < 3; i++) {
|
||||
if (OperandIdx[i] < 0)
|
||||
return false;
|
||||
SDValue Operand = Ops[OperandIdx[i] - 1];
|
||||
switch (Operand.getOpcode()) {
|
||||
case AMDGPUISD::CONST_ADDRESS: {
|
||||
SDValue CstOffset;
|
||||
if (!Operand.getValueType().isVector() &&
|
||||
SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset)) {
|
||||
Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
|
||||
Ops[SelIdx[i] - 1] = CstOffset;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
|
||||
@ -406,6 +469,25 @@ const char *AMDGPUDAGToDAGISel::getPassName() const {
|
||||
|
||||
///==== AMDGPU Functions ====///
|
||||
|
||||
/// Complex-pattern selector for an address that is a compile-time constant.
///
/// Matches only when \p Addr is a ConstantSDNode. On a match, \p IntPtr
/// receives the node's zero-extended value divided by 4, built with
/// getIntPtrConstant(..., true).
///
/// \returns true on a match; otherwise false and \p IntPtr is left untouched.
bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                         SDValue& IntPtr) {
  ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Addr);
  if (!Imm)
    return false;

  IntPtr = CurDAG->getIntPtrConstant(Imm->getZExtValue() / 4, true);
  return true;
}
|
||||
|
||||
/// Complex-pattern selector for an address that is NOT a compile-time
/// constant (the counterpart of SelectGlobalValueConstantOffset).
///
/// When \p Addr is anything other than a ConstantSDNode, it is returned
/// unchanged in \p BaseReg and \p Offset is set to a zero pointer constant.
///
/// \returns true on a match; false (outputs untouched) for a constant address.
bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
                                                         SDValue& BaseReg,
                                                         SDValue &Offset) {
  // Constant addresses belong to the constant-offset selector.
  if (isa<ConstantSDNode>(Addr))
    return false;

  BaseReg = Addr;
  Offset = CurDAG->getIntPtrConstant(0, true);
  return true;
}
|
||||
|
||||
bool AMDGPUDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base,
|
||||
SDValue& Offset) {
|
||||
if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
|
||||
|
@ -36,6 +36,7 @@ add_llvm_target(R600CodeGen
|
||||
R600ExpandSpecialInstrs.cpp
|
||||
R600InstrInfo.cpp
|
||||
R600ISelLowering.cpp
|
||||
R600LowerConstCopy.cpp
|
||||
R600MachineFunctionInfo.cpp
|
||||
R600RegisterInfo.cpp
|
||||
SIAnnotateControlFlow.cpp
|
||||
|
@ -129,4 +129,28 @@ void AMDGPUInstPrinter::printWrite(const MCInst *MI, unsigned OpNo,
|
||||
}
|
||||
}
|
||||
|
||||
void AMDGPUInstPrinter::printSel(const MCInst *MI, unsigned OpNo,
|
||||
raw_ostream &O) {
|
||||
const char * chans = "XYZW";
|
||||
int sel = MI->getOperand(OpNo).getImm();
|
||||
|
||||
int chan = sel & 3;
|
||||
sel >>= 2;
|
||||
|
||||
if (sel >= 512) {
|
||||
sel -= 512;
|
||||
int cb = sel >> 12;
|
||||
sel &= 4095;
|
||||
O << cb << "[" << sel << "]";
|
||||
} else if (sel >= 448) {
|
||||
sel -= 448;
|
||||
O << sel;
|
||||
} else if (sel >= 0){
|
||||
O << sel;
|
||||
}
|
||||
|
||||
if (sel >= 0)
|
||||
O << "." << chans[chan];
|
||||
}
|
||||
|
||||
#include "AMDGPUGenAsmWriter.inc"
|
||||
|
@ -45,6 +45,7 @@ private:
|
||||
void printUpdateExecMask(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
void printUpdatePred(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
void printSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
};
|
||||
|
||||
} // End namespace llvm
|
||||
|
@ -63,8 +63,8 @@ private:
|
||||
void EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
|
||||
raw_ostream &OS) const;
|
||||
void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const;
|
||||
void EmitSrcISA(const MCInst &MI, unsigned OpIdx, uint64_t &Value,
|
||||
raw_ostream &OS) const;
|
||||
void EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, unsigned SelOpIdx,
|
||||
raw_ostream &OS) const;
|
||||
void EmitDst(const MCInst &MI, raw_ostream &OS) const;
|
||||
void EmitTexInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
|
||||
raw_ostream &OS) const;
|
||||
@ -163,7 +163,8 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
|
||||
case AMDGPU::VTX_READ_PARAM_32_eg:
|
||||
case AMDGPU::VTX_READ_GLOBAL_8_eg:
|
||||
case AMDGPU::VTX_READ_GLOBAL_32_eg:
|
||||
case AMDGPU::VTX_READ_GLOBAL_128_eg: {
|
||||
case AMDGPU::VTX_READ_GLOBAL_128_eg:
|
||||
case AMDGPU::TEX_VTX_CONSTBUF: {
|
||||
uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
|
||||
uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
|
||||
|
||||
@ -193,7 +194,6 @@ void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
raw_ostream &OS) const {
|
||||
const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
|
||||
unsigned NumOperands = MI.getNumOperands();
|
||||
|
||||
// Emit instruction type
|
||||
EmitByte(INSTR_ALU, OS);
|
||||
@ -209,19 +209,21 @@ void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI,
|
||||
InstWord01 |= ISAOpCode << 1;
|
||||
}
|
||||
|
||||
unsigned SrcIdx = 0;
|
||||
for (unsigned int OpIdx = 1; OpIdx < NumOperands; ++OpIdx) {
|
||||
if (MI.getOperand(OpIdx).isImm() || MI.getOperand(OpIdx).isFPImm() ||
|
||||
OpIdx == (unsigned)MCDesc.findFirstPredOperandIdx()) {
|
||||
continue;
|
||||
}
|
||||
EmitSrcISA(MI, OpIdx, InstWord01, OS);
|
||||
SrcIdx++;
|
||||
}
|
||||
unsigned SrcNum = MCDesc.TSFlags & R600_InstFlag::OP3 ? 3 :
|
||||
MCDesc.TSFlags & R600_InstFlag::OP2 ? 2 : 1;
|
||||
|
||||
// Emit zeros for unused sources
|
||||
for ( ; SrcIdx < 3; SrcIdx++) {
|
||||
EmitNullBytes(SRC_BYTE_COUNT - 6, OS);
|
||||
EmitByte(SrcNum, OS);
|
||||
|
||||
const unsigned SrcOps[3][2] = {
|
||||
{R600Operands::SRC0, R600Operands::SRC0_SEL},
|
||||
{R600Operands::SRC1, R600Operands::SRC1_SEL},
|
||||
{R600Operands::SRC2, R600Operands::SRC2_SEL}
|
||||
};
|
||||
|
||||
for (unsigned SrcIdx = 0; SrcIdx < SrcNum; ++SrcIdx) {
|
||||
unsigned RegOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][0]];
|
||||
unsigned SelOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][1]];
|
||||
EmitSrcISA(MI, RegOpIdx, SelOpIdx, OS);
|
||||
}
|
||||
|
||||
Emit(InstWord01, OS);
|
||||
@ -292,34 +294,37 @@ void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx,
|
||||
|
||||
}
|
||||
|
||||
void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned OpIdx,
|
||||
uint64_t &Value, raw_ostream &OS) const {
|
||||
const MCOperand &MO = MI.getOperand(OpIdx);
|
||||
void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned RegOpIdx,
|
||||
unsigned SelOpIdx, raw_ostream &OS) const {
|
||||
const MCOperand &RegMO = MI.getOperand(RegOpIdx);
|
||||
const MCOperand &SelMO = MI.getOperand(SelOpIdx);
|
||||
|
||||
union {
|
||||
float f;
|
||||
uint32_t i;
|
||||
} InlineConstant;
|
||||
InlineConstant.i = 0;
|
||||
// Emit the source select (2 bytes). For GPRs, this is the register index.
|
||||
// For other potential instruction operands, (e.g. constant registers) the
|
||||
// value of the source select is defined in the r600isa docs.
|
||||
if (MO.isReg()) {
|
||||
unsigned Reg = MO.getReg();
|
||||
if (AMDGPUMCRegisterClasses[AMDGPU::R600_CReg32RegClassID].contains(Reg)) {
|
||||
EmitByte(1, OS);
|
||||
} else {
|
||||
EmitByte(0, OS);
|
||||
}
|
||||
// Emit source type (1 byte) and source select (4 bytes). For GPRs type is 0
|
||||
// and select is 0 (GPR index is encoded in the instr encoding. For constants
|
||||
// type is 1 and select is the original const select passed from the driver.
|
||||
unsigned Reg = RegMO.getReg();
|
||||
if (Reg == AMDGPU::ALU_CONST) {
|
||||
EmitByte(1, OS);
|
||||
uint32_t Sel = SelMO.getImm();
|
||||
Emit(Sel, OS);
|
||||
} else {
|
||||
EmitByte(0, OS);
|
||||
Emit((uint32_t)0, OS);
|
||||
}
|
||||
|
||||
if (Reg == AMDGPU::ALU_LITERAL_X) {
|
||||
unsigned ImmOpIndex = MI.getNumOperands() - 1;
|
||||
MCOperand ImmOp = MI.getOperand(ImmOpIndex);
|
||||
if (ImmOp.isFPImm()) {
|
||||
InlineConstant.f = ImmOp.getFPImm();
|
||||
} else {
|
||||
assert(ImmOp.isImm());
|
||||
InlineConstant.i = ImmOp.getImm();
|
||||
}
|
||||
if (Reg == AMDGPU::ALU_LITERAL_X) {
|
||||
unsigned ImmOpIndex = MI.getNumOperands() - 1;
|
||||
MCOperand ImmOp = MI.getOperand(ImmOpIndex);
|
||||
if (ImmOp.isFPImm()) {
|
||||
InlineConstant.f = ImmOp.getFPImm();
|
||||
} else {
|
||||
assert(ImmOp.isImm());
|
||||
InlineConstant.i = ImmOp.getImm();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -62,18 +62,33 @@ namespace R600Operands {
|
||||
SRC0_NEG,
|
||||
SRC0_REL,
|
||||
SRC0_ABS,
|
||||
SRC0_SEL,
|
||||
SRC1,
|
||||
SRC1_NEG,
|
||||
SRC1_REL,
|
||||
SRC1_ABS,
|
||||
SRC1_SEL,
|
||||
SRC2,
|
||||
SRC2_NEG,
|
||||
SRC2_REL,
|
||||
SRC2_SEL,
|
||||
LAST,
|
||||
PRED_SEL,
|
||||
IMM,
|
||||
COUNT
|
||||
};
|
||||
|
||||
// Operand-index lookup table for ALU instructions: ALUOpTable[row][op] is the
// position of operand `op` (an R600Operands::Ops value) in the instruction's
// operand list, or -1 when that instruction form has no such operand.
// The row is the number of sources minus one (0: one source, 1: two sources,
// 2: three sources).
//
// Column order (one entry per enum value, 24 in total):
//   dst, update_exec_mask, update_pred, write, omod, dst_rel, clamp,
//   src0, src0_neg, src0_rel, src0_abs, src0_sel,
//   src1, src1_neg, src1_rel, src1_abs, src1_sel,
//   src2, src2_neg, src2_rel, src2_sel,
//   last, pred_sel, imm
const static int ALUOpTable[3][R600Operands::COUNT] = {
  // One source.
  {  0, -1, -1,  1,  2,  3,  4,   // dst .. clamp
     5,  6,  7,  8,  9,           // src0 group
    -1, -1, -1, -1, -1,           // src1 group (absent)
    -1, -1, -1, -1,               // src2 group (absent)
    10, 11, 12 },                 // last, pred_sel, imm
  // Two sources.
  {  0,  1,  2,  3,  4,  5,  6,   // dst .. clamp
     7,  8,  9, 10, 11,           // src0 group
    12, 13, 14, 15, 16,           // src1 group
    -1, -1, -1, -1,               // src2 group (absent)
    17, 18, 19 },                 // last, pred_sel, imm
  // Three sources (no update_exec_mask/update_pred/write/omod, no abs).
  {  0, -1, -1, -1, -1,  1,  2,   // dst .. clamp
     3,  4,  5, -1,  6,           // src0 group
     7,  8,  9, -1, 10,           // src1 group
    11, 12, 13, 14,               // src2 group
    15, 16, 17 }                  // last, pred_sel, imm
};
|
||||
|
||||
}
|
||||
|
||||
#endif // R600DEFINES_H_
|
||||
|
@ -74,7 +74,10 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
|
||||
setOperationAction(ISD::STORE, MVT::i32, Custom);
|
||||
setOperationAction(ISD::STORE, MVT::v4i32, Custom);
|
||||
|
||||
setOperationAction(ISD::LOAD, MVT::i32, Custom);
|
||||
setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
|
||||
setTargetDAGCombine(ISD::FP_ROUND);
|
||||
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
|
||||
|
||||
setSchedulingPreference(Sched::VLIW);
|
||||
}
|
||||
@ -115,15 +118,6 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
|
||||
break;
|
||||
}
|
||||
|
||||
case AMDGPU::R600_LOAD_CONST: {
|
||||
int64_t RegIndex = MI->getOperand(1).getImm();
|
||||
unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
|
||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
|
||||
.addOperand(MI->getOperand(0))
|
||||
.addReg(ConstantReg);
|
||||
break;
|
||||
}
|
||||
|
||||
case AMDGPU::MASK_WRITE: {
|
||||
unsigned maskedRegister = MI->getOperand(0).getReg();
|
||||
assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
|
||||
@ -364,6 +358,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
|
||||
case ISD::SELECT: return LowerSELECT(Op, DAG);
|
||||
case ISD::SETCC: return LowerSETCC(Op, DAG);
|
||||
case ISD::STORE: return LowerSTORE(Op, DAG);
|
||||
case ISD::LOAD: return LowerLOAD(Op, DAG);
|
||||
case ISD::FPOW: return LowerFPOW(Op, DAG);
|
||||
case ISD::INTRINSIC_VOID: {
|
||||
SDValue Chain = Op.getOperand(0);
|
||||
@ -527,6 +522,16 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
|
||||
switch (N->getOpcode()) {
|
||||
default: return;
|
||||
case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
|
||||
return;
|
||||
case ISD::LOAD: {
|
||||
SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
|
||||
Results.push_back(SDValue(Node, 0));
|
||||
Results.push_back(SDValue(Node, 1));
|
||||
// XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
|
||||
// function
|
||||
DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -832,6 +837,94 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
// return (512 + (kc_bank << 12)
|
||||
static int
|
||||
ConstantAddressBlock(unsigned AddressSpace) {
|
||||
switch (AddressSpace) {
|
||||
case AMDGPUAS::CONSTANT_BUFFER_0:
|
||||
return 512;
|
||||
case AMDGPUAS::CONSTANT_BUFFER_1:
|
||||
return 512 + 4096;
|
||||
case AMDGPUAS::CONSTANT_BUFFER_2:
|
||||
return 512 + 4096 * 2;
|
||||
case AMDGPUAS::CONSTANT_BUFFER_3:
|
||||
return 512 + 4096 * 3;
|
||||
case AMDGPUAS::CONSTANT_BUFFER_4:
|
||||
return 512 + 4096 * 4;
|
||||
case AMDGPUAS::CONSTANT_BUFFER_5:
|
||||
return 512 + 4096 * 5;
|
||||
case AMDGPUAS::CONSTANT_BUFFER_6:
|
||||
return 512 + 4096 * 6;
|
||||
case AMDGPUAS::CONSTANT_BUFFER_7:
|
||||
return 512 + 4096 * 7;
|
||||
case AMDGPUAS::CONSTANT_BUFFER_8:
|
||||
return 512 + 4096 * 8;
|
||||
case AMDGPUAS::CONSTANT_BUFFER_9:
|
||||
return 512 + 4096 * 9;
|
||||
case AMDGPUAS::CONSTANT_BUFFER_10:
|
||||
return 512 + 4096 * 10;
|
||||
case AMDGPUAS::CONSTANT_BUFFER_11:
|
||||
return 512 + 4096 * 11;
|
||||
case AMDGPUAS::CONSTANT_BUFFER_12:
|
||||
return 512 + 4096 * 12;
|
||||
case AMDGPUAS::CONSTANT_BUFFER_13:
|
||||
return 512 + 4096 * 13;
|
||||
case AMDGPUAS::CONSTANT_BUFFER_14:
|
||||
return 512 + 4096 * 14;
|
||||
case AMDGPUAS::CONSTANT_BUFFER_15:
|
||||
return 512 + 4096 * 15;
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/// Custom lowering of loads from the constant-buffer address spaces.
///
/// Loads from any other address space are not handled: an empty SDValue is
/// returned. For a constant-buffer load the result is a merge of
/// {value, incoming chain}, where the value is built from CONST_ADDRESS nodes:
///   - when the load's source value is a Constant, four scalar i32
///     CONST_ADDRESS nodes (one per channel) are assembled with BUILD_VECTOR;
///   - otherwise a single v4i32 CONST_ADDRESS node is emitted.
/// For a non-vector result type, element 0 of that vector is extracted.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  LoadSDNode *Load = cast<LoadSDNode>(Op);

  const int BufferBase = ConstantAddressBlock(Load->getAddressSpace());
  if (BufferBase < 0)
    return SDValue();

  EVT VT = Op.getValueType();
  DebugLoc DL = Op.getDebugLoc();
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);

  SDValue Loaded;
  if (isa<Constant>(Load->getSrcValue())) {
    // The const position is to be encoded as
    //   (((512 + (kc_bank << 12) + const_index) << 2) + chan)
    // const_index is Ptr as computed by LLVM with an alignment of 16, i.e.
    // Ptr == const_index * 16. Adding (4 * chan + BufferBase * 16) here and
    // dividing by 4 at the ISel step yields exactly that encoding.
    SDValue Channels[4];
    for (unsigned Chan = 0; Chan < 4; ++Chan) {
      SDValue ByteOffset =
          DAG.getConstant(4 * Chan + BufferBase * 16, MVT::i32);
      SDValue ChanPtr =
          DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, ByteOffset);
      Channels[Chan] =
          DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, ChanPtr);
    }
    Loaded = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Channels, 4);
  } else {
    // A non-constant pointer can't be folded; keep it as one v4i32 load whose
    // address is the pointer shifted right by 4 (byte offset / 16).
    SDValue ShiftAmount = DAG.getConstant(4, MVT::i32);
    SDValue VecIndex = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, ShiftAmount);
    Loaded = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32, VecIndex);
  }

  // Scalar loads only need the first channel of the vector built above.
  if (!VT.isVector()) {
    SDValue Zero = DAG.getConstant(0, MVT::i32);
    Loaded = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Loaded, Zero);
  }

  SDValue Merged[2] = { Loaded, Chain };
  return DAG.getMergeValues(Merged, 2, DL);
}
|
||||
|
||||
SDValue R600TargetLowering::LowerFPOW(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
@ -904,6 +997,17 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
|
||||
}
|
||||
break;
|
||||
}
|
||||
// Extract_vec (Build_vector) generated by custom lowering
|
||||
// also needs a custom combine
|
||||
case ISD::EXTRACT_VECTOR_ELT: {
|
||||
SDValue Arg = N->getOperand(0);
|
||||
if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
|
||||
if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
|
||||
unsigned Element = Const->getZExtValue();
|
||||
return Arg->getOperand(Element);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return SDValue();
|
||||
}
|
||||
|
@ -63,6 +63,7 @@ private:
|
||||
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
bool isZero(SDValue Op) const;
|
||||
};
|
||||
|
@ -486,13 +486,15 @@ MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MB
|
||||
.addReg(Src0Reg) // $src0
|
||||
.addImm(0) // $src0_neg
|
||||
.addImm(0) // $src0_rel
|
||||
.addImm(0); // $src0_abs
|
||||
.addImm(0) // $src0_abs
|
||||
.addImm(-1); // $src0_sel
|
||||
|
||||
if (Src1Reg) {
|
||||
MIB.addReg(Src1Reg) // $src1
|
||||
.addImm(0) // $src1_neg
|
||||
.addImm(0) // $src1_rel
|
||||
.addImm(0); // $src1_abs
|
||||
.addImm(0) // $src1_abs
|
||||
.addImm(-1); // $src1_sel
|
||||
}
|
||||
|
||||
//XXX: The r600g finalizer expects this to be 1, once we've moved the
|
||||
@ -521,16 +523,6 @@ int R600InstrInfo::getOperandIdx(const MachineInstr &MI,
|
||||
|
||||
int R600InstrInfo::getOperandIdx(unsigned Opcode,
|
||||
R600Operands::Ops Op) const {
|
||||
const static int OpTable[3][R600Operands::COUNT] = {
|
||||
// W C S S S S S S S S
|
||||
// R O D L S R R R S R R R S R R L P
|
||||
// D U I M R A R C C C C C C C R C C A R I
|
||||
// S E U T O E M C 0 0 0 C 1 1 1 C 2 2 S E M
|
||||
// T M P E D L P 0 N R A 1 N R A 2 N R T D M
|
||||
{0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8,-1,-1,-1,-1,-1,-1,-1, 9,10,11},
|
||||
{0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,-1,-1,-1,13,14,15,16,17},
|
||||
{0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8,-1, 9,10,11,12,13,14}
|
||||
};
|
||||
unsigned TargetFlags = get(Opcode).TSFlags;
|
||||
unsigned OpTableIdx;
|
||||
|
||||
@ -556,7 +548,7 @@ int R600InstrInfo::getOperandIdx(unsigned Opcode,
|
||||
OpTableIdx = 2;
|
||||
}
|
||||
|
||||
return OpTable[OpTableIdx][Op];
|
||||
return R600Operands::ALUOpTable[OpTableIdx][Op];
|
||||
}
|
||||
|
||||
void R600InstrInfo::setImmOperand(MachineInstr *MI, R600Operands::Ops Op,
|
||||
|
@ -70,6 +70,11 @@ class InstFlag<string PM = "printOperand", int Default = 0>
|
||||
let PrintMethod = PM;
|
||||
}
|
||||
|
||||
// src_sel for ALU src operands, see also ALU_CONST, ALU_PARAM registers
|
||||
def SEL : OperandWithDefaultOps <i32, (ops (i32 -1))> {
|
||||
let PrintMethod = "printSel";
|
||||
}
|
||||
|
||||
def LITERAL : InstFlag<"printLiteral">;
|
||||
|
||||
def WRITE : InstFlag <"printWrite", 1>;
|
||||
@ -89,6 +94,8 @@ def LAST : InstFlag<"printLast", 1>;
|
||||
def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
|
||||
def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
|
||||
def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
|
||||
def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>;
|
||||
def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>;
|
||||
|
||||
class R600ALU_Word0 {
|
||||
field bits<32> Word0;
|
||||
@ -263,11 +270,11 @@ class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
|
||||
InstR600 <0,
|
||||
(outs R600_Reg32:$dst),
|
||||
(ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
|
||||
R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs,
|
||||
R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
|
||||
LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
|
||||
!strconcat(opName,
|
||||
"$clamp $dst$write$dst_rel$omod, "
|
||||
"$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
|
||||
"$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, "
|
||||
"$literal $pred_sel$last"),
|
||||
pattern,
|
||||
itin>,
|
||||
@ -303,13 +310,13 @@ class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
|
||||
(outs R600_Reg32:$dst),
|
||||
(ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write,
|
||||
OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
|
||||
R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs,
|
||||
R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs,
|
||||
R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
|
||||
R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel,
|
||||
LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
|
||||
!strconcat(opName,
|
||||
"$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
|
||||
"$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
|
||||
"$src1_neg$src1_abs$src1$src1_abs$src1_rel, "
|
||||
"$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, "
|
||||
"$src1_neg$src1_abs$src1$src1_sel$src1_abs$src1_rel, "
|
||||
"$literal $pred_sel$last"),
|
||||
pattern,
|
||||
itin>,
|
||||
@ -340,14 +347,14 @@ class R600_3OP <bits<5> inst, string opName, list<dag> pattern,
|
||||
InstR600 <0,
|
||||
(outs R600_Reg32:$dst),
|
||||
(ins REL:$dst_rel, CLAMP:$clamp,
|
||||
R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel,
|
||||
R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel,
|
||||
R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel,
|
||||
R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel,
|
||||
R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel,
|
||||
R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel,
|
||||
LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
|
||||
!strconcat(opName, "$clamp $dst$dst_rel, "
|
||||
"$src0_neg$src0$src0_rel, "
|
||||
"$src1_neg$src1$src1_rel, "
|
||||
"$src2_neg$src2$src2_rel, "
|
||||
"$src0_neg$src0$src0_sel$src0_rel, "
|
||||
"$src1_neg$src1$src1_sel$src1_rel, "
|
||||
"$src2_neg$src2$src2_sel$src2_rel, "
|
||||
"$literal $pred_sel$last"),
|
||||
pattern,
|
||||
itin>,
|
||||
@ -482,7 +489,7 @@ def INTERP_P0: SDNode<"AMDGPUISD::INTERP_P0",
|
||||
>;
|
||||
|
||||
def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",
|
||||
SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisPtrTy<1>]>,
|
||||
SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>,
|
||||
[SDNPMayLoad]
|
||||
>;
|
||||
|
||||
@ -1538,12 +1545,6 @@ def MASK_WRITE : AMDGPUShaderInst <
|
||||
|
||||
} // End mayLoad = 0, mayStore = 0, hasSideEffects = 1
|
||||
|
||||
def R600_LOAD_CONST : AMDGPUShaderInst <
|
||||
(outs R600_Reg32:$dst),
|
||||
(ins i32imm:$src0),
|
||||
"R600_LOAD_CONST $dst, $src0",
|
||||
[(set R600_Reg32:$dst, (int_AMDGPU_load_const imm:$src0))]
|
||||
>;
|
||||
|
||||
def RESERVE_REG : AMDGPUShaderInst <
|
||||
(outs),
|
||||
@ -1551,7 +1552,6 @@ def RESERVE_REG : AMDGPUShaderInst <
|
||||
"RESERVE_REG $src",
|
||||
[(int_AMDGPU_reserve_reg imm:$src)]
|
||||
>;
|
||||
|
||||
def TXD: AMDGPUShaderInst <
|
||||
(outs R600_Reg128:$dst),
|
||||
(ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
|
||||
@ -1581,6 +1581,78 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in {
|
||||
"RETURN", [(IL_retflag)]>;
|
||||
}
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Constant Buffer Addressing Support
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in {
|
||||
def CONST_COPY : Instruction {
|
||||
let OutOperandList = (outs R600_Reg32:$dst);
|
||||
let InOperandList = (ins i32imm:$src);
|
||||
let Pattern = [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))];
|
||||
let AsmString = "CONST_COPY";
|
||||
let neverHasSideEffects = 1;
|
||||
let isAsCheapAsAMove = 1;
|
||||
let Itinerary = NullALU;
|
||||
}
|
||||
} // end isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"
|
||||
|
||||
def TEX_VTX_CONSTBUF :
|
||||
InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr), "VTX_READ_eg $dst, $ptr",
|
||||
[(set R600_Reg128:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr))]>,
|
||||
VTX_WORD1_GPR, VTX_WORD0 {
|
||||
|
||||
let VC_INST = 0;
|
||||
let FETCH_TYPE = 2;
|
||||
let FETCH_WHOLE_QUAD = 0;
|
||||
let BUFFER_ID = 0;
|
||||
let SRC_REL = 0;
|
||||
let SRC_SEL_X = 0;
|
||||
let DST_REL = 0;
|
||||
let USE_CONST_FIELDS = 0;
|
||||
let NUM_FORMAT_ALL = 2;
|
||||
let FORMAT_COMP_ALL = 1;
|
||||
let SRF_MODE_ALL = 1;
|
||||
let MEGA_FETCH_COUNT = 16;
|
||||
let DST_SEL_X = 0;
|
||||
let DST_SEL_Y = 1;
|
||||
let DST_SEL_Z = 2;
|
||||
let DST_SEL_W = 3;
|
||||
let DATA_FORMAT = 35;
|
||||
|
||||
let Inst{31-0} = Word0;
|
||||
let Inst{63-32} = Word1;
|
||||
|
||||
// LLVM can only encode 64-bit instructions, so these fields are manually
|
||||
// encoded in R600CodeEmitter
|
||||
//
|
||||
// bits<16> OFFSET;
|
||||
// bits<2> ENDIAN_SWAP = 0;
|
||||
// bits<1> CONST_BUF_NO_STRIDE = 0;
|
||||
// bits<1> MEGA_FETCH = 0;
|
||||
// bits<1> ALT_CONST = 0;
|
||||
// bits<2> BUFFER_INDEX_MODE = 0;
|
||||
|
||||
|
||||
|
||||
// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
|
||||
// is done in R600CodeEmitter
|
||||
//
|
||||
// Inst{79-64} = OFFSET;
|
||||
// Inst{81-80} = ENDIAN_SWAP;
|
||||
// Inst{82} = CONST_BUF_NO_STRIDE;
|
||||
// Inst{83} = MEGA_FETCH;
|
||||
// Inst{84} = ALT_CONST;
|
||||
// Inst{86-85} = BUFFER_INDEX_MODE;
|
||||
// Inst{95-86} = 0; Reserved
|
||||
|
||||
// VTX_WORD3 (Padding)
|
||||
//
|
||||
// Inst{127-96} = 0;
|
||||
}
|
||||
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Instructions support
|
||||
//===--------------------------------------------------------------------===//
|
||||
|
74
lib/Target/R600/R600LowerConstCopy.cpp
Normal file
74
lib/Target/R600/R600LowerConstCopy.cpp
Normal file
@ -0,0 +1,74 @@
|
||||
//===-- R600LowerConstCopy.cpp - Propagate ConstCopy / lower them to MOV---===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
/// This pass is intended to handle remaining ConstCopy pseudo MachineInstr.
|
||||
/// ISel will fold each Const Buffer read inside scalar ALU. However it cannot
|
||||
/// fold them inside vector instructions, like DOT4 or Cube; ISel emits
|
||||
/// ConstCopy instead. This pass (executed after R600ExpandSpecialInstrs) will try
|
||||
/// to fold them if possible or replace them by MOV otherwise.
|
||||
/// TODO: Implement the folding part, using a copy propagation algorithm.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "R600InstrInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/IR/GlobalValue.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class R600LowerConstCopy : public MachineFunctionPass {
|
||||
private:
|
||||
static char ID;
|
||||
const R600InstrInfo *TII;
|
||||
public:
|
||||
R600LowerConstCopy(TargetMachine &tm);
|
||||
virtual bool runOnMachineFunction(MachineFunction &MF);
|
||||
|
||||
const char *getPassName() const { return "R600 Eliminate Symbolic Operand"; }
|
||||
};
|
||||
|
||||
// Pass identifier; it is handed to the MachineFunctionPass constructor.
char R600LowerConstCopy::ID = 0;
|
||||
|
||||
|
||||
/// Constructs the pass and caches the instruction info of \p tm, downcast to
/// the R600-specific R600InstrInfo.
R600LowerConstCopy::R600LowerConstCopy(TargetMachine &tm)
    : MachineFunctionPass(ID),
      TII(static_cast<const R600InstrInfo *>(tm.getInstrInfo())) {}
|
||||
|
||||
/// Replaces every CONST_COPY pseudo instruction in \p MF with a MOV whose
/// source register is ALU_CONST and whose operand 9 carries the immediate of
/// the original CONST_COPY.
///
/// \returns true if at least one instruction was rewritten, false if the
/// function was left untouched.
bool R600LowerConstCopy::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = false;

  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
       BB != BB_E; ++BB) {
    MachineBasicBlock &MBB = *BB;
    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
         I != E;) {
      MachineInstr &MI = *I;
      // Step past MI before it can be erased so the loop iterator never
      // refers to a removed instruction.
      I = llvm::next(I);
      if (MI.getOpcode() != AMDGPU::CONST_COPY)
        continue;

      // The MOV is inserted in front of I, i.e. directly after MI.
      MachineInstr *NewMI = TII->buildDefaultInstruction(
          MBB, I, AMDGPU::MOV, MI.getOperand(0).getReg(), AMDGPU::ALU_CONST);
      // NOTE(review): operand 9 is presumably the src0 "sel" operand of MOV;
      // confirm the index against the instruction definition.
      NewMI->getOperand(9).setImm(MI.getOperand(1).getImm());
      MI.eraseFromParent();
      Changed = true;
    }
  }

  // Report modifications so the pass manager does not treat the function as
  // unchanged.
  return Changed;
}
|
||||
|
||||
/// Factory for the R600LowerConstCopy pass: returns a newly heap-allocated
/// instance constructed from \p tm.
FunctionPass *createR600LowerConstCopy(TargetMachine &tm) {
  FunctionPass *LowerConstCopy = new R600LowerConstCopy(tm);
  return LowerConstCopy;
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -38,16 +38,12 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
|
||||
Reserved.set(AMDGPU::NEG_ONE);
|
||||
Reserved.set(AMDGPU::PV_X);
|
||||
Reserved.set(AMDGPU::ALU_LITERAL_X);
|
||||
Reserved.set(AMDGPU::ALU_CONST);
|
||||
Reserved.set(AMDGPU::PREDICATE_BIT);
|
||||
Reserved.set(AMDGPU::PRED_SEL_OFF);
|
||||
Reserved.set(AMDGPU::PRED_SEL_ZERO);
|
||||
Reserved.set(AMDGPU::PRED_SEL_ONE);
|
||||
|
||||
for (TargetRegisterClass::iterator I = AMDGPU::R600_CReg32RegClass.begin(),
|
||||
E = AMDGPU::R600_CReg32RegClass.end(); I != E; ++I) {
|
||||
Reserved.set(*I);
|
||||
}
|
||||
|
||||
for (std::vector<unsigned>::const_iterator I = MFI->ReservedRegs.begin(),
|
||||
E = MFI->ReservedRegs.end(); I != E; ++I) {
|
||||
Reserved.set(*I);
|
||||
|
@ -27,10 +27,6 @@ foreach Index = 0-127 in {
|
||||
foreach Chan = [ "X", "Y", "Z", "W" ] in {
|
||||
// 32-bit Temporary Registers
|
||||
def T#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index, Chan>;
|
||||
|
||||
// 32-bit Constant Registers (There are more than 128, this is the number
|
||||
// that is currently supported.
|
||||
def C#Index#_#Chan : R600RegWithChan <"C"#Index#"."#Chan, Index, Chan>;
|
||||
}
|
||||
// 128-bit Temporary Registers
|
||||
def T#Index#_XYZW : R600Reg_128 <"T"#Index#".XYZW",
|
||||
@ -64,13 +60,11 @@ def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>;
|
||||
|
||||
def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32,
|
||||
(add (sequence "ArrayBase%u", 448, 464))>;
|
||||
|
||||
def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
|
||||
(add (interleave
|
||||
(interleave (sequence "C%u_X", 0, 127),
|
||||
(sequence "C%u_Z", 0, 127)),
|
||||
(interleave (sequence "C%u_Y", 0, 127),
|
||||
(sequence "C%u_W", 0, 127))))>;
|
||||
// special registers for ALU src operands
|
||||
// const buffer reference, SRCx_SEL contains index
|
||||
def ALU_CONST : R600Reg<"CBuf", 0>;
|
||||
// interpolation param reference, SRCx_SEL contains index
|
||||
def ALU_PARAM : R600Reg<"Param", 0>;
|
||||
|
||||
def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32,
|
||||
(add (sequence "T%u_X", 0, 127))>;
|
||||
@ -85,15 +79,15 @@ def R600_TReg32_W : RegisterClass <"AMDGPU", [f32, i32], 32,
|
||||
(add (sequence "T%u_W", 0, 127))>;
|
||||
|
||||
def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
|
||||
(add (interleave
|
||||
(interleave R600_TReg32_X, R600_TReg32_Z),
|
||||
(interleave R600_TReg32_Y, R600_TReg32_W)))>;
|
||||
(interleave R600_TReg32_X, R600_TReg32_Y,
|
||||
R600_TReg32_Z, R600_TReg32_W)>;
|
||||
|
||||
def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
|
||||
R600_TReg32,
|
||||
R600_CReg32,
|
||||
R600_ArrayBase,
|
||||
ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>;
|
||||
ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF,
|
||||
ALU_CONST, ALU_PARAM
|
||||
)>;
|
||||
|
||||
def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add
|
||||
PRED_SEL_OFF, PRED_SEL_ZERO, PRED_SEL_ONE)>;
|
||||
|
Loading…
x
Reference in New Issue
Block a user