R600: improve inputs/interpolation handling

Use one intrinsic for all sorts of interpolation.
Use two separate unexpanded instructions to represent INTERP_XY and INTERP_ZW -
this allows one of the two halves to be eliminated if it is not used.
Track liveness of special interpolation regs instead of reserving them -
this allows those regs to be reused, lowering register pressure.

Patch By: Vadim Girlin

v2[Vincent Lejeune]: Rebased against current llvm master

Signed-off-by: Vadim Girlin <vadimgirlin@gmail.com>
Reviewed-by: Tom Stellard <thomas.stellard@amd.com>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174394 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Tom Stellard 2013-02-05 17:09:14 +00:00
parent ebc535bc4a
commit 29b15a3780
10 changed files with 129 additions and 251 deletions

View File

@ -410,8 +410,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(SMIN)
NODE_NAME_CASE(UMIN)
NODE_NAME_CASE(URECIP)
NODE_NAME_CASE(INTERP)
NODE_NAME_CASE(INTERP_P0)
NODE_NAME_CASE(EXPORT)
NODE_NAME_CASE(CONST_ADDRESS)
}

View File

@ -120,8 +120,6 @@ enum {
SMIN,
UMIN,
URECIP,
INTERP,
INTERP_P0,
EXPORT,
CONST_ADDRESS,
LAST_AMDGPU_ISD_NUMBER

View File

@ -55,118 +55,6 @@ FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
return new R600ExpandSpecialInstrsPass(TM);
}
/// Expand an AMDGPU::input_perspective pseudo instruction into eight
/// INTERP_ZW / INTERP_XY instructions (two groups of four).
///
/// \param MI instruction to inspect; it is erased from its parent block when
///           it is expanded.
/// \returns false, leaving MI untouched, when MI is not an input_perspective;
///          true after the replacement instructions have been built at MI's
///          position and MI has been erased.
bool R600ExpandSpecialInstrsPass::ExpandInputPerspective(MachineInstr &MI) {
const R600RegisterInfo &TRI = TII->getRegisterInfo();
if (MI.getOpcode() != AMDGPU::input_perspective)
return false;
MachineBasicBlock::iterator I = &MI;
unsigned DstReg = MI.getOperand(0).getReg();
R600MachineFunctionInfo *MFI = MI.getParent()->getParent()
->getInfo<R600MachineFunctionInfo>();
unsigned IJIndexBase;
// In Evergreen ISA doc section 8.3.2 :
// We need to interpolate XY and ZW in two different instruction groups.
// An INTERP_* must occupy all 4 slots of an instruction group.
// Output of INTERP_XY is written in X,Y slots
// Output of INTERP_ZW is written in Z,W slots
//
// Thus interpolation requires the following sequences :
//
// AnyGPR.x = INTERP_ZW; (Write Masked Out)
// AnyGPR.y = INTERP_ZW; (Write Masked Out)
// DstGPR.z = INTERP_ZW;
// DstGPR.w = INTERP_ZW; (End of first IG)
// DstGPR.x = INTERP_XY;
// DstGPR.y = INTERP_XY;
// AnyGPR.z = INTERP_XY; (Write Masked Out)
// AnyGPR.w = INTERP_XY; (Write Masked Out) (End of second IG)
//
// Operand 1 selects which IJ index base is used: 0 takes the perspective
// index, 1 takes the linear index.
// NOTE(review): when assert() is compiled out, the default case leaves
// IJIndexBase uninitialized and it is still read in the loop below.
switch (MI.getOperand(1).getImm()) {
case 0:
IJIndexBase = MFI->GetIJPerspectiveIndex();
break;
case 1:
IJIndexBase = MFI->GetIJLinearIndex();
break;
default:
assert(0 && "Unknow ij index");
}
// i = 0..3 builds the INTERP_ZW group, i = 4..7 builds the INTERP_XY group.
for (unsigned i = 0; i < 8; i++) {
// (i + 1) % 2 alternates between offset 1 (even i) and offset 0 (odd i)
// from register 2 * IJIndexBase of R600_TReg32.
unsigned IJIndex = AMDGPU::R600_TReg32RegClass.getRegister(
2 * IJIndexBase + ((i + 1) % 2));
// Operand 2 is the index of the register read from R600_ArrayBase.
unsigned ReadReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
MI.getOperand(2).getImm());
// Destination sub-register cycles through x, y, z, w within each group.
unsigned Sel = AMDGPU::sel_x;
switch (i % 4) {
case 0:Sel = AMDGPU::sel_x;break;
case 1:Sel = AMDGPU::sel_y;break;
case 2:Sel = AMDGPU::sel_z;break;
case 3:Sel = AMDGPU::sel_w;break;
default:break;
}
unsigned Res = TRI.getSubReg(DstReg, Sel);
unsigned Opcode = (i < 4)?AMDGPU::INTERP_ZW:AMDGPU::INTERP_XY;
MachineBasicBlock &MBB = *(MI.getParent());
MachineInstr *NewMI =
TII->buildDefaultInstruction(MBB, I, Opcode, Res, IJIndex, ReadReg);
// Only i = 2..5 (z,w of the ZW group and x,y of the XY group) keep their
// write; every other instruction gets MO_FLAG_MASK on operand 0.
if (!(i> 1 && i < 6)) {
TII->addFlag(NewMI, 0, MO_FLAG_MASK);
}
// All but the fourth instruction of each group are flagged NOT_LAST.
if (i % 4 != 3)
TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
}
MI.eraseFromParent();
return true;
}
/// Expand an AMDGPU::input_constant pseudo instruction into four
/// INTERP_LOAD_P0 instructions, one per sub-register (x, y, z, w) of the
/// destination.
///
/// \param MI instruction to inspect; it is erased from its parent block when
///           it is expanded.
/// \returns false, leaving MI untouched, when MI is not an input_constant;
///          true after the replacement instructions have been built at MI's
///          position and MI has been erased.
bool R600ExpandSpecialInstrsPass::ExpandInputConstant(MachineInstr &MI) {
const R600RegisterInfo &TRI = TII->getRegisterInfo();
if (MI.getOpcode() != AMDGPU::input_constant)
return false;
MachineBasicBlock::iterator I = &MI;
unsigned DstReg = MI.getOperand(0).getReg();
for (unsigned i = 0; i < 4; i++) {
// Operand 1 is the index of the register read from R600_ArrayBase; the same
// source register is used for all four channels.
unsigned ReadReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
MI.getOperand(1).getImm());
// Pick the destination sub-register for this iteration: x, y, z, then w.
unsigned Sel = AMDGPU::sel_x;
switch (i % 4) {
case 0:Sel = AMDGPU::sel_x;break;
case 1:Sel = AMDGPU::sel_y;break;
case 2:Sel = AMDGPU::sel_z;break;
case 3:Sel = AMDGPU::sel_w;break;
default:break;
}
unsigned Res = TRI.getSubReg(DstReg, Sel);
MachineBasicBlock &MBB = *(MI.getParent());
MachineInstr *NewMI = TII->buildDefaultInstruction(
MBB, I, AMDGPU::INTERP_LOAD_P0, Res, ReadReg);
// All but the fourth instruction are flagged NOT_LAST.
if (i % 4 != 3)
TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
}
MI.eraseFromParent();
return true;
}
bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
const R600RegisterInfo &TRI = TII->getRegisterInfo();
@ -200,7 +88,7 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
MI.eraseFromParent();
continue;
}
case AMDGPU::BREAK:
case AMDGPU::BREAK: {
MachineInstr *PredSet = TII->buildDefaultInstruction(MBB, I,
AMDGPU::PRED_SETE_INT,
AMDGPU::PREDICATE_BIT,
@ -214,12 +102,87 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
.addReg(AMDGPU::PREDICATE_BIT);
MI.eraseFromParent();
continue;
}
}
if (ExpandInputPerspective(MI))
continue;
if (ExpandInputConstant(MI))
continue;
case AMDGPU::INTERP_PAIR_XY: {
MachineInstr *BMI;
unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
MI.getOperand(2).getImm());
for (unsigned Chan = 0; Chan < 4; ++Chan) {
unsigned DstReg;
if (Chan < 2)
DstReg = MI.getOperand(Chan).getReg();
else
DstReg = Chan == 2 ? AMDGPU::T0_Z : AMDGPU::T0_W;
BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_XY,
DstReg, MI.getOperand(3 + (Chan % 2)).getReg(), PReg);
if (Chan > 0) {
BMI->bundleWithPred();
}
if (Chan >= 2)
TII->addFlag(BMI, 0, MO_FLAG_MASK);
if (Chan != 3)
TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST);
}
MI.eraseFromParent();
continue;
}
case AMDGPU::INTERP_PAIR_ZW: {
MachineInstr *BMI;
unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
MI.getOperand(2).getImm());
for (unsigned Chan = 0; Chan < 4; ++Chan) {
unsigned DstReg;
if (Chan < 2)
DstReg = Chan == 0 ? AMDGPU::T0_X : AMDGPU::T0_Y;
else
DstReg = MI.getOperand(Chan-2).getReg();
BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_ZW,
DstReg, MI.getOperand(3 + (Chan % 2)).getReg(), PReg);
if (Chan > 0) {
BMI->bundleWithPred();
}
if (Chan < 2)
TII->addFlag(BMI, 0, MO_FLAG_MASK);
if (Chan != 3)
TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST);
}
MI.eraseFromParent();
continue;
}
case AMDGPU::INTERP_VEC_LOAD: {
const R600RegisterInfo &TRI = TII->getRegisterInfo();
MachineInstr *BMI;
unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
MI.getOperand(1).getImm());
unsigned DstReg = MI.getOperand(0).getReg();
for (unsigned Chan = 0; Chan < 4; ++Chan) {
BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_LOAD_P0,
TRI.getSubReg(DstReg, TRI.getSubRegFromChannel(Chan)), PReg);
if (Chan > 0) {
BMI->bundleWithPred();
}
if (Chan != 3)
TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST);
}
MI.eraseFromParent();
continue;
}
}
bool IsReduction = TII->isReductionOp(MI.getOpcode());
bool IsVector = TII->isVector(MI);

View File

@ -148,18 +148,6 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
break;
}
case AMDGPU::RESERVE_REG: {
R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
int64_t ReservedIndex = MI->getOperand(0).getImm();
unsigned ReservedReg =
AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
MFI->ReservedRegs.push_back(ReservedReg);
unsigned SuperReg =
AMDGPU::R600_Reg128RegClass.getRegister(ReservedIndex / 4);
MFI->ReservedRegs.push_back(SuperReg);
break;
}
case AMDGPU::TXD: {
unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
@ -244,29 +232,6 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
break;
}
case AMDGPU::input_perspective: {
R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
// XXX Be more fine about register reservation
for (unsigned i = 0; i < 4; i ++) {
unsigned ReservedReg = AMDGPU::R600_TReg32RegClass.getRegister(i);
MFI->ReservedRegs.push_back(ReservedReg);
}
switch (MI->getOperand(1).getImm()) {
case 0:// Perspective
MFI->HasPerspectiveInterpolation = true;
break;
case 1:// Linear
MFI->HasLinearInterpolation = true;
break;
default:
assert(0 && "Unknow ij index");
}
return BB;
}
case AMDGPU::EG_ExportSwz:
case AMDGPU::R600_ExportSwz: {
// Instruction is left unmodified if its not the last one of its type
@ -421,38 +386,35 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
}
case AMDGPUIntrinsic::R600_load_input_perspective: {
case AMDGPUIntrinsic::R600_interp_input: {
int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
if (slot < 0)
return DAG.getUNDEF(MVT::f32);
SDValue FullVector = DAG.getNode(
AMDGPUISD::INTERP,
DL, MVT::v4f32,
DAG.getConstant(0, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32));
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
}
case AMDGPUIntrinsic::R600_load_input_linear: {
int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
if (slot < 0)
return DAG.getUNDEF(MVT::f32);
SDValue FullVector = DAG.getNode(
AMDGPUISD::INTERP,
DL, MVT::v4f32,
DAG.getConstant(1, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32));
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
}
case AMDGPUIntrinsic::R600_load_input_constant: {
int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
if (slot < 0)
return DAG.getUNDEF(MVT::f32);
SDValue FullVector = DAG.getNode(
AMDGPUISD::INTERP_P0,
DL, MVT::v4f32,
DAG.getConstant(slot / 4 , MVT::i32));
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
MachineSDNode *interp;
if (ijb < 0) {
interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
return DAG.getTargetExtractSubreg(
TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
DL, MVT::f32, SDValue(interp, 0));
}
if (slot % 4 < 2)
interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32),
CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32));
else
interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32),
CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32));
return SDValue(interp, slot % 2);
}
case r600_read_ngroups_x:

View File

@ -104,7 +104,6 @@ bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const {
switch (Opcode) {
default: return false;
case AMDGPU::RETURN:
case AMDGPU::RESERVE_REG:
return true;
}
}

View File

@ -480,13 +480,17 @@ def isR600toCayman : Predicate<
// R600 SDNodes
//===----------------------------------------------------------------------===//
def INTERP: SDNode<"AMDGPUISD::INTERP",
SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisInt<1>, SDTCisInt<2>]>
>;
// Unexpanded instruction with two results: $dst0 in R600_TReg32_X and $dst1
// in R600_TReg32_Y. It takes an immediate ($src0) and two R600_Reg32
// registers ($src1, $src2) and has no selection pattern.
// The asm string now prints $dst1 as an operand; it was previously emitted as
// the literal text "dst1" because the '$' sigil was missing.
def INTERP_PAIR_XY : AMDGPUShaderInst <
(outs R600_TReg32_X:$dst0, R600_TReg32_Y:$dst1),
(ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2),
"INTERP_PAIR_XY $src0 $src1 $src2 : $dst0 $dst1",
[]>;
def INTERP_P0: SDNode<"AMDGPUISD::INTERP_P0",
SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisInt<1>]>
>;
// Unexpanded instruction with two results: $dst0 in R600_TReg32_Z and $dst1
// in R600_TReg32_W. It takes an immediate ($src0) and two R600_Reg32
// registers ($src1, $src2) and has no selection pattern.
// The asm string now prints $dst1 as an operand; it was previously emitted as
// the literal text "dst1" because the '$' sigil was missing.
def INTERP_PAIR_ZW : AMDGPUShaderInst <
(outs R600_TReg32_Z:$dst0, R600_TReg32_W:$dst1),
(ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2),
"INTERP_PAIR_ZW $src0 $src1 $src2 : $dst0 $dst1",
[]>;
def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",
SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>,
@ -497,21 +501,11 @@ def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",
// Interpolation Instructions
//===----------------------------------------------------------------------===//
let usesCustomInserter = 1 in {
def input_perspective : AMDGPUShaderInst <
def INTERP_VEC_LOAD : AMDGPUShaderInst <
(outs R600_Reg128:$dst),
(ins i32imm:$src0, i32imm:$src1),
"input_perspective $src0 $src1 : dst",
[(set R600_Reg128:$dst, (INTERP (i32 imm:$src0), (i32 imm:$src1)))]>;
} // End usesCustomInserter = 1
def input_constant : AMDGPUShaderInst <
(outs R600_Reg128:$dst),
(ins i32imm:$src),
"input_perspective $src : dst",
[(set R600_Reg128:$dst, (INTERP_P0 (i32 imm:$src)))]>;
(ins i32imm:$src0),
"INTERP_LOAD $src0 : $dst",
[]>;
def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> {
let bank_swizzle = 5;
@ -1562,12 +1556,6 @@ def MASK_WRITE : AMDGPUShaderInst <
} // End mayLoad = 0, mayStore = 0, hasSideEffects = 1
def RESERVE_REG : AMDGPUShaderInst <
(outs),
(ins i32imm:$src),
"RESERVE_REG $src",
[(int_AMDGPU_reserve_reg imm:$src)]
>;
def TXD: AMDGPUShaderInst <
(outs R600_Reg128:$dst),
(ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),

View File

@ -12,15 +12,13 @@
//===----------------------------------------------------------------------===//
let TargetPrefix = "R600", isTarget = 1 in {
def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_R600_load_input_perspective :
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
def int_R600_load_input_constant :
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
def int_R600_load_input_linear :
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
def int_R600_load_input :
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_R600_interp_input :
Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_R600_store_swizzle :
Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
def int_R600_store_stream_output :
Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
def int_R600_store_pixel_color :

View File

@ -13,21 +13,6 @@
using namespace llvm;
R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
: MachineFunctionInfo(),
HasLinearInterpolation(false),
HasPerspectiveInterpolation(false) {
: MachineFunctionInfo() {
memset(Outputs, 0, sizeof(Outputs));
}
// Index of the perspective IJ pair, which is always 0. Asserts that
// HasPerspectiveInterpolation is set.
unsigned R600MachineFunctionInfo::GetIJPerspectiveIndex() const {
assert(HasPerspectiveInterpolation);
const unsigned PerspectiveIndex = 0;
return PerspectiveIndex;
}
// Index of the linear IJ pair: 1 when HasPerspectiveInterpolation is set,
// otherwise 0. Asserts that HasLinearInterpolation is set.
unsigned R600MachineFunctionInfo::GetIJLinearIndex() const {
assert(HasLinearInterpolation);
return HasPerspectiveInterpolation ? 1 : 0;
}

View File

@ -23,14 +23,7 @@ class R600MachineFunctionInfo : public MachineFunctionInfo {
public:
R600MachineFunctionInfo(const MachineFunction &MF);
std::vector<unsigned> ReservedRegs;
SDNode *Outputs[16];
bool HasLinearInterpolation;
bool HasPerspectiveInterpolation;
unsigned GetIJLinearIndex() const;
unsigned GetIJPerspectiveIndex() const;
};
} // End llvm namespace

View File

@ -28,7 +28,6 @@ R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm,
BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
const R600MachineFunctionInfo * MFI = MF.getInfo<R600MachineFunctionInfo>();
Reserved.set(AMDGPU::ZERO);
Reserved.set(AMDGPU::HALF);
@ -44,11 +43,6 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(AMDGPU::PRED_SEL_ZERO);
Reserved.set(AMDGPU::PRED_SEL_ONE);
for (std::vector<unsigned>::const_iterator I = MFI->ReservedRegs.begin(),
E = MFI->ReservedRegs.end(); I != E; ++I) {
Reserved.set(*I);
}
return Reserved;
}