[AMDGPU] Split R600 and GCN subregs

The subregister indices are generated, so the R600 and GCN values do not
need to match. Separate subregs are already defined for R600 and GCN, yet
the R600 code was still using the AMDGPU subregs; use the R600 ones there.

Differential Revision: https://reviews.llvm.org/D74248
This commit is contained in:
Stanislav Mekhanoshin 2020-02-07 12:08:32 -08:00
parent eea9040f42
commit ed3527c648
12 changed files with 102 additions and 70 deletions

View File

@ -698,6 +698,8 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
// 1 = Vector Register Class
SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
bool IsGCN = CurDAG->getSubtarget().getTargetTriple().getArch() ==
Triple::amdgcn;
RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
bool IsRegSeq = true;
unsigned NOps = N->getNumOperands();
@ -707,7 +709,8 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
IsRegSeq = false;
break;
}
unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
: R600RegisterInfo::getSubRegFromChannel(i);
RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
}
@ -717,7 +720,8 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
DL, EltVT);
for (unsigned i = NOps; i < NumVectorElts; ++i) {
unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
: R600RegisterInfo::getSubRegFromChannel(i);
RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
RegSeqArgs[1 + (2 * i) + 1] =
CurDAG->getTargetConstant(Sub, DL, MVT::i32);

View File

@ -21,61 +21,6 @@ using namespace llvm;
AMDGPURegisterInfo::AMDGPURegisterInfo() : AMDGPUGenRegisterInfo(0) {}
// Table of NumRegs sized pieces at every 32-bit offset.
//
// Indexed as [NumRegs - 1][Channel] by
// AMDGPURegisterInfo::getSubRegFromChannel. Each row has 32 entries, one per
// channel. Rows for NumRegs > 1 end in AMDGPU::NoSubRegister for the last
// NumRegs - 1 channels.
static const uint16_t SubRegFromChannelTable[][32] = {
// NumRegs == 1: sub0 .. sub31.
{ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19,
AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23,
AMDGPU::sub24, AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27,
AMDGPU::sub28, AMDGPU::sub29, AMDGPU::sub30, AMDGPU::sub31
},
// NumRegs == 2: sub0_sub1 .. sub30_sub31, then one NoSubRegister entry.
{
AMDGPU::sub0_sub1, AMDGPU::sub1_sub2, AMDGPU::sub2_sub3, AMDGPU::sub3_sub4,
AMDGPU::sub4_sub5, AMDGPU::sub5_sub6, AMDGPU::sub6_sub7, AMDGPU::sub7_sub8,
AMDGPU::sub8_sub9, AMDGPU::sub9_sub10, AMDGPU::sub10_sub11, AMDGPU::sub11_sub12,
AMDGPU::sub12_sub13, AMDGPU::sub13_sub14, AMDGPU::sub14_sub15, AMDGPU::sub15_sub16,
AMDGPU::sub16_sub17, AMDGPU::sub17_sub18, AMDGPU::sub18_sub19, AMDGPU::sub19_sub20,
AMDGPU::sub20_sub21, AMDGPU::sub21_sub22, AMDGPU::sub22_sub23, AMDGPU::sub23_sub24,
AMDGPU::sub24_sub25, AMDGPU::sub25_sub26, AMDGPU::sub26_sub27, AMDGPU::sub27_sub28,
AMDGPU::sub28_sub29, AMDGPU::sub29_sub30, AMDGPU::sub30_sub31, AMDGPU::NoSubRegister
},
// NumRegs == 3: sub0_sub1_sub2 .. sub29_sub30_sub31, then two NoSubRegister
// entries.
{
AMDGPU::sub0_sub1_sub2, AMDGPU::sub1_sub2_sub3, AMDGPU::sub2_sub3_sub4, AMDGPU::sub3_sub4_sub5,
AMDGPU::sub4_sub5_sub6, AMDGPU::sub5_sub6_sub7, AMDGPU::sub6_sub7_sub8, AMDGPU::sub7_sub8_sub9,
AMDGPU::sub8_sub9_sub10, AMDGPU::sub9_sub10_sub11, AMDGPU::sub10_sub11_sub12, AMDGPU::sub11_sub12_sub13,
AMDGPU::sub12_sub13_sub14, AMDGPU::sub13_sub14_sub15, AMDGPU::sub14_sub15_sub16, AMDGPU::sub15_sub16_sub17,
AMDGPU::sub16_sub17_sub18, AMDGPU::sub17_sub18_sub19, AMDGPU::sub18_sub19_sub20, AMDGPU::sub19_sub20_sub21,
AMDGPU::sub20_sub21_sub22, AMDGPU::sub21_sub22_sub23, AMDGPU::sub22_sub23_sub24, AMDGPU::sub23_sub24_sub25,
AMDGPU::sub24_sub25_sub26, AMDGPU::sub25_sub26_sub27, AMDGPU::sub26_sub27_sub28, AMDGPU::sub27_sub28_sub29,
AMDGPU::sub28_sub29_sub30, AMDGPU::sub29_sub30_sub31, AMDGPU::NoSubRegister, AMDGPU::NoSubRegister
},
// NumRegs == 4: sub0_sub1_sub2_sub3 .. sub28_sub29_sub30_sub31, then three
// NoSubRegister entries.
{
AMDGPU::sub0_sub1_sub2_sub3, AMDGPU::sub1_sub2_sub3_sub4, AMDGPU::sub2_sub3_sub4_sub5, AMDGPU::sub3_sub4_sub5_sub6,
AMDGPU::sub4_sub5_sub6_sub7, AMDGPU::sub5_sub6_sub7_sub8, AMDGPU::sub6_sub7_sub8_sub9, AMDGPU::sub7_sub8_sub9_sub10,
AMDGPU::sub8_sub9_sub10_sub11, AMDGPU::sub9_sub10_sub11_sub12, AMDGPU::sub10_sub11_sub12_sub13, AMDGPU::sub11_sub12_sub13_sub14,
AMDGPU::sub12_sub13_sub14_sub15, AMDGPU::sub13_sub14_sub15_sub16, AMDGPU::sub14_sub15_sub16_sub17, AMDGPU::sub15_sub16_sub17_sub18,
AMDGPU::sub16_sub17_sub18_sub19, AMDGPU::sub17_sub18_sub19_sub20, AMDGPU::sub18_sub19_sub20_sub21, AMDGPU::sub19_sub20_sub21_sub22,
AMDGPU::sub20_sub21_sub22_sub23, AMDGPU::sub21_sub22_sub23_sub24, AMDGPU::sub22_sub23_sub24_sub25, AMDGPU::sub23_sub24_sub25_sub26,
AMDGPU::sub24_sub25_sub26_sub27, AMDGPU::sub25_sub26_sub27_sub28, AMDGPU::sub26_sub27_sub28_sub29, AMDGPU::sub27_sub28_sub29_sub30,
AMDGPU::sub28_sub29_sub30_sub31, AMDGPU::NoSubRegister, AMDGPU::NoSubRegister, AMDGPU::NoSubRegister
}
};
// FIXME: TableGen should generate something to make this manageable for all
// register classes. At a minimum we could use the opposite of
// composeSubRegIndices and go up from the base 32-bit subreg.
//
// Returns the SubRegFromChannelTable entry for a piece of \p NumRegs registers
// at 32-bit offset \p Channel. Both arguments are only range-checked by
// asserts.
unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel,
                                                  unsigned NumRegs) {
  // Rows are stored by piece size, starting with single registers in row 0.
  // NumRegs == 0 wraps around here and is rejected by the first assert.
  const unsigned Row = NumRegs - 1;

  assert(Row < array_lengthof(SubRegFromChannelTable) &&
         "Not implemented");
  assert(Channel < array_lengthof(SubRegFromChannelTable[0]));

  const uint16_t *RowEntries = SubRegFromChannelTable[Row];
  return RowEntries[Channel];
}
void AMDGPURegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const {
MCRegAliasIterator R(Reg, this, true);

View File

@ -26,10 +26,6 @@ class TargetInstrInfo;
struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
AMDGPURegisterInfo();
/// \returns the sub reg enum value for the given \p Channel
/// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs = 1);
void reserveRegisterTuples(BitVector &, unsigned Reg) const;
};

View File

@ -308,7 +308,7 @@ private:
DstMI = Reg;
else
DstMI = TRI->getMatchingSuperReg(Reg,
AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
R600RegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
&R600::R600_Reg128RegClass);
}
if (MO.isUse()) {
@ -317,7 +317,7 @@ private:
SrcMI = Reg;
else
SrcMI = TRI->getMatchingSuperReg(Reg,
AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
R600RegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
&R600::R600_Reg128RegClass);
}
}

View File

@ -219,13 +219,13 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
}
}
if (IsReduction) {
unsigned SubRegIndex = AMDGPURegisterInfo::getSubRegFromChannel(Chan);
unsigned SubRegIndex = R600RegisterInfo::getSubRegFromChannel(Chan);
Src0 = TRI.getSubReg(Src0, SubRegIndex);
Src1 = TRI.getSubReg(Src1, SubRegIndex);
} else if (IsCube) {
static const int CubeSrcSwz[] = {2, 2, 0, 1};
unsigned SubRegIndex0 = AMDGPURegisterInfo::getSubRegFromChannel(CubeSrcSwz[Chan]);
unsigned SubRegIndex1 = AMDGPURegisterInfo::getSubRegFromChannel(CubeSrcSwz[3 - Chan]);
unsigned SubRegIndex0 = R600RegisterInfo::getSubRegFromChannel(CubeSrcSwz[Chan]);
unsigned SubRegIndex1 = R600RegisterInfo::getSubRegFromChannel(CubeSrcSwz[3 - Chan]);
Src1 = TRI.getSubReg(Src0, SubRegIndex1);
Src0 = TRI.getSubReg(Src0, SubRegIndex0);
}
@ -234,7 +234,7 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
bool Mask = false;
bool NotLast = true;
if (IsCube) {
unsigned SubRegIndex = AMDGPURegisterInfo::getSubRegFromChannel(Chan);
unsigned SubRegIndex = R600RegisterInfo::getSubRegFromChannel(Chan);
DstReg = TRI.getSubReg(DstReg, SubRegIndex);
} else {
// Mask the write if the original instruction does not write to

View File

@ -77,7 +77,7 @@ void R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (VectorComponents > 0) {
for (unsigned I = 0; I < VectorComponents; I++) {
unsigned SubRegIndex = AMDGPURegisterInfo::getSubRegFromChannel(I);
unsigned SubRegIndex = R600RegisterInfo::getSubRegFromChannel(I);
buildDefaultInstruction(MBB, MI, R600::MOV,
RI.getSubReg(DestReg, SubRegIndex),
RI.getSubReg(SrcReg, SubRegIndex))

View File

@ -28,6 +28,18 @@ R600RegisterInfo::R600RegisterInfo() : R600GenRegisterInfo(0) {
#define GET_REGINFO_TARGET_DESC
#include "R600GenRegisterInfo.inc"
// Translates a channel number into the R600 sub-register index for that
// channel (channel 0 -> R600::sub0, ..., channel 15 -> R600::sub15).
// \p Channel must be below 16; this is only checked by an assert.
unsigned R600RegisterInfo::getSubRegFromChannel(unsigned Channel) {
  // Entry N holds the sub-register index of channel N.
  static const uint16_t ChannelSubRegs[] = {
      R600::sub0,  R600::sub1,  R600::sub2,  R600::sub3,  // channels  0..3
      R600::sub4,  R600::sub5,  R600::sub6,  R600::sub7,  // channels  4..7
      R600::sub8,  R600::sub9,  R600::sub10, R600::sub11, // channels  8..11
      R600::sub12, R600::sub13, R600::sub14, R600::sub15  // channels 12..15
  };

  assert(Channel < array_lengthof(ChannelSubRegs));
  return ChannelSubRegs[Channel];
}
BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());

View File

@ -24,6 +24,10 @@ struct R600RegisterInfo final : public R600GenRegisterInfo {
R600RegisterInfo();
/// \returns the sub reg enum value for the given \p Channel
/// (e.g. getSubRegFromChannel(0) -> R600::sub0)
static unsigned getSubRegFromChannel(unsigned Channel);
BitVector getReservedRegs(const MachineFunction &MF) const override;
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
Register getFrameRegister(const MachineFunction &MF) const override;

View File

@ -154,7 +154,7 @@ bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) {
BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst)
.addReg(PrevDst)
.addReg(SubReg)
.addImm(AMDGPURegisterInfo::getSubRegFromChannel(CurrIdx));
.addImm(SIRegisterInfo::getSubRegFromChannel(CurrIdx));
PrevDst = NewDst;
}

View File

@ -3303,7 +3303,7 @@ computeIndirectRegAndOffset(const SIRegisterInfo &TRI,
if (Offset >= NumElts || Offset < 0)
return std::make_pair(AMDGPU::sub0, Offset);
return std::make_pair(AMDGPURegisterInfo::getSubRegFromChannel(Offset), 0);
return std::make_pair(SIRegisterInfo::getSubRegFromChannel(Offset), 0);
}
// Return true if the index is an SGPR and was set.

View File

@ -106,6 +106,73 @@ SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
AGPRSetID < NumRegPressureSets);
}
// FIXME: TableGen should generate something to make this manageable for all
// register classes. At a minimum we could use the opposite of
// composeSubRegIndices and go up from the base 32-bit subreg.
//
// Returns the sub-register index for a piece of \p NumRegs registers at 32-bit
// offset \p Channel. Pieces of 1 to 4 registers and channels 0 to 31 are
// tabulated; anything else trips an assert.
unsigned SIRegisterInfo::getSubRegFromChannel(unsigned Channel,
                                              unsigned NumRegs) {
  // Table of NumRegs sized pieces at every 32-bit offset, indexed as
  // [NumRegs - 1][Channel]. Four entries per line, so line k of a row starts
  // at channel 4 * k.
  // clang-format off
  static const uint16_t ChannelTable[][32] = {
    // NumRegs == 1
    {
      AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
      AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
      AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
      AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
      AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19,
      AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23,
      AMDGPU::sub24, AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27,
      AMDGPU::sub28, AMDGPU::sub29, AMDGPU::sub30, AMDGPU::sub31
    },
    // NumRegs == 2
    {
      AMDGPU::sub0_sub1, AMDGPU::sub1_sub2, AMDGPU::sub2_sub3, AMDGPU::sub3_sub4,
      AMDGPU::sub4_sub5, AMDGPU::sub5_sub6, AMDGPU::sub6_sub7, AMDGPU::sub7_sub8,
      AMDGPU::sub8_sub9, AMDGPU::sub9_sub10, AMDGPU::sub10_sub11, AMDGPU::sub11_sub12,
      AMDGPU::sub12_sub13, AMDGPU::sub13_sub14, AMDGPU::sub14_sub15, AMDGPU::sub15_sub16,
      AMDGPU::sub16_sub17, AMDGPU::sub17_sub18, AMDGPU::sub18_sub19, AMDGPU::sub19_sub20,
      AMDGPU::sub20_sub21, AMDGPU::sub21_sub22, AMDGPU::sub22_sub23, AMDGPU::sub23_sub24,
      AMDGPU::sub24_sub25, AMDGPU::sub25_sub26, AMDGPU::sub26_sub27, AMDGPU::sub27_sub28,
      AMDGPU::sub28_sub29, AMDGPU::sub29_sub30, AMDGPU::sub30_sub31, AMDGPU::NoSubRegister
    },
    // NumRegs == 3
    {
      AMDGPU::sub0_sub1_sub2, AMDGPU::sub1_sub2_sub3, AMDGPU::sub2_sub3_sub4, AMDGPU::sub3_sub4_sub5,
      AMDGPU::sub4_sub5_sub6, AMDGPU::sub5_sub6_sub7, AMDGPU::sub6_sub7_sub8, AMDGPU::sub7_sub8_sub9,
      AMDGPU::sub8_sub9_sub10, AMDGPU::sub9_sub10_sub11, AMDGPU::sub10_sub11_sub12, AMDGPU::sub11_sub12_sub13,
      AMDGPU::sub12_sub13_sub14, AMDGPU::sub13_sub14_sub15, AMDGPU::sub14_sub15_sub16, AMDGPU::sub15_sub16_sub17,
      AMDGPU::sub16_sub17_sub18, AMDGPU::sub17_sub18_sub19, AMDGPU::sub18_sub19_sub20, AMDGPU::sub19_sub20_sub21,
      AMDGPU::sub20_sub21_sub22, AMDGPU::sub21_sub22_sub23, AMDGPU::sub22_sub23_sub24, AMDGPU::sub23_sub24_sub25,
      AMDGPU::sub24_sub25_sub26, AMDGPU::sub25_sub26_sub27, AMDGPU::sub26_sub27_sub28, AMDGPU::sub27_sub28_sub29,
      AMDGPU::sub28_sub29_sub30, AMDGPU::sub29_sub30_sub31, AMDGPU::NoSubRegister, AMDGPU::NoSubRegister
    },
    // NumRegs == 4
    {
      AMDGPU::sub0_sub1_sub2_sub3, AMDGPU::sub1_sub2_sub3_sub4, AMDGPU::sub2_sub3_sub4_sub5, AMDGPU::sub3_sub4_sub5_sub6,
      AMDGPU::sub4_sub5_sub6_sub7, AMDGPU::sub5_sub6_sub7_sub8, AMDGPU::sub6_sub7_sub8_sub9, AMDGPU::sub7_sub8_sub9_sub10,
      AMDGPU::sub8_sub9_sub10_sub11, AMDGPU::sub9_sub10_sub11_sub12, AMDGPU::sub10_sub11_sub12_sub13, AMDGPU::sub11_sub12_sub13_sub14,
      AMDGPU::sub12_sub13_sub14_sub15, AMDGPU::sub13_sub14_sub15_sub16, AMDGPU::sub14_sub15_sub16_sub17, AMDGPU::sub15_sub16_sub17_sub18,
      AMDGPU::sub16_sub17_sub18_sub19, AMDGPU::sub17_sub18_sub19_sub20, AMDGPU::sub18_sub19_sub20_sub21, AMDGPU::sub19_sub20_sub21_sub22,
      AMDGPU::sub20_sub21_sub22_sub23, AMDGPU::sub21_sub22_sub23_sub24, AMDGPU::sub22_sub23_sub24_sub25, AMDGPU::sub23_sub24_sub25_sub26,
      AMDGPU::sub24_sub25_sub26_sub27, AMDGPU::sub25_sub26_sub27_sub28, AMDGPU::sub26_sub27_sub28_sub29, AMDGPU::sub27_sub28_sub29_sub30,
      AMDGPU::sub28_sub29_sub30_sub31, AMDGPU::NoSubRegister, AMDGPU::NoSubRegister, AMDGPU::NoSubRegister
    }
  };
  // clang-format on

  // NumRegs == 0 wraps around here and is rejected by the first assert.
  const unsigned Row = NumRegs - 1;
  assert(Row < array_lengthof(ChannelTable) &&
         "Not implemented");
  assert(Channel < array_lengthof(ChannelTable[0]));
  return ChannelTable[Row][Channel];
}
unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
const MachineFunction &MF) const {
unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;

View File

@ -42,6 +42,10 @@ private:
public:
SIRegisterInfo(const GCNSubtarget &ST);
/// \returns the sub reg enum value for the given \p Channel
/// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs = 1);
bool spillSGPRToVGPR() const {
return SpillSGPRToVGPR;
}