R600: Some factorization

llvm-svn: 182123
This commit is contained in:
Vincent Lejeune 2013-05-17 16:50:02 +00:00
parent bf991c018d
commit d391d51989
5 changed files with 221 additions and 203 deletions

View File

@ -172,22 +172,20 @@ private:
AMDGPU::ALU_LITERAL_Z,
AMDGPU::ALU_LITERAL_W
};
for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg())
const SmallVector<std::pair<MachineOperand *, int64_t>, 3 > Srcs =
TII->getSrcs(MI);
for (unsigned i = 0, e = Srcs.size(); i < e; ++i) {
if (Srcs[i].first->getReg() != AMDGPU::ALU_LITERAL_X)
continue;
if (MO.getReg() != AMDGPU::ALU_LITERAL_X)
continue;
unsigned ImmIdx = TII->getOperandIdx(MI->getOpcode(), R600Operands::IMM);
int64_t Imm = MI->getOperand(ImmIdx).getImm();
int64_t Imm = Srcs[i].second;
std::vector<int64_t>::iterator It =
std::find(Lits.begin(), Lits.end(), Imm);
if (It != Lits.end()) {
unsigned Index = It - Lits.begin();
MO.setReg(LiteralRegs[Index]);
Srcs[i].first->setReg(LiteralRegs[Index]);
} else {
assert(Lits.size() < 4 && "Too many literals in Instruction Group");
MO.setReg(LiteralRegs[Lits.size()]);
Srcs[i].first->setReg(LiteralRegs[Lits.size()]);
Lits.push_back(Imm);
}
}

View File

@ -89,31 +89,6 @@ private:
}
}
// Collects the constant reads of an ALU instruction.
// Each returned pair holds the operand index of a source register that is
// ALU_CONST, followed by the immediate stored in the matching SRCn_SEL operand.
// Non-ALU instructions yield an empty list.
std::vector<std::pair<unsigned, unsigned> > ExtractConstRead(MachineInstr *MI)
const {
  typedef std::pair<unsigned, unsigned> ConstRead;
  // Row n pairs the n-th source register operand with its select operand.
  static const R600Operands::Ops SrcAndSel[3][2] = {
    {R600Operands::SRC0, R600Operands::SRC0_SEL},
    {R600Operands::SRC1, R600Operands::SRC1_SEL},
    {R600Operands::SRC2, R600Operands::SRC2_SEL},
  };
  std::vector<ConstRead> Reads;
  if (TII->isALUInstr(MI->getOpcode())) {
    for (unsigned Slot = 0; Slot != 3; ++Slot) {
      const int RegIdx = TII->getOperandIdx(MI->getOpcode(),
                                            SrcAndSel[Slot][0]);
      // The scan stops at the first source slot the opcode does not have.
      if (RegIdx < 0)
        break;
      if (MI->getOperand(RegIdx).getReg() != AMDGPU::ALU_CONST)
        continue;
      const int SelIdx = TII->getOperandIdx(MI->getOpcode(),
                                            SrcAndSel[Slot][1]);
      const unsigned Sel = MI->getOperand(SelIdx).getImm();
      Reads.push_back(ConstRead(RegIdx, Sel));
    }
  }
  return Reads;
}
std::pair<unsigned, unsigned> getAccessedBankLine(unsigned Sel) const {
// Sel is (512 + (kc_bank << 12) + ConstIndex) << 2
// (See also R600ISelLowering.cpp)
@ -131,9 +106,12 @@ private:
bool SubstituteKCacheBank(MachineInstr *MI,
std::vector<std::pair<unsigned, unsigned> > &CachedConsts) const {
std::vector<std::pair<unsigned, unsigned> > UsedKCache;
std::vector<std::pair<unsigned, unsigned> > Consts = ExtractConstRead(MI);
const SmallVector<std::pair<MachineOperand *, int64_t>, 3> &Consts =
TII->getSrcs(MI);
assert(TII->isALUInstr(MI->getOpcode()) && "Can't assign Const");
for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
if (Consts[i].first->getReg() != AMDGPU::ALU_CONST)
continue;
unsigned Sel = Consts[i].second;
unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31;
unsigned KCacheIndex = Index * 4 + Chan;
@ -159,19 +137,22 @@ private:
return false;
}
for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
switch(UsedKCache[i].first) {
for (unsigned i = 0, j = 0, n = Consts.size(); i < n; ++i) {
if (Consts[i].first->getReg() != AMDGPU::ALU_CONST)
continue;
switch(UsedKCache[j].first) {
case 0:
MI->getOperand(Consts[i].first).setReg(
AMDGPU::R600_KC0RegClass.getRegister(UsedKCache[i].second));
Consts[i].first->setReg(
AMDGPU::R600_KC0RegClass.getRegister(UsedKCache[j].second));
break;
case 1:
MI->getOperand(Consts[i].first).setReg(
AMDGPU::R600_KC1RegClass.getRegister(UsedKCache[i].second));
Consts[i].first->setReg(
AMDGPU::R600_KC1RegClass.getRegister(UsedKCache[j].second));
break;
default:
llvm_unreachable("Wrong Cache Line");
}
j++;
}
return true;
}

View File

@ -168,6 +168,156 @@ bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const {
usesTextureCache(MI->getOpcode());
}
/// Gathers the source operands of \p MI in SRC0, SRC1, SRC2 order, stopping at
/// the first source slot the opcode does not provide.
///
/// Each entry points at the source register operand and carries one extra
/// value: the SRCn_SEL immediate when the register is ALU_CONST, the IMM
/// operand when the register is ALU_LITERAL_X, and 0 otherwise.
SmallVector<std::pair<MachineOperand *, int64_t>, 3>
R600InstrInfo::getSrcs(MachineInstr *MI) const {
  typedef std::pair<MachineOperand *, int64_t> SrcInfo;
  // Row n pairs the n-th source register operand with its select operand.
  static const R600Operands::Ops SrcAndSel[3][2] = {
    {R600Operands::SRC0, R600Operands::SRC0_SEL},
    {R600Operands::SRC1, R600Operands::SRC1_SEL},
    {R600Operands::SRC2, R600Operands::SRC2_SEL},
  };

  SmallVector<SrcInfo, 3> Srcs;
  for (unsigned Slot = 0; Slot != 3; ++Slot) {
    const int RegIdx = getOperandIdx(MI->getOpcode(), SrcAndSel[Slot][0]);
    if (RegIdx < 0)
      break;

    MachineOperand &SrcOp = MI->getOperand(RegIdx);
    const unsigned SrcReg = SrcOp.getReg();

    // Both immediates pass through 'unsigned' before being widened to int64_t.
    int64_t Extra = 0;
    if (SrcReg == AMDGPU::ALU_CONST) {
      const unsigned Sel = MI->getOperand(
          getOperandIdx(MI->getOpcode(), SrcAndSel[Slot][1])).getImm();
      Extra = Sel;
    } else if (SrcReg == AMDGPU::ALU_LITERAL_X) {
      const unsigned Imm = MI->getOperand(
          getOperandIdx(MI->getOpcode(), R600Operands::IMM)).getImm();
      Extra = Imm;
    }
    Srcs.push_back(SrcInfo(&SrcOp, Extra));
  }
  return Srcs;
}
/// Builds the three (hardware index, channel) source slots of \p MI that are
/// later checked against each other for read port conflicts.
///
/// A slot is the dummy pair (-1, 0) when the source is present in \p PV, when
/// its masked encoding value is above 127, or when the instruction has fewer
/// than three sources.
///
/// \param MI instruction whose sources are inspected.
/// \param PV map whose keys are the registers to exclude from the result.
/// \returns exactly three pairs, one per source slot.
std::vector<std::pair<int, unsigned> >
R600InstrInfo::ExtractSrcs(MachineInstr *MI,
                           const DenseMap<unsigned, unsigned> &PV)
    const {
  const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = getSrcs(MI);
  const std::pair<int, unsigned> DummyPair(-1, 0);
  std::vector<std::pair<int, unsigned> > Result;
  unsigned i = 0;
  for (unsigned n = Srcs.size(); i < n; ++i) {
    unsigned Reg = Srcs[i].first->getReg();
    // PV is keyed by register, so the lookup must use the register itself and
    // not its masked hardware index.
    if (PV.find(Reg) != PV.end()) {
      Result.push_back(DummyPair);
      continue;
    }
    unsigned Index = RI.getEncodingValue(Reg) & 0xff;
    if (Index > 127) {
      Result.push_back(DummyPair);
      continue;
    }
    unsigned Chan = RI.getHWRegChan(Reg);
    Result.push_back(std::pair<int, unsigned>(Index, Chan));
  }
  // Pad so that callers can always index slots 0..2.
  for (; i < 3; ++i)
    Result.push_back(DummyPair);
  return Result;
}
/// Returns a copy of \p Src with its first three entries reordered according
/// to \p Swz. Writing the input as (a, b, c), the results are:
///   ALU_VEC_012 -> (a, b, c)    ALU_VEC_021 -> (a, c, b)
///   ALU_VEC_102 -> (b, a, c)    ALU_VEC_120 -> (c, a, b)
///   ALU_VEC_201 -> (b, c, a)    ALU_VEC_210 -> (c, b, a)
static std::vector<std::pair<int, unsigned> >
Swizzle(std::vector<std::pair<int, unsigned> > Src,
        R600InstrInfo::BankSwizzle Swz) {
  typedef std::pair<int, unsigned> Slot;
  switch (Swz) {
  case R600InstrInfo::ALU_VEC_012:
    break;
  case R600InstrInfo::ALU_VEC_021:
    std::swap(Src[2], Src[1]);
    break;
  case R600InstrInfo::ALU_VEC_102:
    std::swap(Src[1], Src[0]);
    break;
  case R600InstrInfo::ALU_VEC_120: {
    // Rotate towards the back: the last entry moves to the front.
    const Slot Last = Src[2];
    Src[2] = Src[1];
    Src[1] = Src[0];
    Src[0] = Last;
    break;
  }
  case R600InstrInfo::ALU_VEC_201: {
    // Rotate towards the front: the first entry moves to the back.
    const Slot First = Src[0];
    Src[0] = Src[1];
    Src[1] = Src[2];
    Src[2] = First;
    break;
  }
  case R600InstrInfo::ALU_VEC_210:
    std::swap(Src[2], Src[0]);
    break;
  }
  return Src;
}
/// Checks the first \p CheckedSize instructions of a group for read conflicts.
///
/// After applying Swz[i] to IGSrcs[i], every (channel, source position) cell
/// may be claimed by only one index value; a second, different index for the
/// same cell makes the combination illegal. Entries with a negative index are
/// ignored.
static bool
isLegal(const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
        const std::vector<R600InstrInfo::BankSwizzle> &Swz,
        unsigned CheckedSize) {
  // Claimed[channel][position]; -1 marks a cell nobody has claimed yet.
  int Claimed[4][3];
  for (unsigned Chan = 0; Chan != 4; ++Chan)
    for (unsigned Pos = 0; Pos != 3; ++Pos)
      Claimed[Chan][Pos] = -1;

  for (unsigned Instr = 0; Instr < CheckedSize; ++Instr) {
    const std::vector<std::pair<int, unsigned> > Swizzled =
        Swizzle(IGSrcs[Instr], Swz[Instr]);
    for (unsigned Pos = 0; Pos != 3; ++Pos) {
      const int Index = Swizzled[Pos].first;
      if (Index < 0)
        continue;
      int &Cell = Claimed[Swizzled[Pos].second][Pos];
      if (Cell < 0) {
        Cell = Index;
        continue;
      }
      if (Cell != Index)
        return false;
    }
  }
  return true;
}
/// Backtracking search for a legal bank swizzle assignment.
///
/// Entries of \p SwzCandidate below \p Depth are treated as fixed. The entry
/// at \p Depth is tried from its current value up to the last mode (numeric
/// value 5), recursing on the next instruction after each choice. When no
/// choice works the entry is reset to ALU_VEC_012 before reporting failure.
///
/// \returns true when all instructions received a legal swizzle, which is then
/// left in \p SwzCandidate.
static bool recursiveFitsFPLimitation(
    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
    unsigned Depth = 0) {
  // Prune as soon as the already fixed prefix conflicts.
  if (!isLegal(IGSrcs, SwzCandidate, Depth))
    return false;
  if (Depth == IGSrcs.size())
    return true;

  for (unsigned Mode = SwzCandidate[Depth]; Mode < 6; ++Mode) {
    SwzCandidate[Depth] = static_cast<R600InstrInfo::BankSwizzle>(Mode);
    if (recursiveFitsFPLimitation(IGSrcs, SwzCandidate, Depth + 1))
      return true;
  }

  SwzCandidate[Depth] = R600InstrInfo::ALU_VEC_012;
  return false;
}
/// Searches for bank swizzles that let every instruction of \p IG read its
/// sources without conflict.
///
/// \p ValidSwizzle is overwritten: it is first seeded with the BANK_SWIZZLE
/// immediate of each instruction, then updated by the search.
///
/// \returns true when a conflict-free assignment was found.
bool
R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
                                       const DenseMap<unsigned, unsigned> &PV,
                                       std::vector<BankSwizzle> &ValidSwizzle)
    const {
  //Todo : support shared src0 - src1 operand
  ValidSwizzle.clear();
  std::vector<std::vector<std::pair<int, unsigned> > > SrcsPerInstr;
  for (unsigned Pos = 0, Count = IG.size(); Pos < Count; ++Pos) {
    MachineInstr *Instr = IG[Pos];
    SrcsPerInstr.push_back(ExtractSrcs(Instr, PV));
    const unsigned SwzIdx = getOperandIdx(Instr->getOpcode(),
                                          R600Operands::BANK_SWIZZLE);
    ValidSwizzle.push_back(static_cast<R600InstrInfo::BankSwizzle>(
        Instr->getOperand(SwzIdx).getImm()));
  }
  return recursiveFitsFPLimitation(SrcsPerInstr, ValidSwizzle);
}
bool
R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
const {
@ -197,34 +347,22 @@ bool
R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const {
std::vector<unsigned> Consts;
for (unsigned i = 0, n = MIs.size(); i < n; i++) {
const MachineInstr *MI = MIs[i];
const R600Operands::Ops OpTable[3][2] = {
{R600Operands::SRC0, R600Operands::SRC0_SEL},
{R600Operands::SRC1, R600Operands::SRC1_SEL},
{R600Operands::SRC2, R600Operands::SRC2_SEL},
};
MachineInstr *MI = MIs[i];
if (!isALUInstr(MI->getOpcode()))
continue;
for (unsigned j = 0; j < 3; j++) {
int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
if (SrcIdx < 0)
break;
unsigned Reg = MI->getOperand(SrcIdx).getReg();
if (Reg == AMDGPU::ALU_CONST) {
unsigned Const = MI->getOperand(
getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
Consts.push_back(Const);
continue;
}
if (AMDGPU::R600_KC0RegClass.contains(Reg) ||
AMDGPU::R600_KC1RegClass.contains(Reg)) {
unsigned Index = RI.getEncodingValue(Reg) & 0xff;
unsigned Chan = RI.getHWRegChan(Reg);
const SmallVector<std::pair<MachineOperand *, int64_t>, 3> &Srcs =
getSrcs(MI);
for (unsigned j = 0, e = Srcs.size(); j < e; j++) {
std::pair<MachineOperand *, unsigned> Src = Srcs[j];
if (Src.first->getReg() == AMDGPU::ALU_CONST)
Consts.push_back(Src.second);
if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) ||
AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) {
unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff;
unsigned Chan = RI.getHWRegChan(Src.first->getReg());
Consts.push_back((Index << 2) | Chan);
continue;
}
}
}

View File

@ -36,8 +36,19 @@ namespace llvm {
const AMDGPUSubtarget &ST;
int getBranchInstr(const MachineOperand &op) const;
std::vector<std::pair<int, unsigned> >
ExtractSrcs(MachineInstr *MI, const DenseMap<unsigned, unsigned> &PV) const;
public:
/// Bank swizzle modes that can be assigned to an ALU instruction.
/// The enumerators are numbered consecutively starting at 0, and this
/// declaration order is the one the swizzle search walks through, so do not
/// reorder them.
enum BankSwizzle {
ALU_VEC_012 = 0,
ALU_VEC_021,
ALU_VEC_120,
ALU_VEC_102,
ALU_VEC_201,
ALU_VEC_210
};
explicit R600InstrInfo(AMDGPUTargetMachine &tm);
const R600RegisterInfo &getRegisterInfo() const;
@ -62,6 +73,23 @@ namespace llvm {
bool usesTextureCache(unsigned Opcode) const;
bool usesTextureCache(const MachineInstr *MI) const;
/// \returns a pair for each src of an ALU instruction, in SRC0, SRC1, SRC2
/// order.
/// The first member of a pair points to the source register operand of \p MI.
/// If that register is ALU_CONST, second member is SEL.
/// If that register is ALU_LITERAL_X, second member is IMM.
/// Otherwise, second member carries no information (it is set to 0).
SmallVector<std::pair<MachineOperand *, int64_t>, 3>
getSrcs(MachineInstr *MI) const;
/// Given the order VEC_012 < VEC_021 < VEC_120 < VEC_102 < VEC_201 < VEC_210,
/// looks for the first (in lexical order) BankSwizzle assignment that fits
/// Read Port limitations, starting from the one already provided in the
/// Instruction Group MIs.
/// \returns true and stores that assignment in BS if one is available.
/// Otherwise returns false and the content of BS is unspecified.
/// PV holds GPR to PV registers in the Instruction Group MIs.
bool fitsReadPortLimitations(const std::vector<MachineInstr *> &MIs,
const DenseMap<unsigned, unsigned> &PV,
std::vector<BankSwizzle> &BS) const;
bool fitsConstReadLimitations(const std::vector<unsigned>&) const;
bool canBundle(const std::vector<MachineInstr *> &) const;

View File

@ -60,15 +60,6 @@ private:
const R600InstrInfo *TII;
const R600RegisterInfo &TRI;
// Bank swizzle modes that can be assigned to an ALU instruction, numbered
// consecutively starting at 0.
enum BankSwizzle {
ALU_VEC_012 = 0,
ALU_VEC_021,
ALU_VEC_120,
ALU_VEC_102,
ALU_VEC_201,
ALU_VEC_210
};
// Returns the hardware channel of the register held in operand 0 of MI.
unsigned getSlot(const MachineInstr *MI) const {
  const unsigned FirstOperandReg = MI->getOperand(0).getReg();
  return TRI.getHWRegChan(FirstOperandReg);
}
@ -222,7 +213,9 @@ public:
});
const DenseMap<unsigned, unsigned> &PV =
getPreviousVector(CurrentPacketMIs.front());
bool FitsReadPortLimits = fitsReadPortLimitation(CurrentPacketMIs, PV);
std::vector<R600InstrInfo::BankSwizzle> BS;
bool FitsReadPortLimits =
TII->fitsReadPortLimitations(CurrentPacketMIs, PV, BS);
DEBUG(
if (!FitsReadPortLimits) {
dbgs() << "Couldn't pack :\n";
@ -235,6 +228,14 @@ public:
dbgs() << "because of Read port limitations\n";
});
bool isBundlable = FitsConstLimits && FitsReadPortLimits;
if (isBundlable) {
for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) {
MachineInstr *MI = CurrentPacketMIs[i];
unsigned Op = TII->getOperandIdx(MI->getOpcode(),
R600Operands::BANK_SWIZZLE);
MI->getOperand(Op).setImm(BS[i]);
}
}
CurrentPacketMIs.pop_back();
if (!isBundlable) {
endPacket(MI->getParent(), MI);
@ -246,134 +247,6 @@ public:
substitutePV(MI, PV);
return VLIWPacketizerList::addToPacket(MI);
}
private:
std::vector<std::pair<int, unsigned> >
ExtractSrcs(const MachineInstr *MI, const DenseMap<unsigned, unsigned> &PV)
const {
R600Operands::Ops Ops[] = {
R600Operands::SRC0,
R600Operands::SRC1,
R600Operands::SRC2
};
std::vector<std::pair<int, unsigned> > Result;
for (unsigned i = 0; i < 3; i++) {
int OperandIdx = TII->getOperandIdx(MI->getOpcode(), Ops[i]);
if (OperandIdx < 0){
Result.push_back(std::pair<int, unsigned>(-1,0));
continue;
}
unsigned Src = MI->getOperand(OperandIdx).getReg();
if (PV.find(Src) != PV.end()) {
Result.push_back(std::pair<int, unsigned>(-1,0));
continue;
}
unsigned Reg = TRI.getEncodingValue(Src) & 0xff;
if (Reg > 127) {
Result.push_back(std::pair<int, unsigned>(-1,0));
continue;
}
unsigned Chan = TRI.getHWRegChan(Src);
Result.push_back(std::pair<int, unsigned>(Reg, Chan));
}
return Result;
}
// Returns a copy of Src with its first three entries reordered according to
// Swz. Writing the input as (a, b, c), the results are:
//   ALU_VEC_012 -> (a, b, c)    ALU_VEC_021 -> (a, c, b)
//   ALU_VEC_102 -> (b, a, c)    ALU_VEC_120 -> (c, a, b)
//   ALU_VEC_201 -> (b, c, a)    ALU_VEC_210 -> (c, b, a)
std::vector<std::pair<int, unsigned> >
Swizzle(std::vector<std::pair<int, unsigned> > Src,
        BankSwizzle Swz) const {
  typedef std::pair<int, unsigned> Slot;
  switch (Swz) {
  case ALU_VEC_012:
    break;
  case ALU_VEC_021:
    std::swap(Src[2], Src[1]);
    break;
  case ALU_VEC_102:
    std::swap(Src[1], Src[0]);
    break;
  case ALU_VEC_120: {
    // Rotate towards the back: the last entry moves to the front.
    const Slot Last = Src[2];
    Src[2] = Src[1];
    Src[1] = Src[0];
    Src[0] = Last;
    break;
  }
  case ALU_VEC_201: {
    // Rotate towards the front: the first entry moves to the back.
    const Slot First = Src[0];
    Src[0] = Src[1];
    Src[1] = Src[2];
    Src[2] = First;
    break;
  }
  case ALU_VEC_210:
    std::swap(Src[2], Src[0]);
    break;
  }
  return Src;
}
// Checks the instructions of IG for read conflicts under the swizzles Swz.
// After swizzling, every (channel, source position) cell may be claimed by
// only one index value; a second, different index for the same cell makes the
// combination illegal. Entries with a negative index are ignored.
bool isLegal(const std::vector<MachineInstr *> &IG,
             const std::vector<BankSwizzle> &Swz,
             const DenseMap<unsigned, unsigned> &PV) const {
  assert (Swz.size() == IG.size());
  // Claimed[channel][position]; -1 marks a cell nobody has claimed yet.
  int Claimed[4][3];
  for (unsigned Chan = 0; Chan != 4; ++Chan)
    for (unsigned Pos = 0; Pos != 3; ++Pos)
      Claimed[Chan][Pos] = -1;

  for (unsigned Instr = 0, Count = IG.size(); Instr < Count; ++Instr) {
    const std::vector<std::pair<int, unsigned> > Swizzled =
        Swizzle(ExtractSrcs(IG[Instr], PV), Swz[Instr]);
    for (unsigned Pos = 0; Pos != 3; ++Pos) {
      const int Index = Swizzled[Pos].first;
      if (Index < 0)
        continue;
      int &Cell = Claimed[Swizzled[Pos].second][Pos];
      if (Cell < 0) {
        Cell = Index;
        continue;
      }
      if (Cell != Index)
        return false;
    }
  }
  return true;
}
// Backtracking search for a legal bank swizzle assignment.
// CurrentlyChecked is the prefix of IG that already has a swizzle in
// SwzCandidate. Each call validates that prefix, then extends it by the next
// instruction of IG and tries the six modes in declaration order. On success
// SwzCandidate holds one swizzle per instruction of IG.
bool recursiveFitsFPLimitation(
    std::vector<MachineInstr *> IG,
    const DenseMap<unsigned, unsigned> &PV,
    std::vector<BankSwizzle> &SwzCandidate,
    std::vector<MachineInstr *> CurrentlyChecked)
    const {
  if (!isLegal(CurrentlyChecked, SwzCandidate, PV))
    return false;
  if (CurrentlyChecked.size() == IG.size())
    return true;

  static const BankSwizzle Modes[] = {
    ALU_VEC_012,
    ALU_VEC_021,
    ALU_VEC_120,
    ALU_VEC_102,
    ALU_VEC_201,
    ALU_VEC_210
  };

  MachineInstr *Next = IG[CurrentlyChecked.size()];
  CurrentlyChecked.push_back(Next);
  for (unsigned Choice = 0; Choice != 6; ++Choice) {
    SwzCandidate.push_back(Modes[Choice]);
    if (recursiveFitsFPLimitation(IG, PV, SwzCandidate, CurrentlyChecked))
      return true;
    // Undo the tentative choice before trying the next mode.
    SwzCandidate.pop_back();
  }
  return false;
}
// Searches for bank swizzles that let every instruction of IG read its
// sources without conflict. When one is found it is written into the
// BANK_SWIZZLE operand of each instruction and true is returned; otherwise
// the instructions are left untouched and false is returned.
bool fitsReadPortLimitation(
    std::vector<MachineInstr *> IG,
    const DenseMap<unsigned, unsigned> &PV)
    const {
  //Todo : support shared src0 - src1 operand
  std::vector<BankSwizzle> Chosen;
  if (!recursiveFitsFPLimitation(IG, PV, Chosen,
                                 std::vector<MachineInstr *>()))
    return false;

  for (unsigned Pos = 0, Count = IG.size(); Pos < Count; ++Pos) {
    MachineInstr *Instr = IG[Pos];
    const unsigned SwzIdx = TII->getOperandIdx(Instr->getOpcode(),
                                               R600Operands::BANK_SWIZZLE);
    Instr->getOperand(SwzIdx).setImm(Chosen[Pos]);
  }
  return true;
}
};
bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {