llvm/lib/Target/R600/R600LowerConstCopy.cpp
Vincent Lejeune 7867885737 R600: Do not fold single instruction with more that 3 kcache read
It fixes around 100 tfb piglit tests and 16 glean tests.

NOTE: This is a candidate for the Mesa stable branch.

Reviewed-by: Tom Stellard <thomas.stellard at amd.com>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175183 91177308-0d34-0410-b5e6-96231b3b80d8
2013-02-14 16:57:19 +00:00

223 lines
7.4 KiB
C++

//===-- R600LowerConstCopy.cpp - Propagate ConstCopy / lower them to MOV---===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass is intended to handle remaining ConstCopy pseudo MachineInstr.
/// ISel will fold each Const Buffer read inside scalar ALU. However it cannot
/// fold them inside vector instruction, like DOT4 or Cube ; ISel emits
/// ConstCopy instead. This pass (executed after ExpandingSpecialInstr) will try
/// to fold them if possible or replace them by MOV otherwise.
//
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "R600InstrInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/IR/GlobalValue.h"
namespace llvm {
class R600LowerConstCopy : public MachineFunctionPass {
private:
static char ID;
const R600InstrInfo *TII;
struct ConstPairs {
unsigned XYPair;
unsigned ZWPair;
};
bool canFoldInBundle(ConstPairs &UsedConst, unsigned ReadConst) const;
public:
R600LowerConstCopy(TargetMachine &tm);
virtual bool runOnMachineFunction(MachineFunction &MF);
const char *getPassName() const { return "R600 Eliminate Symbolic Operand"; }
};
char R600LowerConstCopy::ID = 0;
R600LowerConstCopy::R600LowerConstCopy(TargetMachine &tm) :
MachineFunctionPass(ID),
TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo()))
{
}
bool R600LowerConstCopy::canFoldInBundle(ConstPairs &UsedConst,
unsigned ReadConst) const {
unsigned ReadConstChan = ReadConst & 3;
unsigned ReadConstIndex = ReadConst & (~3);
if (ReadConstChan < 2) {
if (!UsedConst.XYPair) {
UsedConst.XYPair = ReadConstIndex;
}
return UsedConst.XYPair == ReadConstIndex;
} else {
if (!UsedConst.ZWPair) {
UsedConst.ZWPair = ReadConstIndex;
}
return UsedConst.ZWPair == ReadConstIndex;
}
}
static bool isControlFlow(const MachineInstr &MI) {
return (MI.getOpcode() == AMDGPU::IF_PREDICATE_SET) ||
(MI.getOpcode() == AMDGPU::ENDIF) ||
(MI.getOpcode() == AMDGPU::ELSE) ||
(MI.getOpcode() == AMDGPU::WHILELOOP) ||
(MI.getOpcode() == AMDGPU::BREAK);
}
bool R600LowerConstCopy::runOnMachineFunction(MachineFunction &MF) {
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
BB != BB_E; ++BB) {
MachineBasicBlock &MBB = *BB;
DenseMap<unsigned, MachineInstr *> RegToConstIndex;
for (MachineBasicBlock::instr_iterator I = MBB.instr_begin(),
E = MBB.instr_end(); I != E;) {
if (I->getOpcode() == AMDGPU::CONST_COPY) {
MachineInstr &MI = *I;
I = llvm::next(I);
unsigned DstReg = MI.getOperand(0).getReg();
DenseMap<unsigned, MachineInstr *>::iterator SrcMI =
RegToConstIndex.find(DstReg);
if (SrcMI != RegToConstIndex.end()) {
SrcMI->second->eraseFromParent();
RegToConstIndex.erase(SrcMI);
}
MachineInstr *NewMI =
TII->buildDefaultInstruction(MBB, &MI, AMDGPU::MOV,
MI.getOperand(0).getReg(), AMDGPU::ALU_CONST);
TII->setImmOperand(NewMI, R600Operands::SRC0_SEL,
MI.getOperand(1).getImm());
RegToConstIndex[DstReg] = NewMI;
MI.eraseFromParent();
continue;
}
std::vector<unsigned> Defs;
// We consider all Instructions as bundled because algorithm that handle
// const read port limitations inside an IG is still valid with single
// instructions.
std::vector<MachineInstr *> Bundle;
if (I->isBundle()) {
unsigned BundleSize = I->getBundleSize();
for (unsigned i = 0; i < BundleSize; i++) {
I = llvm::next(I);
Bundle.push_back(I);
}
} else if (TII->isALUInstr(I->getOpcode())){
Bundle.push_back(I);
} else if (isControlFlow(*I)) {
RegToConstIndex.clear();
I = llvm::next(I);
continue;
} else {
MachineInstr &MI = *I;
for (MachineInstr::mop_iterator MOp = MI.operands_begin(),
MOpE = MI.operands_end(); MOp != MOpE; ++MOp) {
MachineOperand &MO = *MOp;
if (!MO.isReg())
continue;
if (MO.isDef()) {
Defs.push_back(MO.getReg());
} else {
// Either a TEX or an Export inst, prevent from erasing def of used
// operand
RegToConstIndex.erase(MO.getReg());
for (MCSubRegIterator SR(MO.getReg(), &TII->getRegisterInfo());
SR.isValid(); ++SR) {
RegToConstIndex.erase(*SR);
}
}
}
}
R600Operands::Ops OpTable[3][2] = {
{R600Operands::SRC0, R600Operands::SRC0_SEL},
{R600Operands::SRC1, R600Operands::SRC1_SEL},
{R600Operands::SRC2, R600Operands::SRC2_SEL},
};
for(std::vector<MachineInstr *>::iterator It = Bundle.begin(),
ItE = Bundle.end(); It != ItE; ++It) {
MachineInstr *MI = *It;
if (TII->isPredicated(MI)) {
// We don't want to erase previous assignment
RegToConstIndex.erase(MI->getOperand(0).getReg());
} else {
int WriteIDX = TII->getOperandIdx(MI->getOpcode(), R600Operands::WRITE);
if (WriteIDX < 0 || MI->getOperand(WriteIDX).getImm())
Defs.push_back(MI->getOperand(0).getReg());
}
}
ConstPairs CP = {0,0};
for (unsigned SrcOp = 0; SrcOp < 3; SrcOp++) {
for(std::vector<MachineInstr *>::iterator It = Bundle.begin(),
ItE = Bundle.end(); It != ItE; ++It) {
MachineInstr *MI = *It;
int SrcIdx = TII->getOperandIdx(MI->getOpcode(), OpTable[SrcOp][0]);
if (SrcIdx < 0)
continue;
MachineOperand &MO = MI->getOperand(SrcIdx);
DenseMap<unsigned, MachineInstr *>::iterator SrcMI =
RegToConstIndex.find(MO.getReg());
if (SrcMI != RegToConstIndex.end()) {
MachineInstr *CstMov = SrcMI->second;
int ConstMovSel =
TII->getOperandIdx(CstMov->getOpcode(), R600Operands::SRC0_SEL);
unsigned ConstIndex = CstMov->getOperand(ConstMovSel).getImm();
if (MI->isInsideBundle() && canFoldInBundle(CP, ConstIndex)) {
TII->setImmOperand(MI, OpTable[SrcOp][1], ConstIndex);
MI->getOperand(SrcIdx).setReg(AMDGPU::ALU_CONST);
} else {
RegToConstIndex.erase(SrcMI);
}
}
}
}
for (std::vector<unsigned>::iterator It = Defs.begin(), ItE = Defs.end();
It != ItE; ++It) {
DenseMap<unsigned, MachineInstr *>::iterator SrcMI =
RegToConstIndex.find(*It);
if (SrcMI != RegToConstIndex.end()) {
SrcMI->second->eraseFromParent();
RegToConstIndex.erase(SrcMI);
}
}
I = llvm::next(I);
}
if (MBB.succ_empty()) {
for (DenseMap<unsigned, MachineInstr *>::iterator
DI = RegToConstIndex.begin(), DE = RegToConstIndex.end();
DI != DE; ++DI) {
DI->second->eraseFromParent();
}
}
}
return false;
}
FunctionPass *createR600LowerConstCopy(TargetMachine &tm) {
return new R600LowerConstCopy(tm);
}
}