//===- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass turns all control flow pseudo instructions into native ones,
/// computing their address on the fly; it also sets the STACK_SIZE info.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <set>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "r600cf"

namespace {

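// CFStack models the hardware control-flow stack as the pass walks the
// program: loops and whole-quad-mode (WQM) pushes consume full entries,
// while non-WQM pushes are tracked as sub-entries, four of which fit in
// one full entry (see updateMaxStackSize()).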
struct CFStack {
  enum StackItem {
    ENTRY = 0,
    SUB_ENTRY = 1,
    FIRST_NON_WQM_PUSH = 2,
    FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3
  };

  const R600Subtarget *ST;
  std::vector<StackItem> BranchStack;
  std::vector<StackItem> LoopStack;
  unsigned MaxStackSize;
  unsigned CurrentEntries = 0;
  unsigned CurrentSubEntries = 0;

  CFStack(const R600Subtarget *st, CallingConv::ID cc) : ST(st),
      // We need to reserve a stack entry for CALL_FS in vertex shaders.
      MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0) {}

  unsigned getLoopDepth();
  bool branchStackContains(CFStack::StackItem);
  bool requiresWorkAroundForInst(unsigned Opcode);
  unsigned getSubEntrySize(CFStack::StackItem Item);
  void updateMaxStackSize();
  void pushBranch(unsigned Opcode, bool isWQM = false);
  void pushLoop();
  void popBranch();
  void popLoop();
};

unsigned CFStack::getLoopDepth() {
  return LoopStack.size();
}

bool CFStack::branchStackContains(CFStack::StackItem Item) {
  for (std::vector<CFStack::StackItem>::const_iterator I = BranchStack.begin(),
       E = BranchStack.end(); I != E; ++I) {
    if (*I == Item)
      return true;
  }
  return false;
}

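// Returns true when an instruction needs the PUSH work-around: either a
// CF_ALU_PUSH_BEFORE inside nested loops on Cayman, or, on parts with the
// CF ALU bug, a push/else/break/continue issued while more than 3
// (wavefront size 64) or 7 (wavefront size 32) sub-entries are live. The
// thresholds below are deliberately conservative.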
bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
  if (Opcode == R600::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&
      getLoopDepth() > 1)
    return true;

  if (!ST->hasCFAluBug())
    return false;

  switch(Opcode) {
  default: return false;
  case R600::CF_ALU_PUSH_BEFORE:
  case R600::CF_ALU_ELSE_AFTER:
  case R600::CF_ALU_BREAK:
  case R600::CF_ALU_CONTINUE:
    if (CurrentSubEntries == 0)
      return false;
    if (ST->getWavefrontSize() == 64) {
      // We are being conservative here.  We only require this work-around if
      // CurrentSubEntries > 3 &&
      // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0)
      //
      // We have to be conservative, because we don't know for certain that
      // our stack allocation algorithm for Evergreen/NI is correct.  Applying
      // this work-around when CurrentSubEntries > 3 allows us to over-allocate
      // stack resources without any problems.
      return CurrentSubEntries > 3;
    } else {
      assert(ST->getWavefrontSize() == 32);
      // We are being conservative here.  We only require the work-around if
      // CurrentSubEntries > 7 &&
      // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0)
      // See the comment on the wavefront size == 64 case for why we are
      // being conservative.
      return CurrentSubEntries > 7;
    }
  }
}

unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
  switch(Item) {
  default:
    return 0;
  case CFStack::FIRST_NON_WQM_PUSH:
    assert(!ST->hasCaymanISA());
    if (ST->getGeneration() <= AMDGPUSubtarget::R700) {
      // +1 For the push operation.
      // +2 Extra space required.
      return 3;
    } else {
      // Some documentation says that this is not necessary on Evergreen,
      // but experimentation has shown that we need to allocate 1 extra
      // sub-entry for the first non-WQM push.
      // +1 For the push operation.
      // +1 Extra space required.
      return 2;
    }
  case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
    assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
    // +1 For the push operation.
    // +1 Extra space required.
    return 2;
  case CFStack::SUB_ENTRY:
    return 1;
  }
}

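// Worked example (illustration only): with CurrentEntries == 2 and
// CurrentSubEntries == 5, the current size is 2 + alignTo(5, 4) / 4
// == 2 + 8 / 4 == 4 full entries, i.e. sub-entries are rounded up to
// whole entries in groups of four.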
void CFStack::updateMaxStackSize() {
  unsigned CurrentStackSize =
      CurrentEntries + (alignTo(CurrentSubEntries, 4) / 4);
  MaxStackSize = std::max(CurrentStackSize, MaxStackSize);
}

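// Classifies a push and records it on BranchStack: WQM pushes cost a full
// entry, the first non-WQM push (and, past Evergreen, the first one made
// while full entries are live) is charged extra sub-entries per
// getSubEntrySize(), and any other non-WQM push costs a single sub-entry.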
void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
  CFStack::StackItem Item = CFStack::ENTRY;
  switch(Opcode) {
  case R600::CF_PUSH_EG:
  case R600::CF_ALU_PUSH_BEFORE:
    if (!isWQM) {
      if (!ST->hasCaymanISA() &&
          !branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
        Item = CFStack::FIRST_NON_WQM_PUSH;  // May not be required on Evergreen/NI
                                             // See comment in
                                             // CFStack::getSubEntrySize()
      else if (CurrentEntries > 0 &&
               ST->getGeneration() > AMDGPUSubtarget::EVERGREEN &&
               !ST->hasCaymanISA() &&
               !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
        Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
      else
        Item = CFStack::SUB_ENTRY;
    } else
      Item = CFStack::ENTRY;
    break;
  }
  BranchStack.push_back(Item);
  if (Item == CFStack::ENTRY)
    CurrentEntries++;
  else
    CurrentSubEntries += getSubEntrySize(Item);
  updateMaxStackSize();
}

void CFStack::pushLoop() {
  LoopStack.push_back(CFStack::ENTRY);
  CurrentEntries++;
  updateMaxStackSize();
}

void CFStack::popBranch() {
  CFStack::StackItem Top = BranchStack.back();
  if (Top == CFStack::ENTRY)
    CurrentEntries--;
  else
    CurrentSubEntries -= getSubEntrySize(Top);
  BranchStack.pop_back();
}

void CFStack::popLoop() {
  CurrentEntries--;
  LoopStack.pop_back();
}

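// The finalizer itself: it rewrites the control-flow pseudo instructions of
// each machine basic block into the native R600/Evergreen CF opcodes, groups
// texture/vertex fetches and ALU bundles into clauses, patches jump targets
// once their CF addresses are known, and records the required stack depth in
// R600MachineFunctionInfo::CFStackSize.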
class R600ControlFlowFinalizer : public MachineFunctionPass {
private:
  using ClauseFile = std::pair<MachineInstr *, std::vector<MachineInstr *>>;

  enum ControlFlowInstruction {
    CF_TC,
    CF_VC,
    CF_CALL_FS,
    CF_WHILE_LOOP,
    CF_END_LOOP,
    CF_LOOP_BREAK,
    CF_LOOP_CONTINUE,
    CF_JUMP,
    CF_ELSE,
    CF_POP,
    CF_END
  };

  const R600InstrInfo *TII = nullptr;
  const R600RegisterInfo *TRI = nullptr;
  unsigned MaxFetchInst;
  const R600Subtarget *ST = nullptr;

  bool IsTrivialInst(MachineInstr &MI) const {
    switch (MI.getOpcode()) {
    case R600::KILL:
    case R600::RETURN:
      return true;
    default:
      return false;
    }
  }

  const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
    unsigned Opcode = 0;
    bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
    switch (CFI) {
    case CF_TC:
      Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600;
      break;
    case CF_VC:
      Opcode = isEg ? R600::CF_VC_EG : R600::CF_VC_R600;
      break;
    case CF_CALL_FS:
      Opcode = isEg ? R600::CF_CALL_FS_EG : R600::CF_CALL_FS_R600;
      break;
    case CF_WHILE_LOOP:
      Opcode = isEg ? R600::WHILE_LOOP_EG : R600::WHILE_LOOP_R600;
      break;
    case CF_END_LOOP:
      Opcode = isEg ? R600::END_LOOP_EG : R600::END_LOOP_R600;
      break;
    case CF_LOOP_BREAK:
      Opcode = isEg ? R600::LOOP_BREAK_EG : R600::LOOP_BREAK_R600;
      break;
    case CF_LOOP_CONTINUE:
      Opcode = isEg ? R600::CF_CONTINUE_EG : R600::CF_CONTINUE_R600;
      break;
    case CF_JUMP:
      Opcode = isEg ? R600::CF_JUMP_EG : R600::CF_JUMP_R600;
      break;
    case CF_ELSE:
      Opcode = isEg ? R600::CF_ELSE_EG : R600::CF_ELSE_R600;
      break;
    case CF_POP:
      Opcode = isEg ? R600::POP_EG : R600::POP_R600;
      break;
    case CF_END:
      if (ST->hasCaymanISA()) {
        Opcode = R600::CF_END_CM;
        break;
      }
      Opcode = isEg ? R600::CF_END_EG : R600::CF_END_R600;
      break;
    }
    assert(Opcode && "No opcode selected");
    return TII->get(Opcode);
  }

  bool isCompatibleWithClause(const MachineInstr &MI,
                              std::set<unsigned> &DstRegs) const {
    unsigned DstMI, SrcMI;
    for (MachineInstr::const_mop_iterator I = MI.operands_begin(),
                                          E = MI.operands_end();
         I != E; ++I) {
      const MachineOperand &MO = *I;
      if (!MO.isReg())
        continue;
      if (MO.isDef()) {
        unsigned Reg = MO.getReg();
        if (R600::R600_Reg128RegClass.contains(Reg))
          DstMI = Reg;
        else
          DstMI = TRI->getMatchingSuperReg(Reg,
              AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
              &R600::R600_Reg128RegClass);
      }
      if (MO.isUse()) {
        unsigned Reg = MO.getReg();
        if (R600::R600_Reg128RegClass.contains(Reg))
          SrcMI = Reg;
        else
          SrcMI = TRI->getMatchingSuperReg(Reg,
              AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
              &R600::R600_Reg128RegClass);
      }
    }
    if ((DstRegs.find(SrcMI) == DstRegs.end())) {
      DstRegs.insert(DstMI);
      return true;
    } else
      return false;
  }

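  // Greedily collects consecutive texture (or vertex) fetch instructions into
  // a single clause, stopping at MaxFetchInst instructions, at the first
  // instruction that uses the other cache, or when a fetch would read a
  // register written earlier in the same clause (isCompatibleWithClause).
  // The clause is headed by a CF_TC/CF_VC instruction whose COUNT field is
  // filled in here and whose ADDR is patched later by EmitFetchClause().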
  ClauseFile
  MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
      const {
    MachineBasicBlock::iterator ClauseHead = I;
    std::vector<MachineInstr *> ClauseContent;
    unsigned AluInstCount = 0;
    bool IsTex = TII->usesTextureCache(*ClauseHead);
    std::set<unsigned> DstRegs;
    for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
      if (IsTrivialInst(*I))
        continue;
      if (AluInstCount >= MaxFetchInst)
        break;
      if ((IsTex && !TII->usesTextureCache(*I)) ||
          (!IsTex && !TII->usesVertexCache(*I)))
        break;
      if (!isCompatibleWithClause(*I, DstRegs))
        break;
      AluInstCount++;
      ClauseContent.push_back(&*I);
    }
    MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
        getHWInstrDesc(IsTex ? CF_TC : CF_VC))
        .addImm(0) // ADDR
        .addImm(AluInstCount - 1); // COUNT
    return ClauseFile(MIb, std::move(ClauseContent));
  }

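  // Rewrites ALU_LITERAL_X source operands so that up to four distinct
  // literal values per instruction group are mapped onto the
  // ALU_LITERAL_{X,Y,Z,W} slots, reusing a slot when the same immediate is
  // already present and appending newly seen literal operands to Lits.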
  void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const {
    static const unsigned LiteralRegs[] = {
      R600::ALU_LITERAL_X,
      R600::ALU_LITERAL_Y,
      R600::ALU_LITERAL_Z,
      R600::ALU_LITERAL_W
    };
    const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs =
        TII->getSrcs(MI);
    for (const auto &Src : Srcs) {
      if (Src.first->getReg() != R600::ALU_LITERAL_X)
        continue;
      int64_t Imm = Src.second;
      std::vector<MachineOperand *>::iterator It =
          llvm::find_if(Lits, [&](MachineOperand *val) {
            return val->isImm() && (val->getImm() == Imm);
          });

      // Get corresponding Operand
      MachineOperand &Operand = MI.getOperand(
          TII->getOperandIdx(MI.getOpcode(), R600::OpName::literal));

      if (It != Lits.end()) {
        // Reuse existing literal reg
        unsigned Index = It - Lits.begin();
        Src.first->setReg(LiteralRegs[Index]);
      } else {
        // Allocate new literal reg
        assert(Lits.size() < 4 && "Too many literals in Instruction Group");
        Src.first->setReg(LiteralRegs[Lits.size()]);
        Lits.push_back(&Operand);
      }
    }
  }

  MachineBasicBlock::iterator insertLiterals(
      MachineBasicBlock::iterator InsertPos,
      const std::vector<unsigned> &Literals) const {
    MachineBasicBlock *MBB = InsertPos->getParent();
    for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {
      unsigned LiteralPair0 = Literals[i];
      unsigned LiteralPair1 = (i + 1 < e) ? Literals[i + 1] : 0;
      InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
          TII->get(R600::LITERALS))
          .addImm(LiteralPair0)
          .addImm(LiteralPair1);
    }
    return InsertPos;
  }

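  // Collects the ALU instructions following a CF_ALU/CF_ALU_PUSH_BEFORE into
  // a clause: bundles are flattened, literal operands are assigned to
  // ALU_LITERAL_{X,Y,Z,W} via getLiteral(), and the literals are materialized
  // as R600::LITERALS pseudo instructions holding two values each. The clause
  // head's instruction-count operand (operand 7) is updated at the end.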
  ClauseFile
  MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
      const {
    MachineInstr &ClauseHead = *I;
    std::vector<MachineInstr *> ClauseContent;
    I++;
    for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
      if (IsTrivialInst(*I)) {
        ++I;
        continue;
      }
      if (!I->isBundle() && !TII->isALUInstr(I->getOpcode()))
        break;
      std::vector<MachineOperand *> Literals;
      if (I->isBundle()) {
        MachineInstr &DeleteMI = *I;
        MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
        while (++BI != E && BI->isBundledWithPred()) {
          BI->unbundleFromPred();
          for (MachineOperand &MO : BI->operands()) {
            if (MO.isReg() && MO.isInternalRead())
              MO.setIsInternalRead(false);
          }
          getLiteral(*BI, Literals);
          ClauseContent.push_back(&*BI);
        }
        I = BI;
        DeleteMI.eraseFromParent();
      } else {
        getLiteral(*I, Literals);
        ClauseContent.push_back(&*I);
        I++;
      }
      for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {
        MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(),
            TII->get(R600::LITERALS));
        if (Literals[i]->isImm()) {
          MILit.addImm(Literals[i]->getImm());
        } else {
          MILit.addGlobalAddress(Literals[i]->getGlobal(),
                                 Literals[i]->getOffset());
        }
        if (i + 1 < e) {
          if (Literals[i + 1]->isImm()) {
            MILit.addImm(Literals[i + 1]->getImm());
          } else {
            MILit.addGlobalAddress(Literals[i + 1]->getGlobal(),
                                   Literals[i + 1]->getOffset());
          }
        } else
          MILit.addImm(0);
        ClauseContent.push_back(MILit);
      }
    }
    assert(ClauseContent.size() < 128 && "ALU clause is too big");
    ClauseHead.getOperand(7).setImm(ClauseContent.size() - 1);
    return ClauseFile(&ClauseHead, std::move(ClauseContent));
  }

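  // Splices a previously built fetch clause into place and patches the clause
  // head's ADDR with the current CF address. CfCount is advanced by two words
  // per fetch instruction here (EmitALUClause below advances it by one per
  // instruction), presumably because fetch instructions occupy two 64-bit
  // words in the clause encoding.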
  void EmitFetchClause(MachineBasicBlock::iterator InsertPos,
                       const DebugLoc &DL, ClauseFile &Clause,
                       unsigned &CfCount) {
    CounterPropagateAddr(*Clause.first, CfCount);
    MachineBasicBlock *BB = Clause.first->getParent();
    BuildMI(BB, DL, TII->get(R600::FETCH_CLAUSE)).addImm(CfCount);
    for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
      BB->splice(InsertPos, BB, Clause.second[i]);
    }
    CfCount += 2 * Clause.second.size();
  }

  void EmitALUClause(MachineBasicBlock::iterator InsertPos, const DebugLoc &DL,
                     ClauseFile &Clause, unsigned &CfCount) {
    Clause.first->getOperand(0).setImm(0);
    CounterPropagateAddr(*Clause.first, CfCount);
    MachineBasicBlock *BB = Clause.first->getParent();
    BuildMI(BB, DL, TII->get(R600::ALU_CLAUSE)).addImm(CfCount);
    for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
      BB->splice(InsertPos, BB, Clause.second[i]);
    }
    CfCount += Clause.second.size();
  }

  void CounterPropagateAddr(MachineInstr &MI, unsigned Addr) const {
    MI.getOperand(0).setImm(Addr + MI.getOperand(0).getImm());
  }

  void CounterPropagateAddr(const std::set<MachineInstr *> &MIs,
                            unsigned Addr) const {
    for (MachineInstr *MI : MIs) {
      CounterPropagateAddr(*MI, Addr);
    }
  }

public:
  static char ID;

  R600ControlFlowFinalizer() : MachineFunctionPass(ID) {}

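  // Main driver: walks every basic block, peels fetch and ALU clauses off the
  // instruction stream, lowers the remaining control-flow pseudos
  // (WHILELOOP/ENDLOOP, IF_PREDICATE_SET/ELSE/ENDIF, BREAK/CONTINUE, RETURN)
  // to native CF instructions while tracking the CF address in CfCount, then
  // splices the collected clauses back in when the RETURN is reached and
  // publishes the peak stack usage through R600MachineFunctionInfo.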
  bool runOnMachineFunction(MachineFunction &MF) override {
    ST = &MF.getSubtarget<R600Subtarget>();
    MaxFetchInst = ST->getTexVTXClauseSize();
    TII = ST->getInstrInfo();
    TRI = ST->getRegisterInfo();

    R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();

    CFStack CFStack(ST, MF.getFunction().getCallingConv());
    for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
         ++MB) {
      MachineBasicBlock &MBB = *MB;
      unsigned CfCount = 0;
      std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack;
      std::vector<MachineInstr *> IfThenElseStack;
      if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_VS) {
        BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
            getHWInstrDesc(CF_CALL_FS));
        CfCount++;
      }
      std::vector<ClauseFile> FetchClauses, AluClauses;
      std::vector<MachineInstr *> LastAlu(1);
      std::vector<MachineInstr *> ToPopAfter;

      for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
           I != E;) {
        if (TII->usesTextureCache(*I) || TII->usesVertexCache(*I)) {
          LLVM_DEBUG(dbgs() << CfCount << ":"; I->dump(););
          FetchClauses.push_back(MakeFetchClause(MBB, I));
          CfCount++;
          LastAlu.back() = nullptr;
          continue;
        }

        MachineBasicBlock::iterator MI = I;
        if (MI->getOpcode() != R600::ENDIF)
          LastAlu.back() = nullptr;
        if (MI->getOpcode() == R600::CF_ALU)
          LastAlu.back() = &*MI;
        I++;
        bool RequiresWorkAround =
            CFStack.requiresWorkAroundForInst(MI->getOpcode());
        switch (MI->getOpcode()) {
        case R600::CF_ALU_PUSH_BEFORE:
          if (RequiresWorkAround) {
            LLVM_DEBUG(dbgs()
                       << "Applying bug work-around for ALU_PUSH_BEFORE\n");
            BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG))
                .addImm(CfCount + 1)
                .addImm(1);
            MI->setDesc(TII->get(R600::CF_ALU));
            CfCount++;
            CFStack.pushBranch(R600::CF_PUSH_EG);
          } else
            CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE);
          LLVM_FALLTHROUGH;
        case R600::CF_ALU:
          I = MI;
          AluClauses.push_back(MakeALUClause(MBB, I));
          LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
          CfCount++;
          break;
        case R600::WHILELOOP: {
          CFStack.pushLoop();
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_WHILE_LOOP))
              .addImm(1);
          std::pair<unsigned, std::set<MachineInstr *>> Pair(CfCount,
              std::set<MachineInstr *>());
          Pair.second.insert(MIb);
          LoopStack.push_back(std::move(Pair));
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case R600::ENDLOOP: {
          CFStack.popLoop();
          std::pair<unsigned, std::set<MachineInstr *>> Pair =
              std::move(LoopStack.back());
          LoopStack.pop_back();
          CounterPropagateAddr(Pair.second, CfCount);
          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
              .addImm(Pair.first + 1);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case R600::IF_PREDICATE_SET: {
          LastAlu.push_back(nullptr);
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_JUMP))
              .addImm(0)
              .addImm(0);
          IfThenElseStack.push_back(MIb);
          LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case R600::ELSE: {
          MachineInstr *JumpInst = IfThenElseStack.back();
          IfThenElseStack.pop_back();
          CounterPropagateAddr(*JumpInst, CfCount);
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_ELSE))
              .addImm(0)
              .addImm(0);
          LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
          IfThenElseStack.push_back(MIb);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case R600::ENDIF: {
          CFStack.popBranch();
          if (LastAlu.back()) {
            ToPopAfter.push_back(LastAlu.back());
          } else {
            MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                getHWInstrDesc(CF_POP))
                .addImm(CfCount + 1)
                .addImm(1);
            (void)MIb;
            LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
            CfCount++;
          }

          MachineInstr *IfOrElseInst = IfThenElseStack.back();
          IfThenElseStack.pop_back();
          CounterPropagateAddr(*IfOrElseInst, CfCount);
          IfOrElseInst->getOperand(1).setImm(1);
          LastAlu.pop_back();
          MI->eraseFromParent();
          break;
        }
        case R600::BREAK: {
          CfCount++;
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_LOOP_BREAK))
              .addImm(0);
          LoopStack.back().second.insert(MIb);
          MI->eraseFromParent();
          break;
        }
        case R600::CONTINUE: {
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_LOOP_CONTINUE))
              .addImm(0);
          LoopStack.back().second.insert(MIb);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case R600::RETURN: {
          DebugLoc DL = MBB.findDebugLoc(MI);
          BuildMI(MBB, MI, DL, getHWInstrDesc(CF_END));
          CfCount++;
          if (CfCount % 2) {
            BuildMI(MBB, I, DL, TII->get(R600::PAD));
            CfCount++;
          }
          MI->eraseFromParent();
          for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
            EmitFetchClause(I, DL, FetchClauses[i], CfCount);
          for (unsigned i = 0, e = AluClauses.size(); i < e; i++)
            EmitALUClause(I, DL, AluClauses[i], CfCount);
          break;
        }
        default:
          if (TII->isExport(MI->getOpcode())) {
            LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
            CfCount++;
          }
          break;
        }
      }
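      // For each ENDIF whose region ended in a CF_ALU clause, fold the stack
      // pop into that clause by rebuilding it as CF_ALU_POP_AFTER (copying
      // all of its operands), instead of emitting a separate POP instruction.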
      for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
        MachineInstr *Alu = ToPopAfter[i];
        BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
            TII->get(R600::CF_ALU_POP_AFTER))
            .addImm(Alu->getOperand(0).getImm())
            .addImm(Alu->getOperand(1).getImm())
            .addImm(Alu->getOperand(2).getImm())
            .addImm(Alu->getOperand(3).getImm())
            .addImm(Alu->getOperand(4).getImm())
            .addImm(Alu->getOperand(5).getImm())
            .addImm(Alu->getOperand(6).getImm())
            .addImm(Alu->getOperand(7).getImm())
            .addImm(Alu->getOperand(8).getImm());
        Alu->eraseFromParent();
      }
      MFI->CFStackSize = CFStack.MaxStackSize;
    }

    return false;
  }

  StringRef getPassName() const override {
    return "R600 Control Flow Finalizer Pass";
  }
};

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(R600ControlFlowFinalizer, DEBUG_TYPE,
                      "R600 Control Flow Finalizer", false, false)
INITIALIZE_PASS_END(R600ControlFlowFinalizer, DEBUG_TYPE,
                    "R600 Control Flow Finalizer", false, false)

char R600ControlFlowFinalizer::ID = 0;

char &llvm::R600ControlFlowFinalizerID = R600ControlFlowFinalizer::ID;

FunctionPass *llvm::createR600ControlFlowFinalizer() {
  return new R600ControlFlowFinalizer();
}