mirror of
https://github.com/RPCS3/llvm.git
synced 2025-05-20 12:26:02 +00:00

This change incorporates an effort by Connor Abbott to change how we deal with WWM operations potentially trashing valid values in inactive lanes. Previously, the SIFixWWMLiveness pass would work out which registers were being trashed within WWM regions, and ensure that the register allocator did not have any values it was depending on resident in those registers if the WWM section would trash them. This worked perfectly well, but would sometimes cause severe register pressure when the WWM section resided before divergent control flow (or at least that is where I mostly observed it). This fix instead runs through the WWM sections and pre-allocates some registers for WWM. It then reserves these registers so that the register allocator cannot use them. This results in a significant register saving on some WWM shaders I'm working with (130 -> 104 VGPRs, with just this change!). Differential Revision: https://reviews.llvm.org/D59295 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@357400 91177308-0d34-0410-b5e6-96231b3b80d8
222 lines
6.0 KiB
C++
222 lines
6.0 KiB
C++
//===- SIPreAllocateWWMRegs.cpp - WWM Register Pre-allocation -------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
/// \file
|
|
/// Pass to pre-allocate WWM registers
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "AMDGPU.h"
|
|
#include "AMDGPUSubtarget.h"
|
|
#include "SIInstrInfo.h"
|
|
#include "SIRegisterInfo.h"
|
|
#include "SIMachineFunctionInfo.h"
|
|
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
|
#include "llvm/ADT/PostOrderIterator.h"
|
|
#include "llvm/CodeGen/VirtRegMap.h"
|
|
#include "llvm/CodeGen/LiveInterval.h"
|
|
#include "llvm/CodeGen/LiveIntervals.h"
|
|
#include "llvm/CodeGen/LiveRegMatrix.h"
|
|
#include "llvm/CodeGen/MachineDominators.h"
|
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
|
#include "llvm/CodeGen/RegisterClassInfo.h"
|
|
|
|
using namespace llvm;
|
|
|
|
#define DEBUG_TYPE "si-pre-allocate-wwm-regs"
|
|
|
|
namespace {
|
|
|
|
class SIPreAllocateWWMRegs : public MachineFunctionPass {
|
|
private:
|
|
const SIInstrInfo *TII;
|
|
const SIRegisterInfo *TRI;
|
|
MachineRegisterInfo *MRI;
|
|
LiveIntervals *LIS;
|
|
LiveRegMatrix *Matrix;
|
|
VirtRegMap *VRM;
|
|
RegisterClassInfo RegClassInfo;
|
|
|
|
std::vector<unsigned> RegsToRewrite;
|
|
|
|
public:
|
|
static char ID;
|
|
|
|
SIPreAllocateWWMRegs() : MachineFunctionPass(ID) {
|
|
initializeSIPreAllocateWWMRegsPass(*PassRegistry::getPassRegistry());
|
|
}
|
|
|
|
bool runOnMachineFunction(MachineFunction &MF) override;
|
|
|
|
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
|
AU.addRequired<LiveIntervals>();
|
|
AU.addPreserved<LiveIntervals>();
|
|
AU.addRequired<VirtRegMap>();
|
|
AU.addRequired<LiveRegMatrix>();
|
|
AU.addPreserved<SlotIndexes>();
|
|
AU.setPreservesCFG();
|
|
MachineFunctionPass::getAnalysisUsage(AU);
|
|
}
|
|
|
|
private:
|
|
bool processDef(MachineOperand &MO);
|
|
void rewriteRegs(MachineFunction &MF);
|
|
};
|
|
|
|
} // End anonymous namespace.
|
|
|
|
INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegs, DEBUG_TYPE,
|
|
"SI Pre-allocate WWM Registers", false, false)
|
|
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
|
|
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
|
|
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
|
|
INITIALIZE_PASS_END(SIPreAllocateWWMRegs, DEBUG_TYPE,
|
|
"SI Pre-allocate WWM Registers", false, false)
|
|
|
|
char SIPreAllocateWWMRegs::ID = 0;
|
|
|
|
char &llvm::SIPreAllocateWWMRegsID = SIPreAllocateWWMRegs::ID;
|
|
|
|
FunctionPass *llvm::createSIPreAllocateWWMRegsPass() {
|
|
return new SIPreAllocateWWMRegs();
|
|
}
|
|
|
|
/// Try to pre-assign a physical register to the register held by \p MO.
///
/// Nothing is done (and false is returned) when \p MO is not a register
/// operand, when the register is not a VGPR, when it is already a physical
/// register, or when the VirtRegMap already has a physical register for it.
/// Otherwise the allocation order of the register's class is walked and the
/// first candidate that is both unused in the function and free of
/// interference with the register's live interval is assigned through the
/// LiveRegMatrix.
///
/// \returns true if an assignment was made. Running out of candidates is
/// treated as unreachable.
bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
  if (!MO.isReg())
    return false;

  const unsigned VirtReg = MO.getReg();

  // Same checks, same evaluation order as three separate guards.
  if (!TRI->isVGPR(*MRI, VirtReg) || TRI->isPhysicalRegister(VirtReg) ||
      VRM->hasPhys(VirtReg))
    return false;

  LiveInterval &Interval = LIS->getInterval(VirtReg);

  for (unsigned Candidate : RegClassInfo.getOrder(MRI->getRegClass(VirtReg))) {
    if (MRI->isPhysRegUsed(Candidate))
      continue;
    if (Matrix->checkInterference(Interval, Candidate) !=
        LiveRegMatrix::IK_Free)
      continue;

    Matrix->assign(Interval, Candidate);
    assert(Candidate != 0);
    // Remember the register so rewriteRegs() can finish the job later.
    RegsToRewrite.push_back(VirtReg);
    return true;
  }

  llvm_unreachable("physreg not found for WWM expression");
  return false;
}
|
|
|
|
/// Commit the assignments recorded by processDef().
///
/// First, every register operand in \p MF whose virtual register has a
/// physical register in the VirtRegMap is rewritten to that physical register
/// (resolving any sub-register index) and marked non-renamable. Then, for each
/// entry in RegsToRewrite, its live interval is removed and the physical
/// register is passed to SIMachineFunctionInfo::ReserveWWMRegister. Finally
/// RegsToRewrite is cleared and the reserved register set is re-frozen.
void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) {
  // Rewrites a single operand in place; operands that are not pre-assigned
  // virtual registers are left untouched.
  auto RewriteOperand = [this](MachineOperand &Op) {
    if (!Op.isReg())
      return;

    const unsigned Virt = Op.getReg();
    if (TRI->isPhysicalRegister(Virt) || !VRM->hasPhys(Virt))
      return;

    unsigned Assigned = VRM->getPhys(Virt);
    if (const unsigned SubIdx = Op.getSubReg()) {
      // Fold the sub-register index into the physical register itself.
      Assigned = TRI->getSubReg(Assigned, SubIdx);
      Op.setSubReg(0);
    }

    Op.setReg(Assigned);
    Op.setIsRenamable(false);
  };

  for (MachineBasicBlock &Block : MF)
    for (MachineInstr &Instr : Block)
      for (MachineOperand &Op : Instr.operands())
        RewriteOperand(Op);

  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  for (unsigned Virt : RegsToRewrite) {
    LIS->removeInterval(Virt);

    const unsigned Assigned = VRM->getPhys(Virt);
    assert(Assigned != 0);
    FuncInfo->ReserveWWMRegister(Assigned);
  }

  RegsToRewrite.clear();

  // Update the set of reserved registers to include WWM ones.
  MRI->freezeReservedRegs(MF);
}
|
|
|
|
/// Pass entry point.
///
/// Caches the target/analysis handles for \p MF, then walks the blocks in
/// reverse post-order. The destination (operand 0) of every
/// V_SET_INACTIVE_B32/B64 is offered to processDef(), as is every def of each
/// instruction that lies after an ENTER_WWM and before the matching EXIT_WWM
/// within the same block (the "in WWM" flag is reset at each block start).
/// If anything was assigned, rewriteRegs() is run.
///
/// \returns true if at least one register was assigned.
bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n");

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  TII = ST.getInstrInfo();
  TRI = &TII->getRegisterInfo();
  MRI = &MF.getRegInfo();

  LIS = &getAnalysis<LiveIntervals>();
  Matrix = &getAnalysis<LiveRegMatrix>();
  VRM = &getAnalysis<VirtRegMap>();

  RegClassInfo.runOnMachineFunction(MF);

  bool Changed = false;

  // Blocks are visited in reverse post-order so that definitions are seen in
  // dominance order. WWM expressions never involve phi nodes and the only way
  // out of WWM is the special WWM instruction, so this order is a perfect
  // elimination order and cannot be improved upon.
  ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);

  for (MachineBasicBlock *MBB : RPOT) {
    bool InsideWWM = false;

    for (MachineInstr &MI : *MBB) {
      switch (MI.getOpcode()) {
      case AMDGPU::V_SET_INACTIVE_B32:
      case AMDGPU::V_SET_INACTIVE_B64:
        if (processDef(MI.getOperand(0)))
          Changed = true;
        break;

      case AMDGPU::ENTER_WWM:
        LLVM_DEBUG(dbgs() << "entering WWM region: " << MI << "\n");
        InsideWWM = true;
        // The marker itself is not processed; move on to the next instruction.
        continue;

      case AMDGPU::EXIT_WWM:
        LLVM_DEBUG(dbgs() << "exiting WWM region: " << MI << "\n");
        InsideWWM = false;
        break;

      default:
        break;
      }

      if (!InsideWWM)
        continue;

      LLVM_DEBUG(dbgs() << "processing " << MI << "\n");

      for (MachineOperand &Def : MI.defs())
        if (processDef(Def))
          Changed = true;
    }
  }

  if (Changed)
    rewriteRegs(MF);

  return Changed;
}
|