mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-23 11:49:50 +00:00
Correct register pressure calculation in presence of subregs
If a subreg is used in an instruction, it is currently counted as a whole superreg for the purpose of register pressure calculation. This patch corrects that improper register pressure calculation by examining the operand's lane mask. Differential Revision: https://reviews.llvm.org/D29835 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@296009 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
5487126153
commit
0bf4d71d50
@ -156,7 +156,7 @@ public:
|
||||
const_iterator begin() const { return &PressureChanges[0]; }
|
||||
const_iterator end() const { return &PressureChanges[MaxPSets]; }
|
||||
|
||||
void addPressureChange(unsigned RegUnit, bool IsDec,
|
||||
void addPressureChange(RegisterMaskPair P, bool IsDec,
|
||||
const MachineRegisterInfo *MRI);
|
||||
|
||||
void dump(const TargetRegisterInfo &TRI) const;
|
||||
|
@ -30,6 +30,7 @@ namespace llvm {
|
||||
|
||||
class BitVector;
|
||||
class MachineFunction;
|
||||
class MachineRegisterInfo;
|
||||
class RegScavenger;
|
||||
template<class T> class SmallVectorImpl;
|
||||
class VirtRegMap;
|
||||
@ -719,6 +720,12 @@ public:
|
||||
/// Get the weight in units of pressure for this register unit.
|
||||
virtual unsigned getRegUnitWeight(unsigned RegUnit) const = 0;
|
||||
|
||||
/// Get the weight in units of pressure for a sub register of this register
|
||||
/// unit given a lane mask.
/// \p MRI is the register info the pressure sets of \p RegUnit are looked up
/// in, and \p LaneMask selects the lanes of \p RegUnit whose weight is
/// requested. The function is virtual so that a target can scale the weight
/// by the lanes that are actually in use.
|
||||
virtual unsigned getRegUnitWeight(const MachineRegisterInfo &MRI,
|
||||
unsigned RegUnit,
|
||||
LaneBitmask LaneMask) const;
|
||||
|
||||
/// Get the number of dimensions of register pressure.
|
||||
virtual unsigned getNumRegPressureSets() const = 0;
|
||||
|
||||
|
@ -1085,7 +1085,7 @@ void ScheduleDAGMILive::updatePressureDiffs(
|
||||
continue;
|
||||
|
||||
PressureDiff &PDiff = getPressureDiff(&SU);
|
||||
PDiff.addPressureChange(Reg, Decrement, &MRI);
|
||||
PDiff.addPressureChange(P, Decrement, &MRI);
|
||||
DEBUG(
|
||||
dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
|
||||
<< PrintReg(Reg, TRI) << ':' << PrintLaneMask(P.LaneMask)
|
||||
@ -1123,7 +1123,7 @@ void ScheduleDAGMILive::updatePressureDiffs(
|
||||
LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
|
||||
if (LRQ.valueIn() == VNI) {
|
||||
PressureDiff &PDiff = getPressureDiff(SU);
|
||||
PDiff.addPressureChange(Reg, true, &MRI);
|
||||
PDiff.addPressureChange(P, true, &MRI);
|
||||
DEBUG(
|
||||
dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
|
||||
<< *SU->getInstr();
|
||||
|
@ -46,16 +46,29 @@
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
/// Clamp lane masks to the maximum possible value.
///
/// If \p Reg is a virtual register, both \p LaneMask1 and \p LaneMask2 are
/// ANDed in place with MRI.getMaxLaneMaskForVReg(Reg). For any other register
/// the two masks are left unchanged.
|
||||
static void clampMasks(const MachineRegisterInfo &MRI, unsigned Reg,
|
||||
LaneBitmask& LaneMask1, LaneBitmask& LaneMask2) {
|
||||
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
|
||||
// Drop every lane bit that lies outside the virtual register's maximum mask.
LaneBitmask Max = MRI.getMaxLaneMaskForVReg(Reg);
|
||||
LaneMask1 &= Max;
|
||||
LaneMask2 &= Max;
|
||||
}
|
||||
}
|
||||
|
||||
/// Increase pressure for each pressure set provided by TargetRegisterInfo.
|
||||
static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure,
|
||||
const MachineRegisterInfo &MRI, unsigned Reg,
|
||||
LaneBitmask PrevMask, LaneBitmask NewMask) {
|
||||
assert((PrevMask & ~NewMask).none() && "Must not remove bits");
|
||||
if (PrevMask.any() || NewMask.none())
|
||||
|
||||
clampMasks(MRI, Reg, PrevMask, NewMask);
|
||||
if ((NewMask & ~PrevMask).none())
|
||||
return;
|
||||
|
||||
const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
|
||||
unsigned Weight = TRI->getRegUnitWeight(MRI, Reg, NewMask & ~PrevMask);
|
||||
PSetIterator PSetI = MRI.getPressureSets(Reg);
|
||||
unsigned Weight = PSetI.getWeight();
|
||||
for (; PSetI.isValid(); ++PSetI)
|
||||
CurrSetPressure[*PSetI] += Weight;
|
||||
}
|
||||
@ -65,11 +78,13 @@ static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure,
|
||||
const MachineRegisterInfo &MRI, unsigned Reg,
|
||||
LaneBitmask PrevMask, LaneBitmask NewMask) {
|
||||
//assert((NewMask & !PrevMask) == 0 && "Must not add bits");
|
||||
if (NewMask.any() || PrevMask.none())
|
||||
clampMasks(MRI, Reg, PrevMask, NewMask);
|
||||
if ((~NewMask & PrevMask).none())
|
||||
return;
|
||||
|
||||
const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
|
||||
unsigned Weight = TRI->getRegUnitWeight(MRI, Reg, ~NewMask & PrevMask);
|
||||
PSetIterator PSetI = MRI.getPressureSets(Reg);
|
||||
unsigned Weight = PSetI.getWeight();
|
||||
for (; PSetI.isValid(); ++PSetI) {
|
||||
assert(CurrSetPressure[*PSetI] >= Weight && "register pressure underflow");
|
||||
CurrSetPressure[*PSetI] -= Weight;
|
||||
@ -139,11 +154,14 @@ void PressureDiff::dump(const TargetRegisterInfo &TRI) const {
|
||||
void RegPressureTracker::increaseRegPressure(unsigned RegUnit,
|
||||
LaneBitmask PreviousMask,
|
||||
LaneBitmask NewMask) {
|
||||
if (PreviousMask.any() || NewMask.none())
|
||||
clampMasks(*MRI, RegUnit, PreviousMask, NewMask);
|
||||
if ((NewMask & ~PreviousMask).none())
|
||||
return;
|
||||
|
||||
const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
|
||||
unsigned Weight = TRI->getRegUnitWeight(*MRI, RegUnit,
|
||||
NewMask & ~PreviousMask);
|
||||
PSetIterator PSetI = MRI->getPressureSets(RegUnit);
|
||||
unsigned Weight = PSetI.getWeight();
|
||||
for (; PSetI.isValid(); ++PSetI) {
|
||||
CurrSetPressure[*PSetI] += Weight;
|
||||
P.MaxSetPressure[*PSetI] =
|
||||
@ -644,17 +662,19 @@ void PressureDiffs::addInstruction(unsigned Idx,
|
||||
PressureDiff &PDiff = (*this)[Idx];
|
||||
assert(!PDiff.begin()->isValid() && "stale PDiff");
|
||||
for (const RegisterMaskPair &P : RegOpers.Defs)
|
||||
PDiff.addPressureChange(P.RegUnit, true, &MRI);
|
||||
PDiff.addPressureChange(P, true, &MRI);
|
||||
|
||||
for (const RegisterMaskPair &P : RegOpers.Uses)
|
||||
PDiff.addPressureChange(P.RegUnit, false, &MRI);
|
||||
PDiff.addPressureChange(P, false, &MRI);
|
||||
}
|
||||
|
||||
/// Add a change in pressure to the pressure diff of a given instruction.
|
||||
void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec,
|
||||
void PressureDiff::addPressureChange(RegisterMaskPair P, bool IsDec,
|
||||
const MachineRegisterInfo *MRI) {
|
||||
PSetIterator PSetI = MRI->getPressureSets(RegUnit);
|
||||
int Weight = IsDec ? -PSetI.getWeight() : PSetI.getWeight();
|
||||
const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
|
||||
int Weight = (int)TRI->getRegUnitWeight(*MRI, P.RegUnit, P.LaneMask);
|
||||
PSetIterator PSetI = MRI->getPressureSets(P.RegUnit);
|
||||
if (IsDec) Weight = -Weight;
|
||||
for (; PSetI.isValid(); ++PSetI) {
|
||||
// Find an existing entry in the pressure diff for this PSet.
|
||||
PressureDiff::iterator I = nonconst_begin(), E = nonconst_end();
|
||||
|
@ -412,6 +412,15 @@ bool TargetRegisterInfo::regmaskSubsetEqual(const uint32_t *mask0,
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Get the weight in units of pressure for a sub register of this register
|
||||
/// unit given a lane mask.
/// This default implementation does not look at \p LaneMask: it returns the
/// weight reported by the pressure set iterator that \p MRI yields for
/// \p RegUnit, i.e. the weight of the whole register unit.
|
||||
unsigned TargetRegisterInfo::getRegUnitWeight(const MachineRegisterInfo &MRI,
|
||||
unsigned RegUnit,
|
||||
LaneBitmask LaneMask) const {
|
||||
PSetIterator PSetI = MRI.getPressureSets(RegUnit);
|
||||
return PSetI.getWeight();
|
||||
}
|
||||
|
||||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||||
LLVM_DUMP_METHOD
|
||||
void TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex,
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include "llvm/CodeGen/RegisterScavenging.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/LLVMContext.h"
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
@ -1408,3 +1409,18 @@ const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
|
||||
return Empty;
|
||||
return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
|
||||
}
|
||||
|
||||
/// Get the weight in units of pressure for the lanes of \p RegUnit selected by
/// \p LaneMask.
///
/// Starts from the weight returned by TargetRegisterInfo::getRegUnitWeight.
/// When that weight is greater than one, \p LaneMask is neither empty nor
/// full, and \p RegUnit is a virtual register, the weight is scaled by the
/// number of bits set in \p LaneMask divided by the number of bits set in the
/// register's maximum lane mask (integer division). In every other case the
/// unscaled weight is returned.
unsigned SIRegisterInfo::getRegUnitWeight(const MachineRegisterInfo &MRI,
|
||||
unsigned RegUnit,
|
||||
LaneBitmask LaneMask) const {
|
||||
unsigned Weight = TargetRegisterInfo::getRegUnitWeight(MRI, RegUnit,
|
||||
LaneMask);
|
||||
// Only a partial lane mask on a virtual register with weight > 1 is scaled.
if (Weight > 1 && LaneMask.any() && !LaneMask.all() &&
|
||||
isVirtualRegister(RegUnit)) {
|
||||
LaneBitmask Max = MRI.getMaxLaneMaskForVReg(RegUnit);
|
||||
// Skip scaling when the mask already equals the register's maximum mask, or
// when that maximum is the full or the empty mask.
if (Max != LaneMask && !Max.all() && !Max.none())
|
||||
Weight = (Weight * countPopulation(LaneMask.getAsInteger())) /
|
||||
countPopulation(Max.getAsInteger());
|
||||
}
|
||||
return Weight;
|
||||
}
|
||||
|
@ -228,6 +228,10 @@ public:
|
||||
|
||||
const int *getRegUnitPressureSets(unsigned RegUnit) const override;
|
||||
|
||||
/// Get the weight in units of pressure for the lanes of \p RegUnit selected
/// by \p LaneMask. Overrides the inherited getRegUnitWeight.
unsigned getRegUnitWeight(const MachineRegisterInfo &MRI,
|
||||
unsigned RegUnit,
|
||||
LaneBitmask LaneMask) const override;
|
||||
|
||||
private:
|
||||
void buildSpillLoadStore(MachineBasicBlock::iterator MI,
|
||||
unsigned LoadStoreOp,
|
||||
|
@ -424,25 +424,25 @@ define void @global_zextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16
|
||||
; GCN-NOHSA: buffer_store_dwordx4
|
||||
; GCN-NOHSA: buffer_store_dwordx4
|
||||
|
||||
; GCN-HSA: flat_store_dwordx4
|
||||
; GCN-HSA: flat_store_dwordx4
|
||||
; GCN-HSA: flat_store_dwordx4
|
||||
; GCN-HSA: flat_store_dwordx4
|
||||
; GCN-HSA-DAG: flat_store_dwordx4
|
||||
; GCN-HSA-DAG: flat_store_dwordx4
|
||||
; GCN-HSA-DAG: flat_store_dwordx4
|
||||
; GCN-HSA-DAG: flat_store_dwordx4
|
||||
|
||||
; GCN-HSA: flat_store_dwordx4
|
||||
; GCN-HSA: flat_store_dwordx4
|
||||
; GCN-HSA: flat_store_dwordx4
|
||||
; GCN-HSA: flat_store_dwordx4
|
||||
; GCN-HSA-DAG: flat_store_dwordx4
|
||||
; GCN-HSA-DAG: flat_store_dwordx4
|
||||
; GCN-HSA-DAG: flat_store_dwordx4
|
||||
; GCN-HSA-DAG: flat_store_dwordx4
|
||||
|
||||
; GCN-HSA: flat_store_dwordx4
|
||||
; GCN-HSA: flat_store_dwordx4
|
||||
; GCN-HSA: flat_store_dwordx4
|
||||
; GCN-HSA: flat_store_dwordx4
|
||||
; GCN-HSA-DAG: flat_store_dwordx4
|
||||
; GCN-HSA-DAG: flat_store_dwordx4
|
||||
; GCN-HSA-DAG: flat_store_dwordx4
|
||||
; GCN-HSA-DAG: flat_store_dwordx4
|
||||
|
||||
; GCN-HSA: flat_store_dwordx4
|
||||
; GCN-HSA: flat_store_dwordx4
|
||||
; GCN-HSA: flat_store_dwordx4
|
||||
; GCN-HSA: flat_store_dwordx4
|
||||
; GCN-HSA-DAG: flat_store_dwordx4
|
||||
; GCN-HSA-DAG: flat_store_dwordx4
|
||||
; GCN-HSA-DAG: flat_store_dwordx4
|
||||
; GCN-HSA-DAG: flat_store_dwordx4
|
||||
|
||||
define void @global_sextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* %in) #0 {
|
||||
%ld = load <32 x i32>, <32 x i32> addrspace(1)* %in
|
||||
|
67
test/CodeGen/AMDGPU/schedule-regpressure-subregs.mir
Normal file
67
test/CodeGen/AMDGPU/schedule-regpressure-subregs.mir
Normal file
@ -0,0 +1,67 @@
|
||||
# RUN: llc -march=amdgcn -misched=converge -run-pass machine-scheduler -verify-misched %s -o - -debug-only=misched 2>&1 | FileCheck %s
|
||||
# REQUIRES: asserts
|
||||
|
||||
# CHECK-LABEL: ScheduleDAGMILive::schedule starting
|
||||
|
||||
# Check that def and use subregs count with the same weight
|
||||
# CHECK: %vreg9:sub1<def> = V_MUL_LO_I32 %vreg6:sub1, 3
|
||||
# CHECK: Pressure Diff : {{$}}
|
||||
|
||||
# Check that a subreg does not count as a whole superreg
|
||||
# CHECK: %vreg9:sub0<def> = V_MUL_LO_I32 %vreg6:sub0, %vreg9:sub1
|
||||
# CHECK: Pressure Diff : VGPR_32 1{{$}}
|
||||
|
||||
# Check that two subregs of the same register count as a whole register
|
||||
# CHECK: DS_WRITE2_B32 %vreg7, %vreg9:sub0, %vreg9:sub1
|
||||
# CHECK: Pressure Diff : VGPR_32 3{{$}}
|
||||
|
||||
---
|
||||
name: mo_pset
|
||||
alignment: 0
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: sreg_128 }
|
||||
- { id: 1, class: sgpr_64 }
|
||||
- { id: 2, class: sreg_32_xm0 }
|
||||
- { id: 3, class: sgpr_32 }
|
||||
- { id: 4, class: vgpr_32 }
|
||||
- { id: 5, class: sreg_32_xm0_xexec }
|
||||
- { id: 6, class: vreg_64 }
|
||||
- { id: 7, class: vgpr_32 }
|
||||
- { id: 8, class: vgpr_32 }
|
||||
- { id: 9, class: vreg_64 }
|
||||
liveins:
|
||||
- { reg: '%sgpr4_sgpr5', virtual-reg: '%1' }
|
||||
frameInfo:
|
||||
isFrameAddressTaken: false
|
||||
isReturnAddressTaken: false
|
||||
hasStackMap: false
|
||||
hasPatchPoint: false
|
||||
stackSize: 0
|
||||
offsetAdjustment: 0
|
||||
maxAlignment: 0
|
||||
adjustsStack: false
|
||||
hasCalls: false
|
||||
maxCallFrameSize: 0
|
||||
hasOpaqueSPAdjustment: false
|
||||
hasVAStart: false
|
||||
hasMustTailInVarArgFunc: false
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: %sgpr4_sgpr5
|
||||
|
||||
%1 = COPY %sgpr4_sgpr5
|
||||
%5 = S_LOAD_DWORD_IMM %1, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(2)* undef`)
|
||||
%m0 = S_MOV_B32 -1
|
||||
%7 = COPY %5
|
||||
%6 = DS_READ2_B32 %7, 0, 1, 0, implicit %m0, implicit %exec
|
||||
undef %9.sub1 = V_MUL_LO_I32 %6.sub1, 3, implicit %exec
|
||||
%9.sub0 = V_MUL_LO_I32 %6.sub0, %9.sub1, implicit %exec
|
||||
DS_WRITE2_B32 %7, %9.sub0, %9.sub1, 4, 5, 0, implicit killed %m0, implicit %exec
|
||||
S_ENDPGM
|
||||
|
||||
...
|
Loading…
Reference in New Issue
Block a user