mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-30 00:32:53 +00:00
Initialize SUnits before DAG building.
Affect on SD scheduling and postRA scheduling: Printing the DAG will display the nodes in top-down topological order. This matches the order within the MBB and makes my life much easier in general. Affect on misched: We don't need to track virtual register uses at all. This is awesome. I also intend to rely on the SUnit ID as a topo-sort index. So if A < B then we cannot have an edge B -> A. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@151135 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
44d23825d6
commit
b4566a9999
@ -231,6 +231,7 @@ namespace llvm {
|
||||
public:
|
||||
SUnit *OrigNode; // If not this, the node from which
|
||||
// this node was cloned.
|
||||
// (SD scheduling only)
|
||||
|
||||
// Preds/Succs - The SUnits before/after us in the graph.
|
||||
SmallVector<SDep, 4> Preds; // All sunit predecessors.
|
||||
|
@ -427,6 +427,8 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
|
||||
|
||||
// Keep a map of the MachineInstr*'s back to the SUnit representing them.
|
||||
// This is used for updating debug information.
|
||||
//
|
||||
// FIXME: Replace this with the existing map in ScheduleDAGInstrs::MISUnitMap
|
||||
DenseMap<MachineInstr*,const SUnit*> MISUnitMap;
|
||||
|
||||
// Find the node at the bottom of the critical path.
|
||||
|
@ -46,7 +46,7 @@ bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
|
||||
|
||||
// Finally, just to provide a stable ordering, use the node number as a
|
||||
// deciding factor.
|
||||
return LHSNum < RHSNum;
|
||||
return RHSNum < LHSNum;
|
||||
}
|
||||
|
||||
|
||||
|
@ -149,7 +149,8 @@ protected:
|
||||
MachineScheduler *Pass;
|
||||
public:
|
||||
ScheduleTopDownLive(MachineScheduler *P):
|
||||
ScheduleDAGInstrs(*P->MF, *P->MLI, *P->MDT, /*IsPostRA=*/false), Pass(P) {}
|
||||
ScheduleDAGInstrs(*P->MF, *P->MLI, *P->MDT, /*IsPostRA=*/false, P->LIS),
|
||||
Pass(P) {}
|
||||
|
||||
/// ScheduleDAGInstrs callback.
|
||||
void Schedule();
|
||||
@ -310,7 +311,8 @@ class DefaultMachineScheduler : public ScheduleDAGInstrs {
|
||||
MachineScheduler *Pass;
|
||||
public:
|
||||
DefaultMachineScheduler(MachineScheduler *P):
|
||||
ScheduleDAGInstrs(*P->MF, *P->MLI, *P->MDT, /*IsPostRA=*/false), Pass(P) {}
|
||||
ScheduleDAGInstrs(*P->MF, *P->MLI, *P->MDT, /*IsPostRA=*/false, P->LIS),
|
||||
Pass(P) {}
|
||||
|
||||
/// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's
|
||||
/// time to do some work.
|
||||
@ -348,15 +350,14 @@ void DefaultMachineScheduler::Schedule() {
|
||||
|
||||
#ifndef NDEBUG
|
||||
namespace {
|
||||
// Nodes with a higher number have lower priority. This way we attempt to
|
||||
// Nodes with a higher number have higher priority. This way we attempt to
|
||||
// schedule the latest instructions earliest.
|
||||
//
|
||||
// TODO: Relies on the property of the BuildSchedGraph that results in SUnits
|
||||
// being ordered in sequence bottom-up. This will be formalized, probably be
|
||||
// constructing SUnits in a prepass.
|
||||
// being ordered in sequence top-down.
|
||||
struct ShuffleSUnitOrder {
|
||||
bool operator()(SUnit *A, SUnit *B) const {
|
||||
return A->NodeNum > B->NodeNum;
|
||||
return A->NodeNum < B->NodeNum;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include "llvm/Operator.h"
|
||||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/Analysis/ValueTracking.h"
|
||||
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineMemOperand.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
@ -34,11 +35,14 @@ using namespace llvm;
|
||||
ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
|
||||
const MachineLoopInfo &mli,
|
||||
const MachineDominatorTree &mdt,
|
||||
bool IsPostRAFlag)
|
||||
bool IsPostRAFlag,
|
||||
LiveIntervals *lis)
|
||||
: ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
|
||||
InstrItins(mf.getTarget().getInstrItineraryData()), IsPostRA(IsPostRAFlag),
|
||||
UnitLatencies(false), Defs(TRI->getNumRegs()), Uses(TRI->getNumRegs()),
|
||||
LIS(lis), UnitLatencies(false),
|
||||
Defs(TRI->getNumRegs()), Uses(TRI->getNumRegs()),
|
||||
LoopRegs(MLI, MDT), FirstDbgValue(0) {
|
||||
assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
|
||||
DbgValues.clear();
|
||||
assert(!(IsPostRA && MF.getRegInfo().getNumVirtRegs()) &&
|
||||
"Virtual registers must be removed prior to PostRA scheduling");
|
||||
@ -357,8 +361,6 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
|
||||
const MachineInstr *MI = SU->getInstr();
|
||||
unsigned Reg = MI->getOperand(OperIdx).getReg();
|
||||
|
||||
const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
|
||||
|
||||
// Add output dependence to the next nearest def of this vreg.
|
||||
//
|
||||
// Unless this definition is dead, the output dependence should be
|
||||
@ -366,60 +368,94 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
|
||||
// uses. We're conservative for now until we have a way to guarantee the uses
|
||||
// are not eliminated sometime during scheduling. The output dependence edge
|
||||
// is also useful if output latency exceeds def-use latency.
|
||||
SUnit *DefSU = VRegDefs[Reg];
|
||||
SUnit *&DefSU = VRegDefs[Reg];
|
||||
if (DefSU && DefSU != SU && DefSU != &ExitSU) {
|
||||
unsigned OutLatency = TII->getOutputLatency(InstrItins, MI, OperIdx,
|
||||
DefSU->getInstr());
|
||||
DefSU->addPred(SDep(SU, SDep::Output, OutLatency, Reg));
|
||||
}
|
||||
VRegDefs[Reg] = SU;
|
||||
|
||||
// Add data dependence to any uses of this vreg before the next nearest def.
|
||||
//
|
||||
// TODO: Handle ExitSU properly.
|
||||
//
|
||||
// TODO: Data dependence could be handled more efficiently at the use-side.
|
||||
std::vector<SUnit*> &UseList = VRegUses[Reg];
|
||||
for (std::vector<SUnit*>::const_iterator UI = UseList.begin(),
|
||||
UE = UseList.end(); UI != UE; ++UI) {
|
||||
SUnit *UseSU = *UI;
|
||||
if (UseSU == SU) continue;
|
||||
|
||||
// TODO: Handle "special" address latencies cleanly.
|
||||
const SDep& dep = SDep(SU, SDep::Data, SU->Latency, Reg);
|
||||
if (!UnitLatencies) {
|
||||
// Adjust the dependence latency using operand def/use information, then
|
||||
// allow the target to perform its own adjustments.
|
||||
ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
|
||||
ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
|
||||
}
|
||||
UseSU->addPred(dep);
|
||||
}
|
||||
UseList.clear();
|
||||
DefSU = SU;
|
||||
}
|
||||
|
||||
/// addVRegUseDeps - Add register antidependencies from this SUnit to
|
||||
/// instructions that occur later in the same scheduling region if they
|
||||
/// write the virtual register referenced at OperIdx.
|
||||
/// addVRegUseDeps - Add a register data dependency if the instruction that
|
||||
/// defines the virtual register used at OperIdx is mapped to an SUnit. Add a
|
||||
/// register antidependency from this SUnit to instructions that occur later in
|
||||
/// the same scheduling region if they write the virtual register.
|
||||
///
|
||||
/// TODO: Handle ExitSU "uses" properly.
|
||||
void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
|
||||
unsigned Reg = SU->getInstr()->getOperand(OperIdx).getReg();
|
||||
MachineInstr *MI = SU->getInstr();
|
||||
unsigned Reg = MI->getOperand(OperIdx).getReg();
|
||||
|
||||
// Lookup this operand's reaching definition.
|
||||
assert(LIS && "vreg dependencies requires LiveIntervals");
|
||||
SlotIndex UseIdx = LIS->getSlotIndexes()->getInstructionIndex(MI);
|
||||
LiveInterval *LI = &LIS->getInterval(Reg);
|
||||
VNInfo *VNI = LI->getVNInfoAt(UseIdx);
|
||||
MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def);
|
||||
if (Def) {
|
||||
SUnit *DefSU = getSUnit(Def);
|
||||
if (DefSU) {
|
||||
// The reaching Def lives within this scheduling region.
|
||||
// Create a data dependence.
|
||||
//
|
||||
// TODO: Handle "special" address latencies cleanly.
|
||||
const SDep &dep = SDep(DefSU, SDep::Data, DefSU->Latency, Reg);
|
||||
if (!UnitLatencies) {
|
||||
// Adjust the dependence latency using operand def/use information, then
|
||||
// allow the target to perform its own adjustments.
|
||||
ComputeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
|
||||
const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
|
||||
ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
|
||||
}
|
||||
SU->addPred(dep);
|
||||
}
|
||||
}
|
||||
|
||||
// Add antidependence to the following def of the vreg it uses.
|
||||
SUnit *DefSU = VRegDefs[Reg];
|
||||
if (DefSU && DefSU != SU)
|
||||
DefSU->addPred(SDep(SU, SDep::Anti, 0, Reg));
|
||||
DenseMap<unsigned, SUnit*>::const_iterator I = VRegDefs.find(Reg);
|
||||
if (I != VRegDefs.end()) {
|
||||
SUnit *DefSU = I->second;
|
||||
if (DefSU != SU)
|
||||
DefSU->addPred(SDep(SU, SDep::Anti, 0, Reg));
|
||||
}
|
||||
}
|
||||
|
||||
// Add this SUnit to the use list of the vreg it uses.
|
||||
//
|
||||
// TODO: pinch the DAG before we see too many uses to avoid quadratic
|
||||
// behavior. Limiting the scheduling window can accomplish the same thing.
|
||||
VRegUses[Reg].push_back(SU);
|
||||
/// Create an SUnit for each real instruction, numbered in top-down toplological
|
||||
/// order. The instruction order A < B, implies that no edge exists from B to A.
|
||||
///
|
||||
/// Map each real instruction to its SUnit.
|
||||
///
|
||||
/// After initSUnits, the SUnits vector is cannot be resized and the scheduler
|
||||
/// may hang onto SUnit pointers. We may relax this in the future by using SUnit
|
||||
/// IDs instead of pointers.
|
||||
void ScheduleDAGInstrs::initSUnits() {
|
||||
// We'll be allocating one SUnit for each real instruction in the region,
|
||||
// which is contained within a basic block.
|
||||
SUnits.reserve(BB->size());
|
||||
|
||||
for (MachineBasicBlock::iterator I = Begin; I != InsertPos; ++I) {
|
||||
MachineInstr *MI = I;
|
||||
if (MI->isDebugValue())
|
||||
continue;
|
||||
|
||||
SUnit *SU = NewSUnit(MI);
|
||||
MISUnitMap[MI] = SU;
|
||||
|
||||
SU->isCall = MI->isCall();
|
||||
SU->isCommutable = MI->isCommutable();
|
||||
|
||||
// Assign the Latency field of SU using target-provided information.
|
||||
if (UnitLatencies)
|
||||
SU->Latency = 1;
|
||||
else
|
||||
ComputeLatency(SU);
|
||||
}
|
||||
}
|
||||
|
||||
void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
|
||||
// We'll be allocating one SUnit for each instruction, plus one for
|
||||
// the region exit node.
|
||||
SUnits.reserve(BB->size());
|
||||
// Create an SUnit for each real instruction.
|
||||
initSUnits();
|
||||
|
||||
// We build scheduling units by walking a block's instruction list from bottom
|
||||
// to top.
|
||||
@ -447,14 +483,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
|
||||
assert(Defs[i].empty() && "Only BuildGraph should push/pop Defs");
|
||||
}
|
||||
|
||||
// Reinitialize the large VReg vectors, while reusing the memory.
|
||||
//
|
||||
// Note: this can be an expensive part of DAG building. We may want to be more
|
||||
// clever. Reevaluate after VRegUses goes away.
|
||||
assert(VRegDefs.size() == 0 && VRegUses.size() == 0 &&
|
||||
"Only BuildSchedGraph should access VRegDefs/Uses");
|
||||
VRegDefs.resize(MF.getRegInfo().getNumVirtRegs());
|
||||
VRegUses.resize(MF.getRegInfo().getNumVirtRegs());
|
||||
assert(VRegDefs.size() == 0 && "Only BuildSchedGraph may access VRegDefs");
|
||||
|
||||
// Walk the list of instructions, from bottom moving up.
|
||||
MachineInstr *PrevMI = NULL;
|
||||
@ -473,16 +502,9 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
|
||||
|
||||
assert(!MI->isTerminator() && !MI->isLabel() &&
|
||||
"Cannot schedule terminators or labels!");
|
||||
// Create the SUnit for this MI.
|
||||
SUnit *SU = NewSUnit(MI);
|
||||
SU->isCall = MI->isCall();
|
||||
SU->isCommutable = MI->isCommutable();
|
||||
|
||||
// Assign the Latency field of SU using target-provided information.
|
||||
if (UnitLatencies)
|
||||
SU->Latency = 1;
|
||||
else
|
||||
ComputeLatency(SU);
|
||||
SUnit *SU = MISUnitMap[MI];
|
||||
assert(SU && "No SUnit mapped to this MI");
|
||||
|
||||
// Add register-based dependencies (data, anti, and output).
|
||||
for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
|
||||
@ -657,8 +679,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
|
||||
Uses[i].clear();
|
||||
}
|
||||
VRegDefs.clear();
|
||||
VRegUses.clear();
|
||||
PendingLoads.clear();
|
||||
MISUnitMap.clear();
|
||||
}
|
||||
|
||||
void ScheduleDAGInstrs::FinishBlock() {
|
||||
|
@ -27,6 +27,7 @@
|
||||
namespace llvm {
|
||||
class MachineLoopInfo;
|
||||
class MachineDominatorTree;
|
||||
class LiveIntervals;
|
||||
|
||||
/// LoopDependencies - This class analyzes loop-oriented register
|
||||
/// dependencies, which are used to guide scheduling decisions.
|
||||
@ -108,6 +109,11 @@ namespace llvm {
|
||||
/// isPostRA flag indicates vregs cannot be present.
|
||||
bool IsPostRA;
|
||||
|
||||
/// Live Intervals provides reaching defs in preRA scheduling.
|
||||
LiveIntervals *LIS;
|
||||
|
||||
DenseMap<MachineInstr*, SUnit*> MISUnitMap;
|
||||
|
||||
/// UnitLatencies (misnamed) flag avoids computing def-use latencies, using
|
||||
/// the def-side latency only.
|
||||
bool UnitLatencies;
|
||||
@ -119,12 +125,9 @@ namespace llvm {
|
||||
std::vector<std::vector<SUnit *> > Defs;
|
||||
std::vector<std::vector<SUnit *> > Uses;
|
||||
|
||||
// Virtual register Defs and Uses.
|
||||
//
|
||||
// TODO: Eliminate VRegUses by creating SUnits in a prepass and looking up
|
||||
// the live range's reaching def.
|
||||
IndexedMap<SUnit*, VirtReg2IndexFunctor> VRegDefs;
|
||||
IndexedMap<std::vector<SUnit*>, VirtReg2IndexFunctor> VRegUses;
|
||||
// Track the last instructon in this region defining each virtual register.
|
||||
// FIXME: turn this into a sparse set with constant time clear().
|
||||
DenseMap<unsigned, SUnit*> VRegDefs;
|
||||
|
||||
/// PendingLoads - Remember where unknown loads are after the most recent
|
||||
/// unknown store, as we iterate. As with Defs and Uses, this is here
|
||||
@ -152,7 +155,8 @@ namespace llvm {
|
||||
explicit ScheduleDAGInstrs(MachineFunction &mf,
|
||||
const MachineLoopInfo &mli,
|
||||
const MachineDominatorTree &mdt,
|
||||
bool IsPostRAFlag);
|
||||
bool IsPostRAFlag,
|
||||
LiveIntervals *LIS = 0);
|
||||
|
||||
virtual ~ScheduleDAGInstrs() {}
|
||||
|
||||
@ -169,6 +173,7 @@ namespace llvm {
|
||||
return &SUnits.back();
|
||||
}
|
||||
|
||||
|
||||
/// Run - perform scheduling.
|
||||
///
|
||||
void Run(MachineBasicBlock *bb,
|
||||
@ -219,6 +224,14 @@ namespace llvm {
|
||||
virtual std::string getGraphNodeLabel(const SUnit *SU) const;
|
||||
|
||||
protected:
|
||||
SUnit *getSUnit(MachineInstr *MI) const {
|
||||
DenseMap<MachineInstr*, SUnit*>::const_iterator I = MISUnitMap.find(MI);
|
||||
if (I == MISUnitMap.end())
|
||||
return 0;
|
||||
return I->second;
|
||||
}
|
||||
|
||||
void initSUnits();
|
||||
void addPhysRegDeps(SUnit *SU, unsigned OperIdx);
|
||||
void addVRegDefDeps(SUnit *SU, unsigned OperIdx);
|
||||
void addVRegUseDeps(SUnit *SU, unsigned OperIdx);
|
||||
|
Loading…
x
Reference in New Issue
Block a user