Initialize SUnits before DAG building.

Affect on SD scheduling and postRA scheduling: Printing the DAG will display the nodes in top-down topological order. This matches the order within the MBB and makes my life much easier in general. Affect on misched: We don't need to track virtual register uses at all. This is awesome. I also intend to rely on the SUnit ID as a topo-sort index. So if A < B then we cannot have an edge B -> A. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@151135 91177308-0d34-0410-b5e6-96231b3b80d8
2025-01-30 00:32:53 +00:00 · 2012-02-22 06:08:11 +00:00 · 2012-02-22 06:08:11 +00:00 · b4566a9999
commit b4566a9999
parent 44d23825d6
6 changed files with 115 additions and 76 deletions
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@ -231,6 +231,7 @@ namespace llvm {
  public:
    SUnit *OrigNode;                    // If not this, the node from which
                                        // this node was cloned.
+                                        // (SD scheduling only)

    // Preds/Succs - The SUnits before/after us in the graph.
    SmallVector<SDep, 4> Preds;  // All sunit predecessors.
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@ -427,6 +427,8 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,

  // Keep a map of the MachineInstr*'s back to the SUnit representing them.
  // This is used for updating debug information.
+  //
+  // FIXME: Replace this with the existing map in ScheduleDAGInstrs::MISUnitMap
  DenseMap<MachineInstr*,const SUnit*> MISUnitMap;

  // Find the node at the bottom of the critical path.
--- a/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/lib/CodeGen/LatencyPriorityQueue.cpp
@ -46,7 +46,7 @@ bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {

  // Finally, just to provide a stable ordering, use the node number as a
  // deciding factor.
-  return LHSNum < RHSNum;
+  return RHSNum < LHSNum;
 }


--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@ -149,7 +149,8 @@ protected:
  MachineScheduler *Pass;
 public:
  ScheduleTopDownLive(MachineScheduler *P):
-    ScheduleDAGInstrs(*P->MF, *P->MLI, *P->MDT, /*IsPostRA=*/false), Pass(P) {}
+    ScheduleDAGInstrs(*P->MF, *P->MLI, *P->MDT, /*IsPostRA=*/false, P->LIS),
+    Pass(P) {}

  /// ScheduleDAGInstrs callback.
  void Schedule();
@ -310,7 +311,8 @@ class DefaultMachineScheduler : public ScheduleDAGInstrs {
  MachineScheduler *Pass;
 public:
  DefaultMachineScheduler(MachineScheduler *P):
-    ScheduleDAGInstrs(*P->MF, *P->MLI, *P->MDT, /*IsPostRA=*/false), Pass(P) {}
+    ScheduleDAGInstrs(*P->MF, *P->MLI, *P->MDT, /*IsPostRA=*/false, P->LIS),
+    Pass(P) {}

  /// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's
  /// time to do some work.
@ -348,15 +350,14 @@ void DefaultMachineScheduler::Schedule() {

 #ifndef NDEBUG
 namespace {
-// Nodes with a higher number have lower priority. This way we attempt to
+// Nodes with a higher number have higher priority. This way we attempt to
 // schedule the latest instructions earliest.
 //
 // TODO: Relies on the property of the BuildSchedGraph that results in SUnits
-// being ordered in sequence bottom-up. This will be formalized, probably be
-// constructing SUnits in a prepass.
+// being ordered in sequence top-down.
 struct ShuffleSUnitOrder {
  bool operator()(SUnit *A, SUnit *B) const {
-    return A->NodeNum > B->NodeNum;
+    return A->NodeNum < B->NodeNum;
  }
 };

--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@ -17,6 +17,7 @@
 #include "llvm/Operator.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@ -34,11 +35,14 @@ using namespace llvm;
 ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                     const MachineLoopInfo &mli,
                                     const MachineDominatorTree &mdt,
-                                     bool IsPostRAFlag)
+                                     bool IsPostRAFlag,
+                                     LiveIntervals *lis)
  : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
    InstrItins(mf.getTarget().getInstrItineraryData()), IsPostRA(IsPostRAFlag),
-    UnitLatencies(false), Defs(TRI->getNumRegs()), Uses(TRI->getNumRegs()),
+    LIS(lis), UnitLatencies(false),
+    Defs(TRI->getNumRegs()), Uses(TRI->getNumRegs()),
    LoopRegs(MLI, MDT), FirstDbgValue(0) {
+  assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
  DbgValues.clear();
  assert(!(IsPostRA && MF.getRegInfo().getNumVirtRegs()) &&
         "Virtual registers must be removed prior to PostRA scheduling");
@ -357,8 +361,6 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
  const MachineInstr *MI = SU->getInstr();
  unsigned Reg = MI->getOperand(OperIdx).getReg();

-  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
-
  // Add output dependence to the next nearest def of this vreg.
  //
  // Unless this definition is dead, the output dependence should be
@ -366,60 +368,94 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
  // uses. We're conservative for now until we have a way to guarantee the uses
  // are not eliminated sometime during scheduling. The output dependence edge
  // is also useful if output latency exceeds def-use latency.
-  SUnit *DefSU = VRegDefs[Reg];
+  SUnit *&DefSU = VRegDefs[Reg];
  if (DefSU && DefSU != SU && DefSU != &ExitSU) {
    unsigned OutLatency = TII->getOutputLatency(InstrItins, MI, OperIdx,
                                                DefSU->getInstr());
    DefSU->addPred(SDep(SU, SDep::Output, OutLatency, Reg));
  }
-  VRegDefs[Reg] = SU;
-
-  // Add data dependence to any uses of this vreg before the next nearest def.
-  //
-  // TODO: Handle ExitSU properly.
-  //
-  // TODO: Data dependence could be handled more efficiently at the use-side.
-  std::vector<SUnit*> &UseList = VRegUses[Reg];
-  for (std::vector<SUnit*>::const_iterator UI = UseList.begin(),
-         UE = UseList.end(); UI != UE; ++UI) {
-    SUnit *UseSU = *UI;
-    if (UseSU == SU) continue;
-
-    // TODO: Handle "special" address latencies cleanly.
-    const SDep& dep = SDep(SU, SDep::Data, SU->Latency, Reg);
-    if (!UnitLatencies) {
-      // Adjust the dependence latency using operand def/use information, then
-      // allow the target to perform its own adjustments.
-      ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
-      ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
-    }
-    UseSU->addPred(dep);
-  }
-  UseList.clear();
+  DefSU = SU;
 }

-/// addVRegUseDeps - Add register antidependencies from this SUnit to
-/// instructions that occur later in the same scheduling region if they
-/// write the virtual register referenced at OperIdx.
+/// addVRegUseDeps - Add a register data dependency if the instruction that
+/// defines the virtual register used at OperIdx is mapped to an SUnit. Add a
+/// register antidependency from this SUnit to instructions that occur later in
+/// the same scheduling region if they write the virtual register.
+///
+/// TODO: Handle ExitSU "uses" properly.
 void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
-  unsigned Reg = SU->getInstr()->getOperand(OperIdx).getReg();
+  MachineInstr *MI = SU->getInstr();
+  unsigned Reg = MI->getOperand(OperIdx).getReg();
+
+  // Lookup this operand's reaching definition.
+  assert(LIS && "vreg dependencies requires LiveIntervals");
+  SlotIndex UseIdx = LIS->getSlotIndexes()->getInstructionIndex(MI);
+  LiveInterval *LI = &LIS->getInterval(Reg);
+  VNInfo *VNI = LI->getVNInfoAt(UseIdx);
+  MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def);
+  if (Def) {
+    SUnit *DefSU = getSUnit(Def);
+    if (DefSU) {
+      // The reaching Def lives within this scheduling region.
+      // Create a data dependence.
+      //
+      // TODO: Handle "special" address latencies cleanly.
+      const SDep &dep = SDep(DefSU, SDep::Data, DefSU->Latency, Reg);
+      if (!UnitLatencies) {
+        // Adjust the dependence latency using operand def/use information, then
+        // allow the target to perform its own adjustments.
+        ComputeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
+        const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+        ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
+      }
+      SU->addPred(dep);
+    }
+  }

  // Add antidependence to the following def of the vreg it uses.
-  SUnit *DefSU = VRegDefs[Reg];
-  if (DefSU && DefSU != SU)
-    DefSU->addPred(SDep(SU, SDep::Anti, 0, Reg));
+  DenseMap<unsigned, SUnit*>::const_iterator I = VRegDefs.find(Reg);
+  if (I != VRegDefs.end()) {
+    SUnit *DefSU = I->second;
+    if (DefSU != SU)
+      DefSU->addPred(SDep(SU, SDep::Anti, 0, Reg));
+  }
+}

-  // Add this SUnit to the use list of the vreg it uses.
-  //
-  // TODO: pinch the DAG before we see too many uses to avoid quadratic
-  // behavior. Limiting the scheduling window can accomplish the same thing.
-  VRegUses[Reg].push_back(SU);
+/// Create an SUnit for each real instruction, numbered in top-down toplological
+/// order. The instruction order A < B, implies that no edge exists from B to A.
+///
+/// Map each real instruction to its SUnit.
+///
+/// After initSUnits, the SUnits vector is cannot be resized and the scheduler
+/// may hang onto SUnit pointers. We may relax this in the future by using SUnit
+/// IDs instead of pointers.
+void ScheduleDAGInstrs::initSUnits() {
+  // We'll be allocating one SUnit for each real instruction in the region,
+  // which is contained within a basic block.
+  SUnits.reserve(BB->size());
+
+  for (MachineBasicBlock::iterator I = Begin; I != InsertPos; ++I) {
+    MachineInstr *MI = I;
+    if (MI->isDebugValue())
+      continue;
+
+    SUnit *SU = NewSUnit(MI);
+    MISUnitMap[MI] = SU;
+
+    SU->isCall = MI->isCall();
+    SU->isCommutable = MI->isCommutable();
+
+    // Assign the Latency field of SU using target-provided information.
+    if (UnitLatencies)
+      SU->Latency = 1;
+    else
+      ComputeLatency(SU);
+  }
 }

 void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
-  // We'll be allocating one SUnit for each instruction, plus one for
-  // the region exit node.
-  SUnits.reserve(BB->size());
+  // Create an SUnit for each real instruction.
+  initSUnits();

  // We build scheduling units by walking a block's instruction list from bottom
  // to top.
@ -447,14 +483,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
    assert(Defs[i].empty() && "Only BuildGraph should push/pop Defs");
  }

-  // Reinitialize the large VReg vectors, while reusing the memory.
-  //
-  // Note: this can be an expensive part of DAG building. We may want to be more
-  // clever. Reevaluate after VRegUses goes away.
-  assert(VRegDefs.size() == 0 && VRegUses.size() == 0 &&
-         "Only BuildSchedGraph should access VRegDefs/Uses");
-  VRegDefs.resize(MF.getRegInfo().getNumVirtRegs());
-  VRegUses.resize(MF.getRegInfo().getNumVirtRegs());
+  assert(VRegDefs.size() == 0 && "Only BuildSchedGraph may access VRegDefs");

  // Walk the list of instructions, from bottom moving up.
  MachineInstr *PrevMI = NULL;
@ -473,16 +502,9 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {

    assert(!MI->isTerminator() && !MI->isLabel() &&
           "Cannot schedule terminators or labels!");
-    // Create the SUnit for this MI.
-    SUnit *SU = NewSUnit(MI);
-    SU->isCall = MI->isCall();
-    SU->isCommutable = MI->isCommutable();

-    // Assign the Latency field of SU using target-provided information.
-    if (UnitLatencies)
-      SU->Latency = 1;
-    else
-      ComputeLatency(SU);
+    SUnit *SU = MISUnitMap[MI];
+    assert(SU && "No SUnit mapped to this MI");

    // Add register-based dependencies (data, anti, and output).
    for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
@ -657,8 +679,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
    Uses[i].clear();
  }
  VRegDefs.clear();
-  VRegUses.clear();
  PendingLoads.clear();
+  MISUnitMap.clear();
 }

 void ScheduleDAGInstrs::FinishBlock() {
--- a/lib/CodeGen/ScheduleDAGInstrs.h
+++ b/lib/CodeGen/ScheduleDAGInstrs.h
@ -27,6 +27,7 @@
 namespace llvm {
  class MachineLoopInfo;
  class MachineDominatorTree;
+  class LiveIntervals;

  /// LoopDependencies - This class analyzes loop-oriented register
  /// dependencies, which are used to guide scheduling decisions.
@ -108,6 +109,11 @@ namespace llvm {
    /// isPostRA flag indicates vregs cannot be present.
    bool IsPostRA;

+    /// Live Intervals provides reaching defs in preRA scheduling.
+    LiveIntervals *LIS;
+
+    DenseMap<MachineInstr*, SUnit*> MISUnitMap;
+
    /// UnitLatencies (misnamed) flag avoids computing def-use latencies, using
    /// the def-side latency only.
    bool UnitLatencies;
@ -119,12 +125,9 @@ namespace llvm {
    std::vector<std::vector<SUnit *> > Defs;
    std::vector<std::vector<SUnit *> > Uses;

-    // Virtual register Defs and Uses.
-    //
-    // TODO: Eliminate VRegUses by creating SUnits in a prepass and looking up
-    // the live range's reaching def.
-    IndexedMap<SUnit*, VirtReg2IndexFunctor> VRegDefs;
-    IndexedMap<std::vector<SUnit*>, VirtReg2IndexFunctor> VRegUses;
+    // Track the last instructon in this region defining each virtual register.
+    // FIXME: turn this into a sparse set with constant time clear().
+    DenseMap<unsigned, SUnit*> VRegDefs;

    /// PendingLoads - Remember where unknown loads are after the most recent
    /// unknown store, as we iterate. As with Defs and Uses, this is here
@ -152,7 +155,8 @@ namespace llvm {
    explicit ScheduleDAGInstrs(MachineFunction &mf,
                               const MachineLoopInfo &mli,
                               const MachineDominatorTree &mdt,
-                               bool IsPostRAFlag);
+                               bool IsPostRAFlag,
+                               LiveIntervals *LIS = 0);

    virtual ~ScheduleDAGInstrs() {}

@ -169,6 +173,7 @@ namespace llvm {
      return &SUnits.back();
    }

+
    /// Run - perform scheduling.
    ///
    void Run(MachineBasicBlock *bb,
@ -219,6 +224,14 @@ namespace llvm {
    virtual std::string getGraphNodeLabel(const SUnit *SU) const;

  protected:
+    SUnit *getSUnit(MachineInstr *MI) const {
+      DenseMap<MachineInstr*, SUnit*>::const_iterator I = MISUnitMap.find(MI);
+      if (I == MISUnitMap.end())
+        return 0;
+      return I->second;
+    }
+
+    void initSUnits();
    void addPhysRegDeps(SUnit *SU, unsigned OperIdx);
    void addVRegDefDeps(SUnit *SU, unsigned OperIdx);
    void addVRegUseDeps(SUnit *SU, unsigned OperIdx);