MI-Sched: Model "reserved" processor resources.

This allows a target to use MI-Sched as an in-order scheduler that will model strict resource conflicts without defining a processor itinerary. Instead, the target can now use the new per-operand machine model and define in-order resources with BufferSize=0. For example, this would allow restricting the type of operations that can be formed into a dispatch group. (Normally NumMicroOps is sufficient to enforce dispatch groups). If the intent is to model latency in in-order pipeline, as opposed to resource conflicts, then a resource with BufferSize=1 should be defined instead. This feature is only casually tested as there are no in-tree targets using it yet. However, Hal will be experimenting with POWER7. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@196517 91177308-0d34-0410-b5e6-96231b3b80d8
2024-12-20 19:20:23 +00:00 · 2013-12-05 17:56:02 +00:00 · 2013-12-05 17:56:02 +00:00 · 6606ef0e98
commit 6606ef0e98
parent 573931394f
3 changed files with 92 additions and 30 deletions
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@ -292,7 +292,8 @@ namespace llvm {
    bool isScheduleHigh   : 1;          // True if preferable to schedule high.
    bool isScheduleLow    : 1;          // True if preferable to schedule low.
    bool isCloned         : 1;          // True if this node has been cloned.
-    bool isUnbuffered     : 1;          // Reads an unbuffered resource.
+    bool isUnbuffered     : 1;          // Uses an unbuffered resource.
+    bool hasReservedResource : 1;       // Uses a reserved resource.
    Sched::Preference SchedulingPref;   // Scheduling preference.

  private:
@ -318,9 +319,9 @@ namespace llvm {
        hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false),
        isAvailable(false), isScheduled(false), isScheduleHigh(false),
        isScheduleLow(false), isCloned(false), isUnbuffered(false),
-        SchedulingPref(Sched::None), isDepthCurrent(false),
-        isHeightCurrent(false), Depth(0), Height(0), TopReadyCycle(0),
-        BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
+        hasReservedResource(false), SchedulingPref(Sched::None),
+        isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
+        TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}

    /// SUnit - Construct an SUnit for post-regalloc scheduling to represent
    /// a MachineInstr.
@ -333,9 +334,9 @@ namespace llvm {
        hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false),
        isAvailable(false), isScheduled(false), isScheduleHigh(false),
        isScheduleLow(false), isCloned(false), isUnbuffered(false),
-        SchedulingPref(Sched::None), isDepthCurrent(false),
-        isHeightCurrent(false), Depth(0), Height(0), TopReadyCycle(0),
-        BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
+        hasReservedResource(false), SchedulingPref(Sched::None),
+        isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
+        TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}

    /// SUnit - Construct a placeholder SUnit.
    SUnit()
@ -347,9 +348,9 @@ namespace llvm {
        hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false),
        isAvailable(false), isScheduled(false), isScheduleHigh(false),
        isScheduleLow(false), isCloned(false), isUnbuffered(false),
-        SchedulingPref(Sched::None), isDepthCurrent(false),
-        isHeightCurrent(false), Depth(0), Height(0), TopReadyCycle(0),
-        BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
+        hasReservedResource(false), SchedulingPref(Sched::None),
+        isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
+        TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}

    /// \brief Boundary nodes are placeholders for the boundary of the
    /// scheduling region.
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@ -1322,6 +1322,8 @@ void CopyConstrain::apply(ScheduleDAGMI *DAG) {
 // GenericScheduler - Implementation of the generic MachineSchedStrategy.
 //===----------------------------------------------------------------------===//

+static const unsigned InvalidCycle = ~0U;
+
 namespace {
 /// GenericScheduler shrinks the unscheduled zone using heuristics to balance
 /// the schedule.
@ -1491,6 +1493,10 @@ public:
    // Is the scheduled region resource limited vs. latency limited.
    bool IsResourceLimited;

+    // Record the highest cycle at which each resource has been reserved by a
+    // scheduled instruction.
+    SmallVector<unsigned, 16> ReservedCycles;
+
 #ifndef NDEBUG
    // Remember the greatest operand latency as an upper bound on the number of
    // times we should retry the pending queue because of a hazard.
@ -1518,6 +1524,7 @@ public:
      MaxExecutedResCount = 0;
      ZoneCritResIdx = 0;
      IsResourceLimited = false;
+      ReservedCycles.clear();
 #ifndef NDEBUG
      MaxObservedLatency = 0;
 #endif
@ -1587,6 +1594,8 @@ public:
    /// cycle.
    unsigned getLatencyStallCycles(SUnit *SU);

+    unsigned getNextResourceCycle(unsigned PIdx, unsigned Cycles);
+
    bool checkHazard(SUnit *SU);

    unsigned findMaxLatency(ArrayRef<SUnit*> ReadySUs);
@ -1708,8 +1717,10 @@ init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
  DAG = dag;
  SchedModel = smodel;
  Rem = rem;
-  if (SchedModel->hasInstrSchedModel())
+  if (SchedModel->hasInstrSchedModel()) {
    ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
+    ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
+  }
 }

 /// Initialize the per-region scheduling policy.
@ -1890,6 +1901,20 @@ unsigned GenericScheduler::SchedBoundary::getLatencyStallCycles(SUnit *SU) {
  return 0;
 }

+/// Compute the next cycle at which the given processor resource can be
+/// scheduled.
+unsigned GenericScheduler::SchedBoundary::
+getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
+  unsigned NextUnreserved = ReservedCycles[PIdx];
+  // If this resource has never been used, always return cycle zero.
+  if (NextUnreserved == InvalidCycle)
+    return 0;
+  // For bottom-up scheduling add the cycles needed for the current operation.
+  if (!isTop())
+    NextUnreserved += Cycles;
+  return NextUnreserved;
+}
+
 /// Does this SU have a hazard within the current instruction group.
 ///
 /// The scheduler supports two modes of hazard recognition. The first is the
@ -1913,6 +1938,15 @@ bool GenericScheduler::SchedBoundary::checkHazard(SUnit *SU) {
          << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
    return true;
  }
+  if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
+    const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+    for (TargetSchedModel::ProcResIter
+           PI = SchedModel->getWriteProcResBegin(SC),
+           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+      if (getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles) > CurrCycle)
+        return true;
+    }
+  }
  return false;
 }

@ -2097,7 +2131,7 @@ void GenericScheduler::SchedBoundary::incExecutedResources(unsigned PIdx,
 /// \return the next cycle at which the instruction may execute without
 /// oversubscribing resources.
 unsigned GenericScheduler::SchedBoundary::
-countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle) {
+countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
  unsigned Factor = SchedModel->getResourceFactor(PIdx);
  unsigned Count = Factor * Cycles;
  DEBUG(dbgs() << "  " << getResourceName(PIdx)
@ -2116,8 +2150,14 @@ countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle) {
          << getResourceName(PIdx) << ": "
          << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n");
  }
-  // TODO: We don't yet model reserved resources. It's not hard though.
-  return CurrCycle;
+  // For reserved resources, record the highest cycle using the resource.
+  unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
+  if (NextAvailable > CurrCycle) {
+    DEBUG(dbgs() << "  Resource conflict: "
+          << SchedModel->getProcResource(PIdx)->Name << " reserved until @"
+          << NextAvailable << "\n");
+  }
+  return NextAvailable;
 }

 /// Move the boundary of scheduled code by one SUnit.
@ -2131,25 +2171,17 @@ void GenericScheduler::SchedBoundary::bumpNode(SUnit *SU) {
    }
    HazardRec->EmitInstruction(SU);
  }
+  // checkHazard should prevent scheduling multiple instructions per cycle that
+  // exceed the issue width.
  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
  unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
-  CurrMOps += IncMOps;
-  // checkHazard prevents scheduling multiple instructions per cycle that exceed
-  // issue width. However, we commonly reach the maximum. In this case
-  // opportunistically bump the cycle to avoid uselessly checking everything in
-  // the readyQ. Furthermore, a single instruction may produce more than one
-  // cycle's worth of micro-ops.
-  //
-  // TODO: Also check if this SU must end a dispatch group.
-  unsigned NextCycle = CurrCycle;
-  if (CurrMOps >= SchedModel->getIssueWidth()) {
-    ++NextCycle;
-    DEBUG(dbgs() << "  *** Max MOps " << CurrMOps
-          << " at cycle " << CurrCycle << '\n');
-  }
+  assert(CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth() &&
+         "Cannot scheduling this instructions MicroOps in the current cycle.");
+
  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
  DEBUG(dbgs() << "  Ready @" << ReadyCycle << "c\n");

+  unsigned NextCycle = CurrCycle;
  switch (SchedModel->getMicroOpBufferSize()) {
  case 0:
    assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
@ -2194,10 +2226,23 @@ void GenericScheduler::SchedBoundary::bumpNode(SUnit *SU) {
           PI = SchedModel->getWriteProcResBegin(SC),
           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
      unsigned RCycle =
-        countResource(PI->ProcResourceIdx, PI->Cycles, ReadyCycle);
+        countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
      if (RCycle > NextCycle)
        NextCycle = RCycle;
    }
+    if (SU->hasReservedResource) {
+      // For reserved resources, record the highest cycle using the resource.
+      // For top-down scheduling, this is the cycle in which we schedule this
+      // instruction plus the number of cycles the operations reserves the
+      // resource. For bottom-up is it simply the instruction's cycle.
+      for (TargetSchedModel::ProcResIter
+             PI = SchedModel->getWriteProcResBegin(SC),
+             PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+        unsigned PIdx = PI->ProcResourceIdx;
+        if (SchedModel->getProcResource(PIdx)->BufferSize == 0)
+          ReservedCycles[PIdx] = isTop() ? NextCycle + PI->Cycles : NextCycle;
+      }
+    }
  }
  // Update ExpectedLatency and DependentLatency.
  unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
@ -2224,6 +2269,16 @@ void GenericScheduler::SchedBoundary::bumpNode(SUnit *SU) {
      (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
      > (int)LFactor;
  }
+  // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
+  // resets CurrMOps. Loop to handle instructions with more MOps than issue in
+  // one cycle.  Since we commonly reach the max MOps here, opportunistically
+  // bump the cycle to avoid uselessly checking everything in the readyQ.
+  CurrMOps += IncMOps;
+  while (CurrMOps >= SchedModel->getIssueWidth()) {
+    bumpCycle(++NextCycle);
+    DEBUG(dbgs() << "  *** Max MOps " << CurrMOps
+          << " at cycle " << CurrCycle << '\n');
+  }
  DEBUG(dumpScheduledState());
 }

--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@ -697,9 +697,15 @@ void ScheduleDAGInstrs::initSUnits() {
      for (TargetSchedModel::ProcResIter
             PI = SchedModel.getWriteProcResBegin(SC),
             PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
-        if (SchedModel.getProcResource(PI->ProcResourceIdx)->BufferSize == 1) {
+        switch (SchedModel.getProcResource(PI->ProcResourceIdx)->BufferSize) {
+        case 0:
+          SU->hasReservedResource = true;
+          break;
+        case 1:
          SU->isUnbuffered = true;
          break;
+        default:
+          break;
        }
      }
    }