Use the schedule itinerary operand use/def cycle information to adjust dependence edge latency for post-RA scheduling.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@79425 91177308-0d34-0410-b5e6-96231b3b80d8
2025-02-21 13:33:26 +00:00 · 2009-08-19 16:08:58 +00:00 · 2009-08-19 16:08:58 +00:00 · dc4bdcdef1
commit dc4bdcdef1
parent 774350a606
6 changed files with 107 additions and 19 deletions
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@ -495,6 +495,12 @@ namespace llvm {
    ///
    virtual void ComputeLatency(SUnit *SU) = 0;

+    /// ComputeOperandLatency - Hook for overriding the latency of a
+    /// dependence edge using operand use/def information. The default
+    /// implementation does nothing and leaves the edge unchanged.
+    virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use,
+                                       SDep& dep) const {}
+
    /// Schedule - Order nodes according to selected style, filling
    /// in the Sequence member.
    ///
--- a/include/llvm/Target/TargetInstrItineraries.h
+++ b/include/llvm/Target/TargetInstrItineraries.h
@ -103,7 +103,7 @@ struct InstrItineraryData {
  /// isEmpty - Returns true if there are no itineraries.
  ///
  bool isEmpty() const { return Itineratries == 0; }
-  
+
  /// beginStage - Return the first stage of the itinerary.
  /// 
  const InstrStage *beginStage(unsigned ItinClassIndx) const {
@ -118,20 +118,17 @@ struct InstrItineraryData {
    return Stages + StageIdx;
  }

-  /// getLatency - Return the scheduling latency of the given class.  A
-  /// simple latency value for an instruction is an over-simplification
-  /// for some architectures, but it's a reasonable first approximation.
+  /// getStageLatency - Return the total stage latency of the given
+  /// class.  The latency is the maximum completion time for any stage
+  /// in the itinerary.
  ///
-  unsigned getLatency(unsigned ItinClassIndx) const {
-    // If the target doesn't provide latency information, use a simple
-    // non-zero default value for all instructions.
+  unsigned getStageLatency(unsigned ItinClassIndx) const {
+    // If the target doesn't provide itinerary information, use a
+    // simple non-zero default value for all instructions.
    if (isEmpty())
      return 1;

-    // Caclulate the maximum completion time for any stage. The
-    // assumption is that all inputs are consumed at the start of the
-    // first stage and that all outputs are produced at the end of the
-    // latest completing last stage.
+    // Calculate the maximum completion time for any stage.
    unsigned Latency = 0, StartCycle = 0;
    for (const InstrStage *IS = beginStage(ItinClassIndx),
           *E = endStage(ItinClassIndx); IS != E; ++IS) {
@ -141,6 +138,21 @@ struct InstrItineraryData {

    return Latency;
  }
+
+  /// getOperandCycle - Return the cycle recorded for the given class
+  /// and operand, or -1 when no cycle is specified for that operand.
+  ///
+  int getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const {
+    if (isEmpty())
+      return -1;
+
+    // The class's entries start at FirstOperandCycle; a slot at or
+    // past LastOperandCycle has no cycle recorded for it.
+    const unsigned Begin = Itineratries[ItinClassIndx].FirstOperandCycle;
+    const unsigned End = Itineratries[ItinClassIndx].LastOperandCycle;
+    const unsigned Slot = Begin + OperandIdx;
+    return Slot < End ? static_cast<int>(OperandCycles[Slot]) : -1;
+  }
 };


--- a/include/llvm/Target/TargetSubtarget.h
+++ b/include/llvm/Target/TargetSubtarget.h
@ -17,6 +17,7 @@
 namespace llvm {

 class SDep;
+class SUnit;

 //===----------------------------------------------------------------------===//
 ///
@ -40,7 +41,8 @@ public:

  // adjustSchedDependency - Perform target specific adjustments to
  // the latency of a schedule dependency.
-  virtual void adjustSchedDependency(SDep&) const { };
+  virtual void adjustSchedDependency(SUnit *def, SUnit *use,
+                                     SDep& dep) const {}
 };

 } // End llvm namespace
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@ -210,6 +210,10 @@ void ScheduleDAGInstrs::BuildSchedGraph() {
            // Optionally add in a special extra latency for nodes that
            // feed addresses.
            // TODO: Do this for register aliases too.
+            // TODO: Perhaps we should get rid of
+            // SpecialAddressLatency and just move this into
+            // adjustSchedDependency for the targets that care about
+            // it.
            if (SpecialAddressLatency != 0 && !UnitLatencies) {
              MachineInstr *UseMI = UseSU->getInstr();
              const TargetInstrDesc &UseTID = UseMI->getDesc();
@ -220,8 +224,14 @@ void ScheduleDAGInstrs::BuildSchedGraph() {
                  UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass())
                LDataLatency += SpecialAddressLatency;
            }
+            // Adjust the dependence latency using operand def/use
+            // information (if any), and then allow the target to
+            // perform its own adjustments.
            const SDep& dep = SDep(SU, SDep::Data, LDataLatency, Reg);
-            ST.adjustSchedDependency((SDep &)dep);
+            if (!UnitLatencies) {
+              ComputeOperandLatency(SU, UseSU, (SDep &)dep);
+              ST.adjustSchedDependency(SU, UseSU, (SDep &)dep);
+            }
            UseSU->addPred(dep);
          }
        }
@ -231,7 +241,10 @@ void ScheduleDAGInstrs::BuildSchedGraph() {
            SUnit *UseSU = UseList[i];
            if (UseSU != SU) {
              const SDep& dep = SDep(SU, SDep::Data, DataLatency, *Alias);
-              ST.adjustSchedDependency((SDep &)dep);
+              if (!UnitLatencies) {
+                ComputeOperandLatency(SU, UseSU, (SDep &)dep);
+                ST.adjustSchedDependency(SU, UseSU, (SDep &)dep);
+              }
              UseSU->addPred(dep);
            }
          }
@ -410,7 +423,7 @@ void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {

  // Compute the latency for the node.
  SU->Latency =
-    InstrItins.getLatency(SU->getInstr()->getDesc().getSchedClass());
+    InstrItins.getStageLatency(SU->getInstr()->getDesc().getSchedClass());

  // Simplistic target-independent heuristic: assume that loads take
  // extra time.
@ -419,6 +432,50 @@ void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
      SU->Latency += 2;
 }

+void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
+                                              SDep& dep) const {
+  // Only a data dependence on a known register can be refined, and
+  // only when the target provides itinerary data.
+  const InstrItineraryData &Itins = TM.getInstrItineraryData();
+  if (Itins.isEmpty() || dep.getKind() != SDep::Data || dep.getReg() == 0)
+    return;
+
+  // Find the operand of the defining instruction that defines the
+  // register; if there is none, leave the edge alone.
+  const unsigned Reg = dep.getReg();
+  MachineInstr *DefMI = Def->getInstr();
+  const int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
+  if (DefIdx == -1)
+    return;
+
+  // A negative cycle means the itinerary has none for this operand.
+  const unsigned DefClass = DefMI->getDesc().getSchedClass();
+  const int DefCycle = Itins.getOperandCycle(DefClass, DefIdx);
+  if (DefCycle < 0)
+    return;
+
+  MachineInstr *UseMI = Use->getInstr();
+  const unsigned UseClass = UseMI->getDesc().getSchedClass();
+
+  // Over all operands of the user that read the register, take the
+  // maximum of DefCycle - UseCycle + 1; -1 means none found so far.
+  int MaxLatency = -1;
+  for (unsigned OpIdx = 0, NumOps = UseMI->getNumOperands();
+       OpIdx != NumOps; ++OpIdx) {
+    const MachineOperand &MO = UseMI->getOperand(OpIdx);
+    if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
+      continue;
+
+    const int UseCycle = Itins.getOperandCycle(UseClass, OpIdx);
+    if (UseCycle >= 0)
+      MaxLatency = std::max(MaxLatency, DefCycle - UseCycle + 1);
+  }
+
+  // Keep the existing latency unless a non-negative one was computed.
+  if (MaxLatency >= 0)
+    dep.setLatency(MaxLatency);
+}
+
 void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
  SU->getInstr()->dump();
 }
--- a/lib/CodeGen/ScheduleDAGInstrs.h
+++ b/lib/CodeGen/ScheduleDAGInstrs.h
@ -160,6 +160,12 @@ namespace llvm {
    ///
    virtual void ComputeLatency(SUnit *SU);

+    /// ComputeOperandLatency - Override the latency of the dependence
+    /// edge from Def to Use using operand use/def cycle information
+    /// from the instruction itineraries, when it is available.
+    virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use,
+                                       SDep& dep) const;
+
    virtual MachineBasicBlock *EmitSchedule();

    /// StartBlock - Prepare to perform scheduling in the given block.
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@ -155,6 +155,9 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
 void ScheduleDAGSDNodes::AddSchedEdges() {
  const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>();

+  // Check to see if the scheduler cares about latencies.
+  bool UnitLatencies = ForceUnitLatencies();
+
  // Pass 2: add the preds, succs, etc.
  for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
    SUnit *SU = &SUnits[su];
@ -212,8 +215,10 @@ void ScheduleDAGSDNodes::AddSchedEdges() {

        const SDep& dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
                               OpSU->Latency, PhysReg);
-        if (!isChain)
-          ST.adjustSchedDependency((SDep &)dep);
+        if (!isChain && !UnitLatencies) {
+          ComputeOperandLatency(OpSU, SU, (SDep &)dep);
+          ST.adjustSchedDependency(OpSU, SU, (SDep &)dep);
+        }

        SU->addPred(dep);
      }
@ -242,8 +247,8 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
  for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
    if (N->isMachineOpcode()) {
      SawMachineOpcode = true;
-      SU->Latency +=
-        InstrItins.getLatency(TII->get(N->getMachineOpcode()).getSchedClass());
+      SU->Latency += InstrItins.
+        getStageLatency(TII->get(N->getMachineOpcode()).getSchedClass());
    }
 }