Post RA scheduler changes. Introduce a hazard recognizer that uses the target schedule information to accurately model the pipeline. Update the scheduler to correctly handle multi-issue targets.

llvm-svn: 78563
This commit is contained in:
David Goodwin 2009-08-10 15:55:25 +00:00
parent 0a08d1bb9c
commit 3245141543
8 changed files with 342 additions and 82 deletions

View File

@ -43,6 +43,11 @@ public:
return NoHazard;
}
/// Reset - This callback is invoked when a new block of
/// instructions is about to be scheduled. The hazard state should be
/// set to an initialized state.
virtual void Reset() {}
/// EmitInstruction - This callback is invoked when an instruction is
/// emitted, to advance the hazard state.
virtual void EmitInstruction(SUnit *) {}

View File

@ -20,9 +20,9 @@ namespace llvm {
//===----------------------------------------------------------------------===//
/// Instruction stage - These values represent a step in the execution of an
/// instruction. The latency represents the number of discrete time slots used
/// need to complete the stage. Units represent the choice of functional units
/// that can be used to complete the stage. Eg. IntUnit1, IntUnit2.
/// instruction. The latency represents the number of discrete time slots
/// needed to complete the stage. Units represent the choice of functional
/// units that can be used to complete the stage. Eg. IntUnit1, IntUnit2.
///
struct InstrStage {
unsigned Cycles; ///< Length of stage in machine cycles
@ -84,7 +84,9 @@ struct InstrItineraryData {
if (isEmpty())
return 1;
// Just sum the cycle count for each stage.
// Just sum the cycle count for each stage. The assumption is that all
// inputs are consumed at the start of the first stage and that all
// outputs are produced at the end of the last stage.
unsigned Latency = 0;
for (const InstrStage *IS = begin(ItinClassIndx), *E = end(ItinClassIndx);
IS != E; ++IS)

View File

@ -6,6 +6,7 @@ add_llvm_library(LLVMCodeGen
DwarfEHPrepare.cpp
ELFCodeEmitter.cpp
ELFWriter.cpp
ExactHazardRecognizer.cpp
GCMetadata.cpp
GCMetadataPrinter.cpp
GCStrategy.cpp

View File

@ -0,0 +1,149 @@
//===----- ExactHazardRecognizer.cpp - hazard recognizer ------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements a hazard recognizer using the instruction itineraries
// defined for the current target.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "exact-hazards"
#include "ExactHazardRecognizer.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrItineraries.h"
namespace llvm {
/// ExactHazardRecognizer - Build a hazard recognizer whose scoreboard is
/// deep enough to hold the longest itinerary found in LItinData.
///
/// \param LItinData  the target's instruction itinerary data; it is held by
///                   reference and must outlive this object.
ExactHazardRecognizer::ExactHazardRecognizer(const InstrItineraryData &LItinData) :
  ScheduleHazardRecognizer(), ItinData(LItinData)
{
  // Determine the maximum depth of any itinerary. This determines the
  // depth of the scoreboard. We always make the scoreboard at least 1
  // cycle deep to avoid dealing with the boundary condition.
  ScoreboardDepth = 1;
  if (!ItinData.isEmpty()) {
    for (unsigned idx = 0; ; ++idx) {
      // If the begin stage of an itinerary has 0 cycles and units,
      // then we have reached the end of the itineraries.
      const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx);
      if ((IS->Cycles == 0) && (IS->Units == 0))
        break;

      // The depth of an itinerary is the sum of the cycles of its stages.
      unsigned ItinDepth = 0;
      for (; IS != E; ++IS)
        ItinDepth += IS->Cycles;

      ScoreboardDepth = std::max(ScoreboardDepth, ItinDepth);
    }
  }

  // Value-initialize the array so that every cycle starts out with no
  // function units reserved, even if the recognizer is queried before
  // Reset() has been called.
  Scoreboard = new unsigned[ScoreboardDepth]();
  ScoreboardHead = 0;

  DOUT << "Using exact hazard recognizer: ScoreboardDepth = "
       << ScoreboardDepth << '\n';
}
/// ~ExactHazardRecognizer - Release the scoreboard storage.
ExactHazardRecognizer::~ExactHazardRecognizer() {
  // The scoreboard is allocated with new[], so it must be released with
  // the array form of delete.
  delete [] Scoreboard;
}
/// Reset - Return the scoreboard to its initial state: no function unit is
/// reserved in any cycle and the current cycle is the first entry.
void ExactHazardRecognizer::Reset() {
  ScoreboardHead = 0;
  for (unsigned Slot = 0; Slot != ScoreboardDepth; ++Slot)
    Scoreboard[Slot] = 0;
}
/// getFutureIndex - Map a cycle that lies 'offset' cycles after the current
/// one onto its position in the circular scoreboard.
unsigned ExactHazardRecognizer::getFutureIndex(unsigned offset) {
  const unsigned Unwrapped = ScoreboardHead + offset;
  return Unwrapped % ScoreboardDepth;
}
/// dumpScoreboard - Print the scoreboard to the debug stream, one line per
/// cycle starting at the current cycle. Each line shows the 32-bit function
/// unit mask for that cycle, most significant bit first.
void ExactHazardRecognizer::dumpScoreboard() {
  DOUT << "Scoreboard:\n";

  // Skip trailing cycles that have no function unit reserved, but always
  // print at least the current cycle.
  unsigned last = ScoreboardDepth - 1;
  while ((last > 0) && (Scoreboard[getFutureIndex(last)] == 0))
    last--;

  for (unsigned i = 0; i <= last; i++) {
    unsigned FUs = Scoreboard[getFutureIndex(i)];
    DOUT << "\t";
    // Shift an unsigned one: shifting a signed 1 into bit 31 overflows int.
    for (int j = 31; j >= 0; j--)
      DOUT << ((FUs & (1U << j)) ? '1' : '0');
    DOUT << '\n';
  }
}
/// getHazardType - Report whether issuing SU in the current cycle would
/// conflict with function units already reserved in the scoreboard.
///
/// Returns Hazard if, for some cycle of some stage of the instruction's
/// itinerary, every unit the stage may use is already reserved; returns
/// NoHazard otherwise. The scoreboard is not modified.
ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU) {
  const unsigned SchedClass = SU->getInstr()->getDesc().getSchedClass();
  const InstrStage *Stage = ItinData.begin(SchedClass);
  const InstrStage *const StageEnd = ItinData.end(SchedClass);

  // Offset counts cycles from the current one across all stages.
  unsigned Offset = 0;
  for (; Stage != StageEnd; ++Stage) {
    // A stage needs one of its candidate units in each cycle it occupies.
    for (unsigned Remaining = Stage->Cycles; Remaining != 0;
         --Remaining, ++Offset) {
      assert((Offset < ScoreboardDepth) && "Scoreboard depth exceeded!");

      const unsigned Reserved = Scoreboard[getFutureIndex(Offset)];
      if ((Stage->Units & ~Reserved) != 0)
        continue;

      DOUT << "*** Hazard in cycle " << Offset << ", ";
      DOUT << "SU(" << SU->NodeNum << "): ";
      DEBUG(SU->getInstr()->dump());
      return Hazard;
    }
  }

  return NoHazard;
}
/// EmitInstruction - Record that SU is issued in the current cycle by
/// reserving, for every cycle of every stage of its itinerary, one of the
/// function units that stage may use.
void ExactHazardRecognizer::EmitInstruction(SUnit *SU) {
  const unsigned SchedClass = SU->getInstr()->getDesc().getSchedClass();
  const InstrStage *Stage = ItinData.begin(SchedClass);
  const InstrStage *const StageEnd = ItinData.end(SchedClass);

  // Offset counts cycles from the current one across all stages.
  unsigned Offset = 0;
  for (; Stage != StageEnd; ++Stage) {
    for (unsigned Remaining = Stage->Cycles; Remaining != 0;
         --Remaining, ++Offset) {
      assert((Offset < ScoreboardDepth) && "Scoreboard depth exceeded!");

      const unsigned Slot = getFutureIndex(Offset);

      // Start from every unit of the stage that is still free, then strip
      // the lowest set bit until a single unit is left.
      unsigned Chosen = Stage->Units & ~Scoreboard[Slot];
      while ((Chosen & (Chosen - 1)) != 0)
        Chosen &= Chosen - 1;

      assert(Chosen && "No function unit available!");
      Scoreboard[Slot] |= Chosen;
    }
  }

  DEBUG(dumpScoreboard());
}
/// AdvanceCycle - Move on to the next cycle: the entry for the cycle that
/// just finished is cleared and the following entry becomes the current one.
void ExactHazardRecognizer::AdvanceCycle() {
  const unsigned Finished = ScoreboardHead;
  ScoreboardHead = getFutureIndex(1);
  Scoreboard[Finished] = 0;
}
} /* namespace llvm */

View File

@ -0,0 +1,61 @@
//=- llvm/CodeGen/ExactHazardRecognizer.h - Scheduling Support -*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the ExactHazardRecognizer class, which
// implements hazard-avoidance heuristics for scheduling, based on the
// scheduling itineraries specified for the target.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CODEGEN_EXACTHAZARDRECOGNIZER_H
#define LLVM_CODEGEN_EXACTHAZARDRECOGNIZER_H
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/Target/TargetInstrItineraries.h"
namespace llvm {
/// ExactHazardRecognizer - A hazard recognizer that tracks function unit
/// usage cycle by cycle, driven by the target's instruction itineraries.
class ExactHazardRecognizer : public ScheduleHazardRecognizer {
  // Itinerary data for the target.
  const InstrItineraryData &ItinData;

  // Scoreboard to track function unit usage. Each entry is a mask of
  // the FUs in use in one cycle. The Scoreboard is used as a circular
  // buffer: the entry at ScoreboardHead describes the cycle currently
  // being scheduled, the entry after it describes the next cycle, and
  // so on.
  unsigned *Scoreboard;

  // The maximum number of cycles monitored by the Scoreboard. This
  // value is determined based on the target itineraries to ensure
  // that all hazards can be tracked.
  unsigned ScoreboardDepth;

  // Index into the Scoreboard that represents the current cycle.
  unsigned ScoreboardHead;

  // Return the scoreboard index to use for 'offset' cycles in the
  // future. 'offset' of 0 returns ScoreboardHead.
  unsigned getFutureIndex(unsigned offset);

  // Print the scoreboard.
  void dumpScoreboard();

  // The Scoreboard array is owned by this object and released in the
  // destructor, so a member-wise copy would free it twice. Copying is
  // therefore disabled: these are declared but intentionally left
  // undefined.
  ExactHazardRecognizer(const ExactHazardRecognizer &); // DO NOT IMPLEMENT
  void operator=(const ExactHazardRecognizer &);        // DO NOT IMPLEMENT

public:
  ExactHazardRecognizer(const InstrItineraryData &ItinData);
  ~ExactHazardRecognizer();

  virtual HazardType getHazardType(SUnit *SU);
  virtual void Reset();
  virtual void EmitInstruction(SUnit *SU);
  virtual void AdvanceCycle();
};
}
#endif

View File

@ -19,6 +19,8 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "post-RA-sched"
#include "ExactHazardRecognizer.h"
#include "SimpleHazardRecognizer.h"
#include "ScheduleDAGInstrs.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
@ -49,8 +51,8 @@ EnableAntiDepBreaking("break-anti-dependencies",
static cl::opt<bool>
EnablePostRAHazardAvoidance("avoid-hazards",
cl::desc("Enable simple hazard-avoidance"),
cl::init(true), cl::Hidden);
cl::desc("Enable exact hazard avoidance"),
cl::init(false), cl::Hidden);
namespace {
class VISIBILITY_HIDDEN PostRAScheduler : public MachineFunctionPass {
@ -156,62 +158,6 @@ namespace {
void ListScheduleTopDown();
bool BreakAntiDependencies();
};
/// SimpleHazardRecognizer - A *very* simple hazard recognizer. It uses
/// a coarse classification and attempts to avoid that instructions of
/// a given class aren't grouped too densely together.
class SimpleHazardRecognizer : public ScheduleHazardRecognizer {
/// Class - A simple classification for SUnits.
enum Class {
Other, Load, Store
};
/// Window - The Class values of the most recently issued
/// instructions.
Class Window[8];
/// getClass - Classify the given SUnit.
Class getClass(const SUnit *SU) {
const MachineInstr *MI = SU->getInstr();
const TargetInstrDesc &TID = MI->getDesc();
if (TID.mayLoad())
return Load;
if (TID.mayStore())
return Store;
return Other;
}
/// Step - Rotate the existing entries in Window and insert the
/// given class value in position as the most recent.
void Step(Class C) {
std::copy(Window+1, array_endof(Window), Window);
Window[array_lengthof(Window)-1] = C;
}
public:
SimpleHazardRecognizer() : Window() {}
virtual HazardType getHazardType(SUnit *SU) {
Class C = getClass(SU);
if (C == Other)
return NoHazard;
unsigned Score = 0;
for (unsigned i = 0; i != array_lengthof(Window); ++i)
if (Window[i] == C)
Score += i + 1;
if (Score > array_lengthof(Window) * 2)
return Hazard;
return NoHazard;
}
virtual void EmitInstruction(SUnit *SU) {
Step(getClass(SU));
}
virtual void AdvanceCycle() {
Step(Other);
}
};
}
/// isSchedulingBoundary - Test if the given instruction should be
@ -241,9 +187,10 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
const InstrItineraryData &InstrItins = Fn.getTarget().getInstrItineraryData();
ScheduleHazardRecognizer *HR = EnablePostRAHazardAvoidance ?
new SimpleHazardRecognizer :
new ScheduleHazardRecognizer();
(ScheduleHazardRecognizer *)new ExactHazardRecognizer(InstrItins) :
(ScheduleHazardRecognizer *)new SimpleHazardRecognizer();
SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR);
@ -289,6 +236,9 @@ void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
// Call the superclass.
ScheduleDAGInstrs::StartBlock(BB);
// Reset the hazard recognizer.
HazardRec->Reset();
// Clear out the register class data.
std::fill(Classes, array_endof(Classes),
static_cast<const TargetRegisterClass *>(0));
@ -380,6 +330,9 @@ void SchedulePostRATDList::Schedule() {
}
}
DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
SUnits[su].dumpAll(this));
AvailableQueue.initNodes(SUnits);
ListScheduleTopDown();
@ -872,13 +825,6 @@ void SchedulePostRATDList::ListScheduleTopDown() {
MinDepth = PendingQueue[i]->getDepth();
}
// If there are no instructions available, don't try to issue anything, and
// don't advance the hazard recognizer.
if (AvailableQueue.empty()) {
CurCycle = MinDepth != ~0u ? MinDepth : CurCycle + 1;
continue;
}
SUnit *FoundSUnit = 0;
bool HasNoopHazards = false;
@ -909,10 +855,14 @@ void SchedulePostRATDList::ListScheduleTopDown() {
ScheduleNodeTopDown(FoundSUnit, CurCycle);
HazardRec->EmitInstruction(FoundSUnit);
// If this is a pseudo-op node, we don't want to increment the current
// cycle.
if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops!
++CurCycle;
// If we are using the target-specific hazards, then don't
// advance the cycle time just because we schedule a node. If
// the target allows it we can schedule multiple nodes in the
// same cycle.
if (!EnablePostRAHazardAvoidance) {
if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops!
++CurCycle;
}
} else if (!HasNoopHazards) {
// Otherwise, we have a pipeline stall, but no other problem, just advance
// the current cycle and try again.

View File

@ -174,16 +174,20 @@ void ScheduleDAGInstrs::BuildSchedGraph() {
assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
std::vector<SUnit *> &UseList = Uses[Reg];
std::vector<SUnit *> &DefList = Defs[Reg];
// Optionally add output and anti dependencies.
// TODO: Using a latency of 1 here assumes there's no cost for
// reusing registers.
// Optionally add output and anti dependencies. For anti
// dependencies we use a latency of 0 because for a multi-issue
// target we want to allow the defining instruction to issue
// in the same cycle as the using instruction.
// TODO: Using a latency of 1 here for output dependencies assumes
// there's no cost for reusing registers.
SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
unsigned AOLatency = (Kind == SDep::Anti) ? 0 : 1;
for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
SUnit *DefSU = DefList[i];
if (DefSU != SU &&
(Kind != SDep::Output || !MO.isDead() ||
!DefSU->getInstr()->registerDefIsDead(Reg)))
DefSU->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/Reg));
DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/Reg));
}
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
std::vector<SUnit *> &DefList = Defs[*Alias];
@ -192,7 +196,7 @@ void ScheduleDAGInstrs::BuildSchedGraph() {
if (DefSU != SU &&
(Kind != SDep::Output || !MO.isDead() ||
!DefSU->getInstr()->registerDefIsDead(Reg)))
DefSU->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/ *Alias));
DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/ *Alias));
}
}
@ -399,8 +403,7 @@ void ScheduleDAGInstrs::FinishBlock() {
void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
// Compute the latency for the node. We use the sum of the latencies for
// all nodes flagged together into this SUnit.
// Compute the latency for the node.
SU->Latency =
InstrItins.getLatency(SU->getInstr()->getDesc().getSchedClass());

View File

@ -0,0 +1,89 @@
//=- llvm/CodeGen/SimpleHazardRecognizer.h - Scheduling Support -*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SimpleHazardRecognizer class, which
// implements hazard-avoidance heuristics for scheduling, based on a
// coarse classification of instructions as loads, stores, or other.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CODEGEN_SIMPLEHAZARDRECOGNIZER_H
#define LLVM_CODEGEN_SIMPLEHAZARDRECOGNIZER_H
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
namespace llvm {
/// SimpleHazardRecognizer - A *very* simple hazard recognizer. It uses
/// a coarse classification and attempts to avoid that instructions of
/// a given class aren't grouped too densely together.
class SimpleHazardRecognizer : public ScheduleHazardRecognizer {
/// Class - A simple classification for SUnits.
enum Class {
Other, Load, Store
};
/// Window - The Class values of the most recently issued
/// instructions.
Class Window[8];
/// getClass - Classify the given SUnit.
Class getClass(const SUnit *SU) {
const MachineInstr *MI = SU->getInstr();
const TargetInstrDesc &TID = MI->getDesc();
if (TID.mayLoad())
return Load;
if (TID.mayStore())
return Store;
return Other;
}
/// Step - Rotate the existing entries in Window and insert the
/// given class value in position as the most recent.
void Step(Class C) {
std::copy(Window+1, array_endof(Window), Window);
Window[array_lengthof(Window)-1] = C;
}
public:
SimpleHazardRecognizer() : Window() {
Reset();
}
virtual HazardType getHazardType(SUnit *SU) {
Class C = getClass(SU);
if (C == Other)
return NoHazard;
unsigned Score = 0;
for (unsigned i = 0; i != array_lengthof(Window); ++i)
if (Window[i] == C)
Score += i + 1;
if (Score > array_lengthof(Window) * 2)
return Hazard;
return NoHazard;
}
virtual void Reset() {
for (unsigned i = 0; i != array_lengthof(Window); ++i)
Window[i] = Other;
}
virtual void EmitInstruction(SUnit *SU) {
Step(getClass(SU));
}
virtual void AdvanceCycle() {
Step(Other);
}
};
}
#endif