mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-15 07:39:31 +00:00
MI-Sched: heuristics using the new latency and machine model.
llvm-svn: 184038
This commit is contained in:
parent
768a74cb96
commit
ba2de01078
@ -1097,7 +1097,7 @@ void CopyConstrain::apply(ScheduleDAGMI *DAG) {
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
|
||||
// ConvergingScheduler - Implementation of the generic MachineSchedStrategy.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
namespace {
|
||||
@ -1185,32 +1185,21 @@ public:
|
||||
// Critical path through the DAG in expected latency.
|
||||
unsigned CriticalPath;
|
||||
|
||||
// Scaled count of micro-ops left to schedule.
|
||||
unsigned RemIssueCount;
|
||||
|
||||
// Unscheduled resources
|
||||
SmallVector<unsigned, 16> RemainingCounts;
|
||||
// Critical resource for the unscheduled zone.
|
||||
unsigned CritResIdx;
|
||||
// Number of micro-ops left to schedule.
|
||||
unsigned RemainingMicroOps;
|
||||
|
||||
void reset() {
|
||||
CriticalPath = 0;
|
||||
RemIssueCount = 0;
|
||||
RemainingCounts.clear();
|
||||
CritResIdx = 0;
|
||||
RemainingMicroOps = 0;
|
||||
}
|
||||
|
||||
SchedRemainder() { reset(); }
|
||||
|
||||
void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
|
||||
|
||||
unsigned getMaxRemainingCount(const TargetSchedModel *SchedModel) const {
|
||||
if (!SchedModel->hasInstrSchedModel())
|
||||
return 0;
|
||||
|
||||
return std::max(
|
||||
RemainingMicroOps * SchedModel->getMicroOpFactor(),
|
||||
RemainingCounts[CritResIdx]);
|
||||
}
|
||||
};
|
||||
|
||||
/// Each Scheduling boundary is associated with ready queues. It tracks the
|
||||
@ -1231,7 +1220,12 @@ public:
|
||||
|
||||
ScheduleHazardRecognizer *HazardRec;
|
||||
|
||||
/// Number of cycles it takes to issue the instructions scheduled in this
|
||||
/// zone. It is defined as: scheduled-micro-ops / issue-width + stalls.
|
||||
/// See getStalls().
|
||||
unsigned CurrCycle;
|
||||
|
||||
/// Micro-ops issued in the current cycle
|
||||
unsigned CurrMOps;
|
||||
|
||||
/// MinReadyCycle - Cycle of the soonest available instruction.
|
||||
@ -1241,21 +1235,30 @@ public:
|
||||
unsigned ExpectedLatency;
|
||||
|
||||
// The latency of dependence chains leading into this zone.
|
||||
// For each node scheduled: DLat = max DLat, N.Depth.
|
||||
// For each node scheduled top-down: DLat = max DLat, N.Depth.
|
||||
// For each cycle scheduled: DLat -= 1.
|
||||
unsigned DependentLatency;
|
||||
|
||||
// Resources used in the scheduled zone beyond this boundary.
|
||||
SmallVector<unsigned, 16> ResourceCounts;
|
||||
/// Count the scheduled (issued) micro-ops that can be retired by
|
||||
/// time=CurrCycle assuming the first scheduled instr is retired at time=0.
|
||||
unsigned RetiredMOps;
|
||||
|
||||
// Count scheduled resources that have been executed. Resources are
|
||||
// considered executed if they become ready in the time that it takes to
|
||||
// saturate any resource including the one in question. Counts are scaled
|
||||
// for direct comparison with other resources. Counts ca be compared with
|
||||
// MOps * getMicroOpFactor and Latency * getLatencyFactor.
|
||||
SmallVector<unsigned, 16> ExecutedResCounts;
|
||||
|
||||
/// Cache the max count for a single resource.
|
||||
unsigned MaxExecutedResCount;
|
||||
|
||||
// Cache the critical resources ID in this scheduled zone.
|
||||
unsigned CritResIdx;
|
||||
unsigned ZoneCritResIdx;
|
||||
|
||||
// Is the scheduled region resource limited vs. latency limited.
|
||||
bool IsResourceLimited;
|
||||
|
||||
unsigned ExpectedCount;
|
||||
|
||||
#ifndef NDEBUG
|
||||
// Remember the greatest operand latency as an upper bound on the number of
|
||||
// times we should retry the pending queue because of a hazard.
|
||||
@ -1276,16 +1279,16 @@ public:
|
||||
MinReadyCycle = UINT_MAX;
|
||||
ExpectedLatency = 0;
|
||||
DependentLatency = 0;
|
||||
ResourceCounts.resize(1);
|
||||
assert(!ResourceCounts[0] && "nonzero count for bad resource");
|
||||
CritResIdx = 0;
|
||||
RetiredMOps = 0;
|
||||
MaxExecutedResCount = 0;
|
||||
ZoneCritResIdx = 0;
|
||||
IsResourceLimited = false;
|
||||
ExpectedCount = 0;
|
||||
#ifndef NDEBUG
|
||||
MaxObservedLatency = 0;
|
||||
#endif
|
||||
// Reserve a zero-count for invalid CritResIdx.
|
||||
ResourceCounts.resize(1);
|
||||
ExecutedResCounts.resize(1);
|
||||
assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
|
||||
}
|
||||
|
||||
/// Pending queues extend the ready queues with the same ID and the
|
||||
@ -1306,25 +1309,58 @@ public:
|
||||
return Available.getID() == ConvergingScheduler::TopQID;
|
||||
}
|
||||
|
||||
unsigned getUnscheduledLatency(SUnit *SU) const {
|
||||
if (isTop())
|
||||
return SU->getHeight();
|
||||
return SU->getDepth() + SU->Latency;
|
||||
const char *getResourceName(unsigned PIdx) {
|
||||
if (!PIdx)
|
||||
return "MOps";
|
||||
return SchedModel->getProcResource(PIdx)->Name;
|
||||
}
|
||||
|
||||
/// Get the number of latency cycles "covered" by the scheduled
|
||||
/// instructions. This is the larger of the critical path within the zone
|
||||
/// and the number of cycles required to issue the instructions.
|
||||
unsigned getScheduledLatency() const {
|
||||
return std::max(ExpectedLatency, CurrCycle);
|
||||
}
|
||||
|
||||
unsigned getUnscheduledLatency(SUnit *SU) const {
|
||||
return isTop() ? SU->getHeight() : SU->getDepth();
|
||||
}
|
||||
|
||||
unsigned getResourceCount(unsigned ResIdx) const {
|
||||
return ExecutedResCounts[ResIdx];
|
||||
}
|
||||
|
||||
/// Get the scaled count of scheduled micro-ops and resources, including
|
||||
/// executed resources.
|
||||
unsigned getCriticalCount() const {
|
||||
return ResourceCounts[CritResIdx];
|
||||
if (!ZoneCritResIdx)
|
||||
return RetiredMOps * SchedModel->getMicroOpFactor();
|
||||
return getResourceCount(ZoneCritResIdx);
|
||||
}
|
||||
|
||||
/// Get a scaled count for the minimum execution time of the scheduled
|
||||
/// micro-ops that are ready to execute by getExecutedCount. Notice the
|
||||
/// feedback loop.
|
||||
unsigned getExecutedCount() const {
|
||||
return std::max(CurrCycle * SchedModel->getLatencyFactor(),
|
||||
MaxExecutedResCount);
|
||||
}
|
||||
|
||||
bool checkHazard(SUnit *SU);
|
||||
|
||||
void setLatencyPolicy(CandPolicy &Policy);
|
||||
unsigned findMaxLatency(ArrayRef<SUnit*> ReadySUs);
|
||||
|
||||
unsigned getOtherResourceCount(unsigned &OtherCritIdx);
|
||||
|
||||
void setPolicy(CandPolicy &Policy, SchedBoundary &OtherZone);
|
||||
|
||||
void releaseNode(SUnit *SU, unsigned ReadyCycle);
|
||||
|
||||
void bumpCycle();
|
||||
void bumpCycle(unsigned NextCycle);
|
||||
|
||||
void countResource(unsigned PIdx, unsigned Cycles);
|
||||
void incExecutedResources(unsigned PIdx, unsigned Count);
|
||||
|
||||
unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle);
|
||||
|
||||
void bumpNode(SUnit *SU);
|
||||
|
||||
@ -1333,6 +1369,8 @@ public:
|
||||
void removeReady(SUnit *SU);
|
||||
|
||||
SUnit *pickOnlyChoice();
|
||||
|
||||
void dumpScheduledState();
|
||||
};
|
||||
|
||||
private:
|
||||
@ -1369,15 +1407,6 @@ public:
|
||||
virtual void registerRoots();
|
||||
|
||||
protected:
|
||||
void balanceZones(
|
||||
ConvergingScheduler::SchedBoundary &CriticalZone,
|
||||
ConvergingScheduler::SchedCandidate &CriticalCand,
|
||||
ConvergingScheduler::SchedBoundary &OppositeZone,
|
||||
ConvergingScheduler::SchedCandidate &OppositeCand);
|
||||
|
||||
void checkResourceLimits(ConvergingScheduler::SchedCandidate &TopCand,
|
||||
ConvergingScheduler::SchedCandidate &BotCand);
|
||||
|
||||
void tryCandidate(SchedCandidate &Cand,
|
||||
SchedCandidate &TryCand,
|
||||
SchedBoundary &Zone,
|
||||
@ -1407,7 +1436,8 @@ init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
|
||||
for (std::vector<SUnit>::iterator
|
||||
I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
|
||||
const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
|
||||
RemainingMicroOps += SchedModel->getNumMicroOps(I->getInstr(), SC);
|
||||
RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC)
|
||||
* SchedModel->getMicroOpFactor();
|
||||
for (TargetSchedModel::ProcResIter
|
||||
PI = SchedModel->getWriteProcResBegin(SC),
|
||||
PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
|
||||
@ -1416,17 +1446,6 @@ init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
|
||||
RemainingCounts[PIdx] += (Factor * PI->Cycles);
|
||||
}
|
||||
}
|
||||
for (unsigned PIdx = 0, PEnd = SchedModel->getNumProcResourceKinds();
|
||||
PIdx != PEnd; ++PIdx) {
|
||||
if ((int)(RemainingCounts[PIdx] - RemainingCounts[CritResIdx])
|
||||
>= (int)SchedModel->getLatencyFactor()) {
|
||||
CritResIdx = PIdx;
|
||||
}
|
||||
}
|
||||
DEBUG(dbgs() << "Critical Resource: "
|
||||
<< SchedModel->getProcResource(CritResIdx)->Name
|
||||
<< ": " << RemainingCounts[CritResIdx]
|
||||
<< " / " << SchedModel->getLatencyFactor() << '\n');
|
||||
}
|
||||
|
||||
void ConvergingScheduler::SchedBoundary::
|
||||
@ -1436,7 +1455,7 @@ init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
|
||||
SchedModel = smodel;
|
||||
Rem = rem;
|
||||
if (SchedModel->hasInstrSchedModel())
|
||||
ResourceCounts.resize(SchedModel->getNumProcResourceKinds());
|
||||
ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
|
||||
}
|
||||
|
||||
void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
|
||||
@ -1538,50 +1557,125 @@ bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Compute the remaining latency to determine whether ILP should be increased.
|
||||
void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) {
|
||||
DEBUG(dbgs() << " " << Available.getName()
|
||||
<< " DependentLatency " << DependentLatency << '\n');
|
||||
|
||||
// FIXME: compile time. In all, we visit four queues here one we should only
|
||||
// need to visit the one that was last popped if we cache the result.
|
||||
unsigned RemLatency = DependentLatency;
|
||||
for (ReadyQueue::iterator I = Available.begin(), E = Available.end();
|
||||
// Find the unscheduled node in ReadySUs with the highest latency.
|
||||
unsigned ConvergingScheduler::SchedBoundary::
|
||||
findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
|
||||
SUnit *LateSU = 0;
|
||||
unsigned RemLatency = 0;
|
||||
for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end();
|
||||
I != E; ++I) {
|
||||
unsigned L = getUnscheduledLatency(*I);
|
||||
if (L > RemLatency) {
|
||||
DEBUG(dbgs() << " " << Available.getName()
|
||||
<< " RemLatency SU(" << (*I)->NodeNum << ") " << L << '\n');
|
||||
RemLatency = L;
|
||||
LateSU = *I;
|
||||
}
|
||||
}
|
||||
for (ReadyQueue::iterator I = Pending.begin(), E = Pending.end();
|
||||
I != E; ++I) {
|
||||
unsigned L = getUnscheduledLatency(*I);
|
||||
if (L > RemLatency)
|
||||
RemLatency = L;
|
||||
if (LateSU) {
|
||||
DEBUG(dbgs() << Available.getName() << " RemLatency SU("
|
||||
<< LateSU->NodeNum << ") " << RemLatency << "c\n");
|
||||
}
|
||||
unsigned CriticalPathLimit = Rem->CriticalPath;
|
||||
DEBUG(dbgs() << " " << Available.getName()
|
||||
<< " ExpectedLatency " << ExpectedLatency
|
||||
<< " CP Limit " << CriticalPathLimit << '\n');
|
||||
return RemLatency;
|
||||
}
|
||||
|
||||
if (RemLatency + std::max(ExpectedLatency, CurrCycle) >= CriticalPathLimit
|
||||
&& RemLatency > Rem->getMaxRemainingCount(SchedModel)) {
|
||||
Policy.ReduceLatency = true;
|
||||
DEBUG(dbgs() << " Increase ILP: " << Available.getName() << '\n');
|
||||
// Count resources in this zone and the remaining unscheduled
|
||||
// instruction. Return the max count, scaled. Set OtherCritIdx to the critical
|
||||
// resource index, or zero if the zone is issue limited.
|
||||
unsigned ConvergingScheduler::SchedBoundary::
|
||||
getOtherResourceCount(unsigned &OtherCritIdx) {
|
||||
if (!SchedModel->hasInstrSchedModel())
|
||||
return 0;
|
||||
|
||||
unsigned OtherCritCount = Rem->RemIssueCount
|
||||
+ (RetiredMOps * SchedModel->getMicroOpFactor());
|
||||
DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
|
||||
<< OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
|
||||
OtherCritIdx = 0;
|
||||
for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
|
||||
PIdx != PEnd; ++PIdx) {
|
||||
unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
|
||||
if (OtherCount > OtherCritCount) {
|
||||
OtherCritCount = OtherCount;
|
||||
OtherCritIdx = PIdx;
|
||||
}
|
||||
}
|
||||
if (OtherCritIdx) {
|
||||
DEBUG(dbgs() << " " << Available.getName() << " + Remain CritRes: "
|
||||
<< OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
|
||||
<< " " << getResourceName(OtherCritIdx) << "\n");
|
||||
}
|
||||
return OtherCritCount;
|
||||
}
|
||||
|
||||
/// Set the CandPolicy for this zone given the current resources and latencies
|
||||
/// inside and outside the zone.
|
||||
void ConvergingScheduler::SchedBoundary::setPolicy(CandPolicy &Policy,
|
||||
SchedBoundary &OtherZone) {
|
||||
// Now that potential stalls have been considered, apply preemptive heuristics
|
||||
// based on the the total latency and resources inside and outside this
|
||||
// zone.
|
||||
|
||||
// Compute remaining latency. We need this both to determine whether the
|
||||
// overall schedule has become latency-limited and whether the instructions
|
||||
// outside this zone are resource or latency limited.
|
||||
//
|
||||
// The "dependent" latency is updated incrementally during scheduling as the
|
||||
// max height/depth of scheduled nodes minus the cycles since it was
|
||||
// scheduled:
|
||||
// DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
|
||||
//
|
||||
// The "independent" latency is the max ready queue depth:
|
||||
// ILat = max N.depth for N in Available|Pending
|
||||
//
|
||||
// RemainingLatency is the greater of independent and dependent latency.
|
||||
unsigned RemLatency = DependentLatency;
|
||||
RemLatency = std::max(RemLatency, findMaxLatency(Available.elements()));
|
||||
RemLatency = std::max(RemLatency, findMaxLatency(Pending.elements()));
|
||||
|
||||
// Compute the critical resource outside the zone.
|
||||
unsigned OtherCritIdx;
|
||||
unsigned OtherCount = OtherZone.getOtherResourceCount(OtherCritIdx);
|
||||
|
||||
bool OtherResLimited = false;
|
||||
if (SchedModel->hasInstrSchedModel()) {
|
||||
unsigned LFactor = SchedModel->getLatencyFactor();
|
||||
OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor;
|
||||
}
|
||||
if (!OtherResLimited && (RemLatency + CurrCycle > Rem->CriticalPath)) {
|
||||
Policy.ReduceLatency |= true;
|
||||
DEBUG(dbgs() << " " << Available.getName() << " RemainingLatency "
|
||||
<< RemLatency << " + " << CurrCycle << "c > CritPath "
|
||||
<< Rem->CriticalPath << "\n");
|
||||
}
|
||||
// If the same resource is limiting inside and outside the zone, do nothing.
|
||||
if (IsResourceLimited && OtherResLimited && (ZoneCritResIdx == OtherCritIdx))
|
||||
return;
|
||||
|
||||
DEBUG(
|
||||
if (IsResourceLimited) {
|
||||
dbgs() << " " << Available.getName() << " ResourceLimited: "
|
||||
<< getResourceName(ZoneCritResIdx) << "\n";
|
||||
}
|
||||
if (OtherResLimited)
|
||||
dbgs() << " RemainingLimit: " << getResourceName(OtherCritIdx);
|
||||
if (!IsResourceLimited && !OtherResLimited)
|
||||
dbgs() << " Latency limited both directions.\n");
|
||||
|
||||
if (IsResourceLimited && !Policy.ReduceResIdx)
|
||||
Policy.ReduceResIdx = ZoneCritResIdx;
|
||||
|
||||
if (OtherResLimited)
|
||||
Policy.DemandResIdx = OtherCritIdx;
|
||||
}
|
||||
|
||||
void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
|
||||
unsigned ReadyCycle) {
|
||||
|
||||
if (ReadyCycle < MinReadyCycle)
|
||||
MinReadyCycle = ReadyCycle;
|
||||
|
||||
// Check for interlocks first. For the purpose of other heuristics, an
|
||||
// instruction that cannot issue appears as if it's not in the ReadyQueue.
|
||||
if (ReadyCycle > CurrCycle || checkHazard(SU))
|
||||
bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
|
||||
if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU))
|
||||
Pending.push(SU);
|
||||
else
|
||||
Available.push(SU);
|
||||
@ -1591,16 +1685,17 @@ void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
|
||||
}
|
||||
|
||||
/// Move the boundary of scheduled code by one cycle.
|
||||
void ConvergingScheduler::SchedBoundary::bumpCycle() {
|
||||
unsigned Width = SchedModel->getIssueWidth();
|
||||
CurrMOps = (CurrMOps <= Width) ? 0 : CurrMOps - Width;
|
||||
|
||||
unsigned NextCycle = CurrCycle + 1;
|
||||
assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
|
||||
if (MinReadyCycle > NextCycle) {
|
||||
CurrMOps = 0;
|
||||
NextCycle = MinReadyCycle;
|
||||
void ConvergingScheduler::SchedBoundary::bumpCycle(unsigned NextCycle) {
|
||||
if (SchedModel->getMicroOpBufferSize() == 0) {
|
||||
assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
|
||||
if (MinReadyCycle > NextCycle)
|
||||
NextCycle = MinReadyCycle;
|
||||
}
|
||||
// Update the current micro-ops, which will issue in the next cycle.
|
||||
unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
|
||||
CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
|
||||
|
||||
// Decrement DependentLatency based on the next cycle.
|
||||
if ((NextCycle - CurrCycle) > DependentLatency)
|
||||
DependentLatency = 0;
|
||||
else
|
||||
@ -1620,34 +1715,52 @@ void ConvergingScheduler::SchedBoundary::bumpCycle() {
|
||||
}
|
||||
}
|
||||
CheckPending = true;
|
||||
IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
|
||||
unsigned LFactor = SchedModel->getLatencyFactor();
|
||||
IsResourceLimited =
|
||||
(int)(getCriticalCount() - (getScheduledLatency() * LFactor))
|
||||
> (int)LFactor;
|
||||
|
||||
DEBUG(dbgs() << " " << Available.getName()
|
||||
<< " Cycle: " << CurrCycle << '\n');
|
||||
DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n');
|
||||
}
|
||||
|
||||
void ConvergingScheduler::SchedBoundary::incExecutedResources(unsigned PIdx,
|
||||
unsigned Count) {
|
||||
ExecutedResCounts[PIdx] += Count;
|
||||
if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
|
||||
MaxExecutedResCount = ExecutedResCounts[PIdx];
|
||||
}
|
||||
|
||||
/// Add the given processor resource to this scheduled zone.
|
||||
void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx,
|
||||
unsigned Cycles) {
|
||||
///
|
||||
/// \param Cycles indicates the number of consecutive (non-pipelined) cycles
|
||||
/// during which this resource is consumed.
|
||||
///
|
||||
/// \return the next cycle at which the instruction may execute without
|
||||
/// oversubscribing resources.
|
||||
unsigned ConvergingScheduler::SchedBoundary::
|
||||
countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle) {
|
||||
unsigned Factor = SchedModel->getResourceFactor(PIdx);
|
||||
DEBUG(dbgs() << " " << SchedModel->getProcResource(PIdx)->Name
|
||||
<< " +(" << Cycles << "x" << Factor
|
||||
<< ") / " << SchedModel->getLatencyFactor() << '\n');
|
||||
|
||||
unsigned Count = Factor * Cycles;
|
||||
ResourceCounts[PIdx] += Count;
|
||||
DEBUG(dbgs() << " " << getResourceName(PIdx)
|
||||
<< " +" << Cycles << "x" << Factor << "u\n");
|
||||
|
||||
// Update Executed resources counts.
|
||||
incExecutedResources(PIdx, Count);
|
||||
assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
|
||||
Rem->RemainingCounts[PIdx] -= Count;
|
||||
|
||||
// Check if this resource exceeds the current critical resource by a full
|
||||
// cycle. If so, it becomes the critical resource.
|
||||
if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx])
|
||||
>= (int)SchedModel->getLatencyFactor()) {
|
||||
CritResIdx = PIdx;
|
||||
if (ZoneCritResIdx != PIdx
|
||||
&& ((int)(getResourceCount(PIdx) - getCriticalCount())
|
||||
>= (int)SchedModel->getLatencyFactor())) {
|
||||
ZoneCritResIdx = PIdx;
|
||||
DEBUG(dbgs() << " *** Critical resource "
|
||||
<< SchedModel->getProcResource(PIdx)->Name << " x"
|
||||
<< ResourceCounts[PIdx] << '\n');
|
||||
<< getResourceName(PIdx) << ": "
|
||||
<< getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n");
|
||||
}
|
||||
// TODO: We don't yet model reserved resources. It's not hard though.
|
||||
return CurrCycle;
|
||||
}
|
||||
|
||||
/// Move the boundary of scheduled code by one SUnit.
|
||||
@ -1661,38 +1774,96 @@ void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
|
||||
}
|
||||
HazardRec->EmitInstruction(SU);
|
||||
}
|
||||
// Update resource counts and critical resource.
|
||||
if (SchedModel->hasInstrSchedModel()) {
|
||||
const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
|
||||
Rem->RemainingMicroOps -= SchedModel->getNumMicroOps(SU->getInstr(), SC);
|
||||
for (TargetSchedModel::ProcResIter
|
||||
PI = SchedModel->getWriteProcResBegin(SC),
|
||||
PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
|
||||
countResource(PI->ProcResourceIdx, PI->Cycles);
|
||||
}
|
||||
}
|
||||
unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
|
||||
unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
|
||||
if (SU->getDepth() > TopLatency)
|
||||
TopLatency = SU->getDepth();
|
||||
if (SU->getHeight() > BotLatency)
|
||||
BotLatency = SU->getHeight();
|
||||
|
||||
IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
|
||||
|
||||
// Check the instruction group dispatch limit.
|
||||
// TODO: Check if this SU must end a dispatch group.
|
||||
CurrMOps += SchedModel->getNumMicroOps(SU->getInstr());
|
||||
|
||||
const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
|
||||
unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
|
||||
CurrMOps += IncMOps;
|
||||
// checkHazard prevents scheduling multiple instructions per cycle that exceed
|
||||
// issue width. However, we commonly reach the maximum. In this case
|
||||
// opportunistically bump the cycle to avoid uselessly checking everything in
|
||||
// the readyQ. Furthermore, a single instruction may produce more than one
|
||||
// cycle's worth of micro-ops.
|
||||
//
|
||||
// TODO: Also check if this SU must end a dispatch group.
|
||||
unsigned NextCycle = CurrCycle;
|
||||
if (CurrMOps >= SchedModel->getIssueWidth()) {
|
||||
DEBUG(dbgs() << " *** Max instrs at cycle " << CurrCycle << '\n');
|
||||
bumpCycle();
|
||||
++NextCycle;
|
||||
DEBUG(dbgs() << " *** Max MOps " << CurrMOps
|
||||
<< " at cycle " << CurrCycle << '\n');
|
||||
}
|
||||
unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
|
||||
DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
|
||||
|
||||
switch (SchedModel->getMicroOpBufferSize()) {
|
||||
case 0:
|
||||
assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
|
||||
break;
|
||||
case 1:
|
||||
if (ReadyCycle > NextCycle) {
|
||||
NextCycle = ReadyCycle;
|
||||
DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
|
||||
}
|
||||
break;
|
||||
default:
|
||||
// We don't currently model the OOO reorder buffer, so consider all
|
||||
// scheduled MOps to be "retired".
|
||||
break;
|
||||
}
|
||||
RetiredMOps += IncMOps;
|
||||
|
||||
// Update resource counts and critical resource.
|
||||
if (SchedModel->hasInstrSchedModel()) {
|
||||
unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
|
||||
assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
|
||||
Rem->RemIssueCount -= DecRemIssue;
|
||||
if (ZoneCritResIdx) {
|
||||
// Scale scheduled micro-ops for comparing with the critical resource.
|
||||
unsigned ScaledMOps =
|
||||
RetiredMOps * SchedModel->getMicroOpFactor();
|
||||
|
||||
// If scaled micro-ops are now more than the previous critical resource by
|
||||
// a full cycle, then micro-ops issue becomes critical.
|
||||
if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
|
||||
>= (int)SchedModel->getLatencyFactor()) {
|
||||
ZoneCritResIdx = 0;
|
||||
DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
|
||||
<< ScaledMOps / SchedModel->getLatencyFactor() << "c\n");
|
||||
}
|
||||
}
|
||||
for (TargetSchedModel::ProcResIter
|
||||
PI = SchedModel->getWriteProcResBegin(SC),
|
||||
PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
|
||||
unsigned RCycle =
|
||||
countResource(PI->ProcResourceIdx, PI->Cycles, ReadyCycle);
|
||||
if (RCycle > NextCycle)
|
||||
NextCycle = RCycle;
|
||||
}
|
||||
}
|
||||
// Update ExpectedLatency and DependentLatency.
|
||||
unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
|
||||
unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
|
||||
if (SU->getDepth() > TopLatency) {
|
||||
TopLatency = SU->getDepth();
|
||||
DEBUG(dbgs() << " " << Available.getName()
|
||||
<< " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n");
|
||||
}
|
||||
if (SU->getHeight() > BotLatency) {
|
||||
BotLatency = SU->getHeight();
|
||||
DEBUG(dbgs() << " " << Available.getName()
|
||||
<< " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n");
|
||||
}
|
||||
// If we stall for any reason, bump the cycle.
|
||||
if (NextCycle > CurrCycle) {
|
||||
bumpCycle(NextCycle);
|
||||
}
|
||||
else {
|
||||
// After updating ZoneCritResIdx and ExpectedLatency, check if we're
|
||||
// resource limited. If a stall occured, bumpCycle does this.
|
||||
unsigned LFactor = SchedModel->getLatencyFactor();
|
||||
IsResourceLimited =
|
||||
(int)(getCriticalCount() - (getScheduledLatency() * LFactor))
|
||||
> (int)LFactor;
|
||||
}
|
||||
DEBUG(dumpScheduledState());
|
||||
}
|
||||
|
||||
/// Release pending ready nodes in to the available queue. This makes them
|
||||
@ -1704,6 +1875,7 @@ void ConvergingScheduler::SchedBoundary::releasePending() {
|
||||
|
||||
// Check to see if any of the pending instructions are ready to issue. If
|
||||
// so, add them to the available queue.
|
||||
bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
|
||||
for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
|
||||
SUnit *SU = *(Pending.begin()+i);
|
||||
unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
|
||||
@ -1711,7 +1883,7 @@ void ConvergingScheduler::SchedBoundary::releasePending() {
|
||||
if (ReadyCycle < MinReadyCycle)
|
||||
MinReadyCycle = ReadyCycle;
|
||||
|
||||
if (ReadyCycle > CurrCycle)
|
||||
if (!IsBuffered && ReadyCycle > CurrCycle)
|
||||
continue;
|
||||
|
||||
if (checkHazard(SU))
|
||||
@ -1756,7 +1928,7 @@ SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
|
||||
for (unsigned i = 0; Available.empty(); ++i) {
|
||||
assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedLatency) &&
|
||||
"permanent hazard"); (void)i;
|
||||
bumpCycle();
|
||||
bumpCycle(CurrCycle + 1);
|
||||
releasePending();
|
||||
}
|
||||
if (Available.size() == 1)
|
||||
@ -1764,103 +1936,28 @@ SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/// Record the candidate policy for opposite zones with different critical
|
||||
/// resources.
|
||||
///
|
||||
/// If the CriticalZone is latency limited, don't force a policy for the
|
||||
/// candidates here. Instead, setLatencyPolicy sets ReduceLatency if needed.
|
||||
void ConvergingScheduler::balanceZones(
|
||||
ConvergingScheduler::SchedBoundary &CriticalZone,
|
||||
ConvergingScheduler::SchedCandidate &CriticalCand,
|
||||
ConvergingScheduler::SchedBoundary &OppositeZone,
|
||||
ConvergingScheduler::SchedCandidate &OppositeCand) {
|
||||
|
||||
if (!CriticalZone.IsResourceLimited)
|
||||
return;
|
||||
assert(SchedModel->hasInstrSchedModel() && "required schedmodel");
|
||||
|
||||
SchedRemainder *Rem = CriticalZone.Rem;
|
||||
|
||||
// If the critical zone is overconsuming a resource relative to the
|
||||
// remainder, try to reduce it.
|
||||
unsigned RemainingCritCount =
|
||||
Rem->RemainingCounts[CriticalZone.CritResIdx];
|
||||
if ((int)(Rem->getMaxRemainingCount(SchedModel) - RemainingCritCount)
|
||||
> (int)SchedModel->getLatencyFactor()) {
|
||||
CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx;
|
||||
DEBUG(dbgs() << " Balance " << CriticalZone.Available.getName()
|
||||
<< " reduce "
|
||||
<< SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
|
||||
<< '\n');
|
||||
// This is useful information to dump after bumpNode.
|
||||
// Note that the Queue contents are more useful before pickNodeFromQueue.
|
||||
void ConvergingScheduler::SchedBoundary::dumpScheduledState() {
|
||||
unsigned ResFactor;
|
||||
unsigned ResCount;
|
||||
if (ZoneCritResIdx) {
|
||||
ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
|
||||
ResCount = getResourceCount(ZoneCritResIdx);
|
||||
}
|
||||
// If the other zone is underconsuming a resource relative to the full zone,
|
||||
// try to increase it.
|
||||
unsigned OppositeCount =
|
||||
OppositeZone.ResourceCounts[CriticalZone.CritResIdx];
|
||||
if ((int)(OppositeZone.ExpectedCount - OppositeCount)
|
||||
> (int)SchedModel->getLatencyFactor()) {
|
||||
OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx;
|
||||
DEBUG(dbgs() << " Balance " << OppositeZone.Available.getName()
|
||||
<< " demand "
|
||||
<< SchedModel->getProcResource(OppositeZone.CritResIdx)->Name
|
||||
<< '\n');
|
||||
else {
|
||||
ResFactor = SchedModel->getMicroOpFactor();
|
||||
ResCount = RetiredMOps * SchedModel->getMicroOpFactor();
|
||||
}
|
||||
}
|
||||
|
||||
/// Determine if the scheduled zones exceed resource limits or critical path and
|
||||
/// set each candidate's ReduceHeight policy accordingly.
|
||||
void ConvergingScheduler::checkResourceLimits(
|
||||
ConvergingScheduler::SchedCandidate &TopCand,
|
||||
ConvergingScheduler::SchedCandidate &BotCand) {
|
||||
|
||||
// Set ReduceLatency to true if needed.
|
||||
Bot.setLatencyPolicy(BotCand.Policy);
|
||||
Top.setLatencyPolicy(TopCand.Policy);
|
||||
|
||||
// Handle resource-limited regions.
|
||||
if (Top.IsResourceLimited && Bot.IsResourceLimited
|
||||
&& Top.CritResIdx == Bot.CritResIdx) {
|
||||
// If the scheduled critical resource in both zones is no longer the
|
||||
// critical remaining resource, attempt to reduce resource height both ways.
|
||||
if (Top.CritResIdx != Rem.CritResIdx) {
|
||||
TopCand.Policy.ReduceResIdx = Top.CritResIdx;
|
||||
BotCand.Policy.ReduceResIdx = Bot.CritResIdx;
|
||||
DEBUG(dbgs() << " Reduce scheduled "
|
||||
<< SchedModel->getProcResource(Top.CritResIdx)->Name << '\n');
|
||||
}
|
||||
return;
|
||||
}
|
||||
// Handle latency-limited regions.
|
||||
if (!Top.IsResourceLimited && !Bot.IsResourceLimited) {
|
||||
// If the total scheduled expected latency exceeds the region's critical
|
||||
// path then reduce latency both ways.
|
||||
//
|
||||
// Just because a zone is not resource limited does not mean it is latency
|
||||
// limited. Unbuffered resource, such as max micro-ops may cause CurrCycle
|
||||
// to exceed expected latency.
|
||||
if ((Top.ExpectedLatency + Bot.ExpectedLatency >= Rem.CriticalPath)
|
||||
&& (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) {
|
||||
TopCand.Policy.ReduceLatency = true;
|
||||
BotCand.Policy.ReduceLatency = true;
|
||||
DEBUG(dbgs() << " Reduce scheduled latency " << Top.ExpectedLatency
|
||||
<< " + " << Bot.ExpectedLatency << '\n');
|
||||
}
|
||||
return;
|
||||
}
|
||||
// The critical resource is different in each zone, so request balancing.
|
||||
|
||||
// Compute the cost of each zone.
|
||||
Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle);
|
||||
Top.ExpectedCount = std::max(
|
||||
Top.getCriticalCount(),
|
||||
Top.ExpectedCount * SchedModel->getLatencyFactor());
|
||||
Bot.ExpectedCount = std::max(Bot.ExpectedLatency, Bot.CurrCycle);
|
||||
Bot.ExpectedCount = std::max(
|
||||
Bot.getCriticalCount(),
|
||||
Bot.ExpectedCount * SchedModel->getLatencyFactor());
|
||||
|
||||
balanceZones(Top, TopCand, Bot, BotCand);
|
||||
balanceZones(Bot, BotCand, Top, TopCand);
|
||||
unsigned LFactor = SchedModel->getLatencyFactor();
|
||||
dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
|
||||
<< " Retired: " << RetiredMOps;
|
||||
dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
|
||||
dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
|
||||
<< ResCount / ResFactor << " " << getResourceName(ZoneCritResIdx)
|
||||
<< "\n ExpectedLatency: " << ExpectedLatency << "c\n"
|
||||
<< (IsResourceLimited ? " - Resource" : " - Latency")
|
||||
<< " limited.\n";
|
||||
}
|
||||
|
||||
void ConvergingScheduler::SchedCandidate::
|
||||
@ -2030,8 +2127,7 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
|
||||
// Avoid serializing long latency dependence chains.
|
||||
if (Cand.Policy.ReduceLatency) {
|
||||
if (Zone.isTop()) {
|
||||
if (Cand.SU->getDepth() * SchedModel->getLatencyFactor()
|
||||
> Zone.ExpectedCount) {
|
||||
if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
|
||||
if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
|
||||
TryCand, Cand, TopDepthReduce))
|
||||
return;
|
||||
@ -2041,8 +2137,7 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
|
||||
return;
|
||||
}
|
||||
else {
|
||||
if (Cand.SU->getHeight() * SchedModel->getLatencyFactor()
|
||||
> Zone.ExpectedCount) {
|
||||
if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
|
||||
if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
|
||||
TryCand, Cand, BotHeightReduce))
|
||||
return;
|
||||
@ -2061,8 +2156,8 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
|
||||
Cand.Reason = MultiPressure;
|
||||
|
||||
// Prefer immediate defs/users of the last scheduled instruction. This is a
|
||||
// nice pressure avoidance strategy that also conserves the processor's
|
||||
// register renaming resources and keeps the machine code readable.
|
||||
// local pressure avoidance strategy that also makes the machine code
|
||||
// readable.
|
||||
if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU),
|
||||
TryCand, Cand, NextDefUse))
|
||||
return;
|
||||
@ -2224,18 +2319,19 @@ SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
|
||||
// efficient, but also provides the best heuristics for CriticalPSets.
|
||||
if (SUnit *SU = Bot.pickOnlyChoice()) {
|
||||
IsTopNode = false;
|
||||
DEBUG(dbgs() << "Pick Top NOCAND\n");
|
||||
DEBUG(dbgs() << "Pick Bot NOCAND\n");
|
||||
return SU;
|
||||
}
|
||||
if (SUnit *SU = Top.pickOnlyChoice()) {
|
||||
IsTopNode = true;
|
||||
DEBUG(dbgs() << "Pick Bot NOCAND\n");
|
||||
DEBUG(dbgs() << "Pick Top NOCAND\n");
|
||||
return SU;
|
||||
}
|
||||
CandPolicy NoPolicy;
|
||||
SchedCandidate BotCand(NoPolicy);
|
||||
SchedCandidate TopCand(NoPolicy);
|
||||
checkResourceLimits(TopCand, BotCand);
|
||||
Bot.setPolicy(BotCand.Policy, Top);
|
||||
Top.setPolicy(TopCand.Policy, Bot);
|
||||
|
||||
// Prefer bottom scheduling when heuristics are silent.
|
||||
pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
|
||||
@ -2364,13 +2460,13 @@ void ConvergingScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
|
||||
/// them here. See comments in biasPhysRegCopy.
|
||||
void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
|
||||
if (IsTopNode) {
|
||||
SU->TopReadyCycle = Top.CurrCycle;
|
||||
SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.CurrCycle);
|
||||
Top.bumpNode(SU);
|
||||
if (SU->hasPhysRegUses)
|
||||
reschedulePhysRegCopies(SU, true);
|
||||
}
|
||||
else {
|
||||
SU->BotReadyCycle = Bot.CurrCycle;
|
||||
SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.CurrCycle);
|
||||
Bot.bumpNode(SU);
|
||||
if (SU->hasPhysRegDefs)
|
||||
reschedulePhysRegCopies(SU, false);
|
||||
|
Loading…
Reference in New Issue
Block a user