[AMDGPU] Add GCNMaxILPSchedStrategy
Creates a new scheduling strategy that attempts to maximize ILP for a single wave.

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D130869
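The new strategy is selected through the machine scheduler registry under the name "gcn-max-ilp". A minimal invocation, mirroring the RUN lines this patch adds to the tests below (the input/output file names are placeholders; -mcpu and -verify-machineinstrs are the usual test options, not requirements of the strategy):

    llc -march=amdgcn -mcpu=tonga -misched=gcn-max-ilp -verify-machineinstrs kernel.ll -o kernel.s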
parent ce6aff8d13
commit d7100b398b
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

@@ -426,6 +426,15 @@ createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
   return DAG;
 }
 
+static ScheduleDAGInstrs *
+createGCNMaxILPMachineScheduler(MachineSchedContext *C) {
+  ScheduleDAGMILive *DAG =
+      new GCNScheduleDAGMILive(C, std::make_unique<GCNMaxILPSchedStrategy>(C));
+  DAG->addMutation(createIGroupLPDAGMutation());
+  DAG->addMutation(createSchedBarrierDAGMutation());
+  return DAG;
+}
+
 static ScheduleDAGInstrs *
 createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
   const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
@@ -464,19 +473,23 @@ GCNMaxOccupancySchedRegistry("gcn-max-occupancy",
                              createGCNMaxOccupancyMachineScheduler);
 
 static MachineSchedRegistry
-IterativeGCNMaxOccupancySchedRegistry("gcn-max-occupancy-experimental",
-  "Run GCN scheduler to maximize occupancy (experimental)",
-  createIterativeGCNMaxOccupancyMachineScheduler);
+GCNMaxILPSchedRegistry("gcn-max-ilp", "Run GCN scheduler to maximize ilp",
+                       createGCNMaxILPMachineScheduler);
 
-static MachineSchedRegistry
-GCNMinRegSchedRegistry("gcn-minreg",
-  "Run GCN iterative scheduler for minimal register usage (experimental)",
-  createMinRegScheduler);
+static MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry(
+    "gcn-iterative-max-occupancy-experimental",
+    "Run GCN scheduler to maximize occupancy (experimental)",
+    createIterativeGCNMaxOccupancyMachineScheduler);
 
-static MachineSchedRegistry
-GCNILPSchedRegistry("gcn-ilp",
-  "Run GCN iterative scheduler for ILP scheduling (experimental)",
-  createIterativeILPMachineScheduler);
+static MachineSchedRegistry GCNMinRegSchedRegistry(
+    "gcn-iterative-minreg",
+    "Run GCN iterative scheduler for minimal register usage (experimental)",
+    createMinRegScheduler);
+
+static MachineSchedRegistry GCNILPSchedRegistry(
+    "gcn-iterative-ilp",
+    "Run GCN iterative scheduler for ILP scheduling (experimental)",
+    createIterativeILPMachineScheduler);
 
 static StringRef computeDataLayout(const Triple &TT) {
   if (TT.getArch() == Triple::r600) {
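For context on the registration mechanism used above: a MachineSchedRegistry entry binds a -misched option name to a factory returning the scheduler DAG. A minimal sketch of the same pattern with hypothetical names (createMyScheduler, MySchedRegistry, and "my-sched" are placeholders for illustration, not part of this patch):

#include "llvm/CodeGen/MachineScheduler.h"
#include <memory>
using namespace llvm;

// Factory: any MachineSchedStrategy can be wrapped in a ScheduleDAGMILive.
static ScheduleDAGInstrs *createMyScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
}

// Binds "-misched=my-sched" to the factory above.
static MachineSchedRegistry
MySchedRegistry("my-sched", "Run my custom scheduler", createMyScheduler);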
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp

@@ -38,12 +38,11 @@ cl::opt<bool>
                    "reduction scheduling stage."),
     cl::init(false));
 
-GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
-    const MachineSchedContext *C)
+GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
     : GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
       HasHighPressure(false) {}
 
-void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) {
+void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
   GenericScheduler::initialize(DAG);
 
   MF = &DAG->MF;
@@ -74,8 +73,9 @@ void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) {
   VGPRExcessLimit = std::min(VGPRExcessLimit - ErrorMargin, VGPRExcessLimit);
 }
 
-void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
-                                     bool AtTop, const RegPressureTracker &RPTracker,
+void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
+                                     bool AtTop,
+                                     const RegPressureTracker &RPTracker,
                                      const SIRegisterInfo *SRI,
                                      unsigned SGPRPressure,
                                      unsigned VGPRPressure) {
@@ -161,7 +161,7 @@ void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU
 
 // This function is mostly cut and pasted from
 // GenericScheduler::pickNodeFromQueue()
-void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
+void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
                                          const CandPolicy &ZonePolicy,
                                          const RegPressureTracker &RPTracker,
                                          SchedCandidate &Cand) {
@@ -181,7 +181,7 @@ void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
                   SGPRPressure, VGPRPressure);
     // Pass SchedBoundary only when comparing nodes from the same boundary.
     SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
-    GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg);
+    tryCandidate(Cand, TryCand, ZoneArg);
     if (TryCand.Reason != NoCand) {
       // Initialize resource delta if needed in case future heuristics query it.
       if (TryCand.ResDelta == SchedResourceDelta())
@@ -194,7 +194,7 @@ void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
 
 // This function is mostly cut and pasted from
 // GenericScheduler::pickNodeBidirectional()
-SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
+SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
   // Schedule as far as possible in the direction of no choice. This is most
   // efficient, but also provides the best heuristics for CriticalPSets.
   if (SUnit *SU = Bot.pickOnlyChoice()) {
@@ -259,7 +259,7 @@ SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
              dbgs() << "Bot Cand: "; traceCandidate(BotCand););
   SchedCandidate Cand = BotCand;
   TopCand.Reason = NoCand;
-  GenericScheduler::tryCandidate(Cand, TopCand, nullptr);
+  tryCandidate(Cand, TopCand, nullptr);
   if (TopCand.Reason != NoCand) {
     Cand.setBest(TopCand);
   }
@@ -271,7 +271,7 @@ SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
 
 // This function is mostly cut and pasted from
 // GenericScheduler::pickNode()
-SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) {
+SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
   if (DAG->top() == DAG->bottom()) {
     assert(Top.Available.empty() && Top.Pending.empty() &&
            Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
@@ -314,6 +314,129 @@ SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) {
   return SU;
 }
 
+GCNSchedStageID GCNSchedStrategy::getCurrentStage() {
+  assert(CurrentStage && CurrentStage != SchedStages.end());
+  return *CurrentStage;
+}
+
+bool GCNSchedStrategy::advanceStage() {
+  assert(CurrentStage != SchedStages.end());
+  if (!CurrentStage)
+    CurrentStage = SchedStages.begin();
+  else
+    CurrentStage++;
+
+  return CurrentStage != SchedStages.end();
+}
+
+bool GCNSchedStrategy::hasNextStage() const {
+  assert(CurrentStage);
+  return std::next(CurrentStage) != SchedStages.end();
+}
+
+GCNSchedStageID GCNSchedStrategy::getNextStage() const {
+  assert(CurrentStage && std::next(CurrentStage) != SchedStages.end());
+  return *std::next(CurrentStage);
+}
+
+GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
+    const MachineSchedContext *C)
+    : GCNSchedStrategy(C) {
+  SchedStages.push_back(GCNSchedStageID::OccInitialSchedule);
+  SchedStages.push_back(GCNSchedStageID::UnclusteredHighRPReschedule);
+  SchedStages.push_back(GCNSchedStageID::ClusteredLowOccupancyReschedule);
+  SchedStages.push_back(GCNSchedStageID::PreRARematerialize);
+}
+
+GCNMaxILPSchedStrategy::GCNMaxILPSchedStrategy(const MachineSchedContext *C)
+    : GCNSchedStrategy(C) {
+  SchedStages.push_back(GCNSchedStageID::ILPInitialSchedule);
+}
+
+bool GCNMaxILPSchedStrategy::tryCandidate(SchedCandidate &Cand,
+                                          SchedCandidate &TryCand,
+                                          SchedBoundary *Zone) const {
+  // Initialize the candidate if needed.
+  if (!Cand.isValid()) {
+    TryCand.Reason = NodeOrder;
+    return true;
+  }
+
+  // Avoid spilling by exceeding the register limit.
+  if (DAG->isTrackingPressure() &&
+      tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
+                  RegExcess, TRI, DAG->MF))
+    return TryCand.Reason != NoCand;
+
+  // Bias PhysReg Defs and copies to their uses and defined respectively.
+  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
+                 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
+    return TryCand.Reason != NoCand;
+
+  bool SameBoundary = Zone != nullptr;
+  if (SameBoundary) {
+    // Prioritize instructions that read unbuffered resources by stall cycles.
+    if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
+                Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
+      return TryCand.Reason != NoCand;
+
+    // Avoid critical resource consumption and balance the schedule.
+    TryCand.initResourceDelta(DAG, SchedModel);
+    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
+                TryCand, Cand, ResourceReduce))
+      return TryCand.Reason != NoCand;
+    if (tryGreater(TryCand.ResDelta.DemandedResources,
+                   Cand.ResDelta.DemandedResources, TryCand, Cand,
+                   ResourceDemand))
+      return TryCand.Reason != NoCand;
+
+    // Unconditionally try to reduce latency.
+    if (tryLatency(TryCand, Cand, *Zone))
+      return TryCand.Reason != NoCand;
+
+    // Weak edges are for clustering and other constraints.
+    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
+                getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
+      return TryCand.Reason != NoCand;
+  }
+
+  // Keep clustered nodes together to encourage downstream peephole
+  // optimizations which may reduce resource requirements.
+  //
+  // This is a best effort to set things up for a post-RA pass. Optimizations
+  // like generating loads of multiple registers should ideally be done within
+  // the scheduler pass by combining the loads during DAG postprocessing.
+  const SUnit *CandNextClusterSU =
+      Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
+  const SUnit *TryCandNextClusterSU =
+      TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
+  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
+                 Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))
+    return TryCand.Reason != NoCand;
+
+  // Avoid increasing the max critical pressure in the scheduled region.
+  if (DAG->isTrackingPressure() &&
+      tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
+                  TryCand, Cand, RegCritical, TRI, DAG->MF))
+    return TryCand.Reason != NoCand;
+
+  // Avoid increasing the max pressure of the entire region.
+  if (DAG->isTrackingPressure() &&
+      tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
+                  Cand, RegMax, TRI, DAG->MF))
+    return TryCand.Reason != NoCand;
+
+  if (SameBoundary) {
+    // Fall through to original instruction order.
+    if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) ||
+        (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
+      TryCand.Reason = NodeOrder;
+      return true;
+    }
+  }
+  return false;
+}
+
 GCNScheduleDAGMILive::GCNScheduleDAGMILive(
     MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S)
     : ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()),
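The heart of the new strategy is the tryCandidate override above. Relative to the GenericScheduler version, latency reduction is applied unconditionally and ahead of the clustering and critical/max register-pressure heuristics, which is what trades occupancy for ILP. The ordering the hunk implements, restated as a summary of the code shown (no new patch content):

// 1. Avoid exceeding the register limit (spilling); bias physreg defs/copies.
// 2. Within one boundary: stall cycles, then critical-resource balance,
//    then latency (unconditionally), then weak edges.
// 3. Keep clustered nodes together.
// 4. Only after that, avoid raising critical and overall max pressure.
// 5. Fall back to original instruction order.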
@@ -323,6 +446,22 @@ GCNScheduleDAGMILive::GCNScheduleDAGMILive(
   LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
 }
 
+std::unique_ptr<GCNSchedStage>
+GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) {
+  switch (SchedStageID) {
+  case GCNSchedStageID::OccInitialSchedule:
+    return std::make_unique<OccInitialScheduleStage>(SchedStageID, *this);
+  case GCNSchedStageID::UnclusteredHighRPReschedule:
+    return std::make_unique<UnclusteredHighRPStage>(SchedStageID, *this);
+  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
+    return std::make_unique<ClusteredLowOccStage>(SchedStageID, *this);
+  case GCNSchedStageID::PreRARematerialize:
+    return std::make_unique<PreRARematStage>(SchedStageID, *this);
+  case GCNSchedStageID::ILPInitialSchedule:
+    return std::make_unique<ILPInitialScheduleStage>(SchedStageID, *this);
+  }
+}
+
 void GCNScheduleDAGMILive::schedule() {
   // Collect all scheduling regions. The actual scheduling is performed in
   // GCNScheduleDAGMILive::finalizeSchedule.
@@ -439,18 +578,13 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
 
 void GCNScheduleDAGMILive::runSchedStages() {
   LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
-  InitialScheduleStage S0(GCNSchedStageID::InitialSchedule, *this);
-  UnclusteredHighRPStage S1(GCNSchedStageID::UnclusteredHighRPReschedule,
-                            *this);
-  ClusteredLowOccStage S2(GCNSchedStageID::ClusteredLowOccupancyReschedule,
-                          *this);
-  PreRARematStage S3(GCNSchedStageID::PreRARematerialize, *this);
-  GCNSchedStage *SchedStages[] = {&S0, &S1, &S2, &S3};
 
   if (!Regions.empty())
     BBLiveInMap = getBBLiveInMap();
 
-  for (auto *Stage : SchedStages) {
+  GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
+  while (S.advanceStage()) {
+    auto Stage = createSchedStage(S.getCurrentStage());
     if (!Stage->initGCNSchedStage())
       continue;
 
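Stage selection is now data-driven: each strategy pushes its stage IDs in its constructor, and runSchedStages walks them through advanceStage/createSchedStage instead of the old hard-coded stage array. Restating the constructors above, the resulting pipelines are:

// GCNMaxOccupancySchedStrategy:
//   OccInitialSchedule -> UnclusteredHighRPReschedule
//     -> ClusteredLowOccupancyReschedule -> PreRARematerialize
// GCNMaxILPSchedStrategy:
//   ILPInitialSchedule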
@@ -475,8 +609,8 @@ void GCNScheduleDAGMILive::runSchedStages() {
 #ifndef NDEBUG
 raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) {
   switch (StageID) {
-  case GCNSchedStageID::InitialSchedule:
-    OS << "Initial Schedule";
+  case GCNSchedStageID::OccInitialSchedule:
+    OS << "Max Occupancy Initial Schedule";
     break;
   case GCNSchedStageID::UnclusteredHighRPReschedule:
     OS << "Unclustered High Register Pressure Reschedule";
@@ -487,14 +621,18 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) {
   case GCNSchedStageID::PreRARematerialize:
     OS << "Pre-RA Rematerialize";
     break;
+  case GCNSchedStageID::ILPInitialSchedule:
+    OS << "Max ILP Initial Schedule";
+    break;
   }
 
   return OS;
 }
 #endif
 
 GCNSchedStage::GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
-    : DAG(DAG), S(static_cast<GCNMaxOccupancySchedStrategy &>(*DAG.SchedImpl)),
-      MF(DAG.MF), MFI(DAG.MFI), ST(DAG.ST), StageID(StageID) {}
+    : DAG(DAG), S(static_cast<GCNSchedStrategy &>(*DAG.SchedImpl)), MF(DAG.MF),
+      MFI(DAG.MFI), ST(DAG.ST), StageID(StageID) {}
 
 bool GCNSchedStage::initGCNSchedStage() {
   if (!DAG.LIS)
@@ -564,6 +702,7 @@ bool PreRARematStage::initGCNSchedStage() {
   // inbetween the defs and region we sinked the def to. Cached pressure
   // for regions where a def is sinked from will also be invalidated. Will
   // need to be fixed if there is another pass after this pass.
+  assert(!S.hasNextStage());
 
   collectRematerializableInstructions();
   if (RematerializableInsts.empty() || !sinkTriviallyRematInsts(ST, TII))
@@ -674,7 +813,7 @@ void GCNSchedStage::setupNewBlock() {
   DAG.startBlock(CurrentMBB);
   // Get real RP for the region if it hasn't be calculated before. After the
   // initial schedule stage real RP will be collected after scheduling.
-  if (StageID == GCNSchedStageID::InitialSchedule)
+  if (StageID == GCNSchedStageID::OccInitialSchedule)
     DAG.computeBlockPressure(RegionIdx, CurrentMBB);
 }
 
@@ -767,7 +906,7 @@ bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {
   return false;
 }
 
-bool InitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
+bool OccInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
   if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
     return true;
 
@@ -810,6 +949,13 @@ bool PreRARematStage::shouldRevertScheduling(unsigned WavesAfter) {
   return false;
 }
 
+bool ILPInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
+  if (mayCauseSpilling(WavesAfter))
+    return true;
+
+  return false;
+}
+
 bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
   if (WavesAfter <= MFI.getMinWavesPerEU() &&
       !PressureAfter.less(ST, PressureBefore) &&
@@ -826,7 +972,8 @@ void GCNSchedStage::revertScheduling() {
       PressureBefore.getOccupancy(ST) == DAG.MinOccupancy;
   LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
   DAG.RescheduleRegions[RegionIdx] =
-      (nextStage(StageID)) != GCNSchedStageID::UnclusteredHighRPReschedule;
+      S.hasNextStage() &&
+      S.getNextStage() != GCNSchedStageID::UnclusteredHighRPReschedule;
   DAG.RegionEnd = DAG.RegionBegin;
   int SkippedDebugInstr = 0;
   for (MachineInstr *MI : Unsched) {
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h

@@ -22,12 +22,25 @@ namespace llvm {
 class SIMachineFunctionInfo;
 class SIRegisterInfo;
 class GCNSubtarget;
+class GCNSchedStage;
+
+enum class GCNSchedStageID : unsigned {
+  OccInitialSchedule = 0,
+  UnclusteredHighRPReschedule = 1,
+  ClusteredLowOccupancyReschedule = 2,
+  PreRARematerialize = 3,
+  ILPInitialSchedule = 4
+};
+
+#ifndef NDEBUG
+raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID);
+#endif
 
 /// This is a minimal scheduler strategy. The main difference between this
 /// and the GenericScheduler is that GCNSchedStrategy uses different
-/// heuristics to determine excess/critical pressure sets. Its goal is to
-/// maximize kernel occupancy (i.e. maximum number of waves per simd).
-class GCNMaxOccupancySchedStrategy final : public GenericScheduler {
+/// heuristics to determine excess/critical pressure sets.
+class GCNSchedStrategy : public GenericScheduler {
+protected:
   SUnit *pickNodeBidirectional(bool &IsTopNode);
 
   void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
@@ -51,6 +64,12 @@ class GCNMaxOccupancySchedStrategy final : public GenericScheduler {
 
   MachineFunction *MF;
 
+  // Scheduling stages for this strategy.
+  SmallVector<GCNSchedStageID, 4> SchedStages;
+
+  // Pointer to the current SchedStageID.
+  SmallVectorImpl<GCNSchedStageID>::iterator CurrentStage = nullptr;
+
 public:
   // schedule() have seen register pressure over the critical limits and had to
   // track register pressure for actual scheduling heuristics.
@@ -69,7 +88,7 @@ public:
 
   unsigned VGPRCriticalLimit;
 
-  GCNMaxOccupancySchedStrategy(const MachineSchedContext *C);
+  GCNSchedStrategy(const MachineSchedContext *C);
 
   SUnit *pickNode(bool &IsTopNode) override;
 
@@ -78,40 +97,42 @@ public:
   unsigned getTargetOccupancy() { return TargetOccupancy; }
 
   void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; }
+
+  GCNSchedStageID getCurrentStage();
+
+  // Advances stage. Returns true if there are remaining stages.
+  bool advanceStage();
+
+  bool hasNextStage() const;
+
+  GCNSchedStageID getNextStage() const;
 };
 
-enum class GCNSchedStageID : unsigned {
-  InitialSchedule = 0,
-  UnclusteredHighRPReschedule = 1,
-  ClusteredLowOccupancyReschedule = 2,
-  PreRARematerialize = 3,
-  LastStage = PreRARematerialize
-};
+/// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
+/// maximum number of waves per simd).
+class GCNMaxOccupancySchedStrategy final : public GCNSchedStrategy {
+public:
+  GCNMaxOccupancySchedStrategy(const MachineSchedContext *C);
+};
 
-#ifndef NDEBUG
-raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID);
-#endif
+/// The goal of this scheduling strategy is to maximize ILP for a single wave
+/// (i.e. latency hiding).
+class GCNMaxILPSchedStrategy final : public GCNSchedStrategy {
+protected:
+  bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
+                    SchedBoundary *Zone) const override;
+
+public:
+  GCNMaxILPSchedStrategy(const MachineSchedContext *C);
+};
 
-inline GCNSchedStageID &operator++(GCNSchedStageID &Stage, int) {
-  assert(Stage != GCNSchedStageID::PreRARematerialize);
-  Stage = static_cast<GCNSchedStageID>(static_cast<unsigned>(Stage) + 1);
-  return Stage;
-}
-
-inline GCNSchedStageID nextStage(const GCNSchedStageID Stage) {
-  return static_cast<GCNSchedStageID>(static_cast<unsigned>(Stage) + 1);
-}
-
-inline bool operator>(GCNSchedStageID &LHS, GCNSchedStageID &RHS) {
-  return static_cast<unsigned>(LHS) > static_cast<unsigned>(RHS);
-}
-
 class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
   friend class GCNSchedStage;
-  friend class InitialScheduleStage;
+  friend class OccInitialScheduleStage;
   friend class UnclusteredHighRPStage;
   friend class ClusteredLowOccStage;
   friend class PreRARematStage;
+  friend class ILPInitialScheduleStage;
 
   const GCNSubtarget &ST;
 
@@ -169,6 +190,8 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
 
   void runSchedStages();
 
+  std::unique_ptr<GCNSchedStage> createSchedStage(GCNSchedStageID SchedStageID);
+
 public:
   GCNScheduleDAGMILive(MachineSchedContext *C,
                        std::unique_ptr<MachineSchedStrategy> S);
@@ -183,7 +206,7 @@ class GCNSchedStage {
 protected:
   GCNScheduleDAGMILive &DAG;
 
-  GCNMaxOccupancySchedStrategy &S;
+  GCNSchedStrategy &S;
 
   MachineFunction &MF;
 
@@ -245,11 +268,11 @@ public:
   virtual ~GCNSchedStage() = default;
 };
 
-class InitialScheduleStage : public GCNSchedStage {
+class OccInitialScheduleStage : public GCNSchedStage {
 public:
   bool shouldRevertScheduling(unsigned WavesAfter) override;
 
-  InitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
+  OccInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
       : GCNSchedStage(StageID, DAG) {}
 };
 
@@ -324,6 +347,14 @@ public:
       : GCNSchedStage(StageID, DAG) {}
 };
 
+class ILPInitialScheduleStage : public GCNSchedStage {
+public:
+  bool shouldRevertScheduling(unsigned WavesAfter) override;
+
+  ILPInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
+      : GCNSchedStage(StageID, DAG) {}
+};
+
 } // End namespace llvm
 
 #endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
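To summarize the refactor in this header: the former monolithic GCNMaxOccupancySchedStrategy is split into a common base plus two thin subclasses (a restatement of the hunks above, not additional patch content):

// GenericScheduler
//   -> GCNSchedStrategy (shared pick/init logic, owns the stage list)
//        -> GCNMaxOccupancySchedStrategy (four-stage occupancy pipeline)
//        -> GCNMaxILPSchedStrategy (single ILP stage, tryCandidate override)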
The remaining hunks update the lit tests: the iterative schedulers are now registered under "gcn-iterative-*" names, and a RUN line exercising the new gcn-max-ilp scheduler is added.

@@ -1,4 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=tonga -misched=gcn-ilp -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=tonga -misched=gcn-iterative-ilp -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=tonga -misched=gcn-max-ilp -verify-machineinstrs < %s | FileCheck %s
 
 ; CHECK: NumVgprs: {{[0-9][0-9][0-9]$}}

@@ -1,6 +1,6 @@
 ; RUN: llc -enable-amdgpu-aa=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -enable-amdgpu-aa=0 -march=amdgcn -mcpu=tonga -misched=gcn-minreg -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -enable-amdgpu-aa=0 -march=amdgcn -mcpu=tonga -misched=gcn-max-occupancy-experimental -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -enable-amdgpu-aa=0 -march=amdgcn -mcpu=tonga -misched=gcn-iterative-minreg -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -enable-amdgpu-aa=0 -march=amdgcn -mcpu=tonga -misched=gcn-iterative-max-occupancy-experimental -verify-machineinstrs < %s | FileCheck %s
 
 ; We expect a two digit VGPR usage here, not a three digit.
 ; CHECK: NumVgprs: {{[0-9][0-9]$}}

@@ -1,7 +1,7 @@
-; RUN: llc -march=amdgcn -mcpu=tahiti -enable-amdgpu-aa=0 -misched=gcn-minreg -verify-machineinstrs < %s | FileCheck --check-prefix=SI-MINREG %s
-; RUN: llc -march=amdgcn -mcpu=tahiti -enable-amdgpu-aa=0 -misched=gcn-max-occupancy-experimental -verify-machineinstrs < %s | FileCheck --check-prefix=SI-MAXOCC %s
-; RUN: llc -march=amdgcn -mcpu=fiji -enable-amdgpu-aa=0 -misched=gcn-minreg -verify-machineinstrs < %s | FileCheck --check-prefix=VI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -enable-amdgpu-aa=0 -misched=gcn-max-occupancy-experimental -verify-machineinstrs < %s | FileCheck --check-prefix=VI %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -enable-amdgpu-aa=0 -misched=gcn-iterative-minreg -verify-machineinstrs < %s | FileCheck --check-prefix=SI-MINREG %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -enable-amdgpu-aa=0 -misched=gcn-iterative-max-occupancy-experimental -verify-machineinstrs < %s | FileCheck --check-prefix=SI-MAXOCC %s
+; RUN: llc -march=amdgcn -mcpu=fiji -enable-amdgpu-aa=0 -misched=gcn-iterative-minreg -verify-machineinstrs < %s | FileCheck --check-prefix=VI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -enable-amdgpu-aa=0 -misched=gcn-iterative-max-occupancy-experimental -verify-machineinstrs < %s | FileCheck --check-prefix=VI %s
 
 ; SI-MINREG: NumSgprs: {{[1-9]$}}
 ; SI-MINREG: NumVgprs: {{[1-9]$}}

@@ -1,5 +1,5 @@
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=MISCHED %s
-; RUN: llc -march=amdgcn -mcpu=tonga -misched=gcn-ilp -verify-machineinstrs < %s | FileCheck --check-prefix=GCN-ILP %s
+; RUN: llc -march=amdgcn -mcpu=tonga -misched=gcn-iterative-ilp -verify-machineinstrs < %s | FileCheck --check-prefix=GCN-ILP %s
 
 ; Test the scheduler when only one wave is requested. The result should be high register usage and max ILP.