Add an ILP scheduler. This is a register pressure aware scheduler that's

appropriate for targets without detailed instruction itineraries.
The scheduler schedules for increased instruction level parallelism in
low register pressure situations; it schedules to reduce register pressure
when the register pressure becomes high.

On x86_64, this is a win for all tests in CFP2000. It also sped up 256.bzip2
by 16%.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@109300 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2010-07-24 00:39:05 +00:00
parent 3c8e1bee63
commit 70017e44cd
6 changed files with 109 additions and 16 deletions

View File

@ -78,12 +78,19 @@ ScheduleDAGSDNodes *createTDRRListDAGScheduler(SelectionDAGISel *IS,
ScheduleDAGSDNodes *createSourceListDAGScheduler(SelectionDAGISel *IS,
CodeGenOpt::Level OptLevel);
/// createHybridListDAGScheduler - This creates a bottom up hybrid register
/// usage reduction list scheduler that make use of latency information to
/// avoid stalls for long latency instructions.
/// createHybridListDAGScheduler - This creates a bottom up register pressure
/// aware list scheduler that makes use of latency information to avoid stalls
/// for long latency instructions in low register pressure mode. In high
/// register pressure mode it schedules to reduce register pressure.
ScheduleDAGSDNodes *createHybridListDAGScheduler(SelectionDAGISel *IS,
CodeGenOpt::Level);
/// createILPListDAGScheduler - This creates a bottom-up, register pressure
/// aware list scheduler that tries to increase instruction level parallelism
/// in low register pressure mode. In high register pressure mode it schedules
/// to reduce register pressure.
///
/// The CodeGenOpt::Level parameter is left unnamed by the definition, so the
/// optimization level does not influence the scheduler that is created.
ScheduleDAGSDNodes *createILPListDAGScheduler(SelectionDAGISel *IS,
CodeGenOpt::Level);
/// createTDListDAGScheduler - This creates a top-down list scheduler with
/// a hazard recognizer.
ScheduleDAGSDNodes *createTDListDAGScheduler(SelectionDAGISel *IS,

View File

@ -75,7 +75,8 @@ namespace Sched {
None, // No preference
Latency, // Scheduling for shortest total latency.
RegPressure, // Scheduling for lowest register pressure.
Hybrid // Scheduling for both latency and register pressure.
Hybrid, // Scheduling for both latency and register pressure.
ILP // Scheduling for ILP in low register pressure mode.
};
}

View File

@ -55,10 +55,16 @@ static RegisterScheduler
static RegisterScheduler
hybridListDAGScheduler("list-hybrid",
"Bottom-up rr list scheduling which avoid stalls for "
"long latency instructions",
"Bottom-up register pressure aware list scheduling "
"which tries to balance latency and register pressure",
createHybridListDAGScheduler);
// Registration object for the ILP list scheduler: it ties the name "list-ilp"
// and the description below to the createILPListDAGScheduler factory.
// NOTE(review): presumably this is what makes "list-ilp" selectable alongside
// the other registered schedulers - confirm against RegisterScheduler.
static RegisterScheduler
ILPListDAGScheduler("list-ilp",
"Bottom-up register pressure aware list scheduling "
"which tries to balance ILP and register pressure",
createILPListDAGScheduler);
namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
@ -995,6 +1001,16 @@ namespace {
bool operator()(const SUnit* left, const SUnit* right) const;
};
/// ilp_ls_rr_sort - Comparison functor used to instantiate
/// RegReductionPriorityQueue for ILP scheduling. It carries a pointer to the
/// queue it is attached to (SPQ), which the out-of-line operator() consults
/// when comparing two scheduling units.
struct ilp_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
  /// Attach the functor to the priority queue \p Queue.
  ilp_ls_rr_sort(RegReductionPriorityQueue<ilp_ls_rr_sort> *Queue)
    : SPQ(Queue) {}

  /// Copies refer to the same queue as \p Other.
  ilp_ls_rr_sort(const ilp_ls_rr_sort &Other)
    : SPQ(Other.SPQ) {}

  /// Ordering predicate over two scheduling units; defined out of line.
  bool operator()(const SUnit* left, const SUnit* right) const;

  /// The queue this functor was constructed with.
  RegReductionPriorityQueue<ilp_ls_rr_sort> *SPQ;
};
} // end anonymous namespace
/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
@ -1323,14 +1339,15 @@ namespace {
if (!N->isMachineOpcode()) {
if (N->getOpcode() != ISD::CopyToReg)
return;
} else {
unsigned Opc = N->getMachineOpcode();
if (Opc == TargetOpcode::EXTRACT_SUBREG ||
Opc == TargetOpcode::INSERT_SUBREG ||
Opc == TargetOpcode::SUBREG_TO_REG ||
Opc == TargetOpcode::REG_SEQUENCE ||
Opc == TargetOpcode::IMPLICIT_DEF)
return;
}
unsigned Opc = N->getMachineOpcode();
if (Opc == TargetOpcode::EXTRACT_SUBREG ||
Opc == TargetOpcode::INSERT_SUBREG ||
Opc == TargetOpcode::SUBREG_TO_REG ||
Opc == TargetOpcode::REG_SEQUENCE ||
Opc == TargetOpcode::IMPLICIT_DEF)
return;
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
@ -1427,6 +1444,9 @@ namespace {
typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
HybridBURRPriorityQueue;
/// ILPBURRPriorityQueue - RegReductionPriorityQueue instantiated with the
/// ilp_ls_rr_sort comparison functor; this is the queue type constructed by
/// createILPListDAGScheduler.
typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
ILPBURRPriorityQueue;
}
/// closestSucc - Returns the scheduled cycle of the successor which is
@ -1529,6 +1549,8 @@ bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
unsigned LExcess, RExcess;
bool LHigh = SPQ->HighRegPressure(left, LExcess);
bool RHigh = SPQ->HighRegPressure(right, RExcess);
// Avoid causing spills. If register pressure is high, schedule for
// register pressure reduction.
if (LHigh && !RHigh)
return true;
else if (!LHigh && RHigh)
@ -1538,7 +1560,6 @@ bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
return true;
else if (LExcess < RExcess)
return false;
// Otherwise schedule for register pressure reduction.
} else {
// Low register pressure situation, schedule for latency if possible.
bool LStall = left->SchedulingPref == Sched::Latency &&
@ -1571,6 +1592,33 @@ bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
return BURRSort(left, right, SPQ);
}
/// ilp_ls_rr_sort::operator() - Ordering predicate over the scheduling units
/// \p left and \p right, used by the ILP priority queue.
///
/// The decision is made in three stages:
///   1. Register pressure. SPQ->HighRegPressure() is queried for both nodes.
///      If exactly one of them is reported as high pressure the answer is
///      determined by that alone (true when it is \p left). If both are, the
///      node with the larger excess decides (true when \p left's excess is
///      larger).
///   2. ILP. When neither node is under high pressure the nodes are ordered
///      by their predecessor counts (true when \p left has fewer
///      predecessors).
///   3. Anything still tied is delegated to BURRSort().
///
/// Every stage gives opposite answers for (left, right) and (right, left), as
/// required of a comparison functor that defines a strict weak ordering.
bool ilp_ls_rr_sort::operator()(const SUnit *left,
                                const SUnit *right) const {
  unsigned LExcess, RExcess;
  bool LHigh = SPQ->HighRegPressure(left, LExcess);
  bool RHigh = SPQ->HighRegPressure(right, RExcess);

  // Avoid causing spills. If register pressure is high, schedule for
  // register pressure reduction.
  if (LHigh != RHigh)
    return LHigh;

  if (LHigh) {
    // Both nodes are under high register pressure: compare the excess. The
    // excess values are only read on this path, as in the original code.
    if (LExcess != RExcess)
      return LExcess > RExcess;
  } else {
    // Low register pressure situation, schedule for ILP. The comparison must
    // be antisymmetric: returning the same value for both directions would
    // make nodes with different predecessor counts compare as equivalent
    // while still bypassing the BURRSort() tie-break.
    if (left->NumPreds != right->NumPreds)
      return left->NumPreds < right->NumPreds;
  }

  return BURRSort(left, right, SPQ);
}
template<class SF>
bool
RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
@ -1963,3 +2011,17 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
PQ->setScheduleDAG(SD);
return SD;
}
/// createILPListDAGScheduler - Factory for the ILP list scheduler. It
/// allocates an ILPBURRPriorityQueue and a ScheduleDAGRRList that uses it,
/// tells the queue which scheduler it belongs to, and returns the scheduler.
/// The CodeGenOpt::Level argument is unnamed and not consulted.
llvm::ScheduleDAGSDNodes *
llvm::createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  // Gather the target hooks the priority queue is constructed with.
  const TargetMachine &Target = IS->TM;
  const TargetInstrInfo *InstrInfo = Target.getInstrInfo();
  const TargetRegisterInfo *RegInfo = Target.getRegisterInfo();
  const TargetLowering *Lowering = &IS->getTargetLowering();

  ILPBURRPriorityQueue *Queue =
    new ILPBURRPriorityQueue(*IS->MF, true, InstrInfo, RegInfo, Lowering);

  // NOTE(review): the meaning of the two boolean arguments is defined by the
  // ScheduleDAGRRList constructor, which is outside this view.
  ScheduleDAGRRList *Scheduler =
    new ScheduleDAGRRList(*IS->MF, true, true, Queue);

  // The queue needs a pointer back to the scheduler that drives it.
  Queue->setScheduleDAG(Scheduler);
  return Scheduler;
}

View File

@ -137,9 +137,11 @@ namespace llvm {
return createTDListDAGScheduler(IS, OptLevel);
if (TLI.getSchedulingPreference() == Sched::RegPressure)
return createBURRListDAGScheduler(IS, OptLevel);
assert(TLI.getSchedulingPreference() == Sched::Hybrid &&
if (TLI.getSchedulingPreference() == Sched::Hybrid)
return createHybridListDAGScheduler(IS, OptLevel);
assert(TLI.getSchedulingPreference() == Sched::ILP &&
"Unknown sched type!");
return createHybridListDAGScheduler(IS, OptLevel);
return createILPListDAGScheduler(IS, OptLevel);
}
}

View File

@ -1190,6 +1190,24 @@ unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const {
return F->hasFnAttr(Attribute::OptimizeForSize) ? 0 : 4;
}
/// getRegPressureLimit - Return the register pressure limit for register
/// class \p RC in function \p MF.
///
/// The GR32 and GR64 limits are reduced by one when RegInfo->hasFP(MF) is
/// true. The VR128 limit depends on whether the subtarget is 64-bit.
/// Register classes that are not listed get a limit of 0.
unsigned
X86TargetLowering::getRegPressureLimit(const TargetRegisterClass *RC,
                                       MachineFunction &MF) const {
  // Amount subtracted from the general purpose register limits.
  unsigned FPAdjust = 0;
  if (RegInfo->hasFP(MF))
    FPAdjust = 1;

  switch (RC->getID()) {
  case X86::GR32RegClassID:
    return 4 - FPAdjust;
  case X86::GR64RegClassID:
    return 8 - FPAdjust;
  case X86::VR64RegClassID:
    return 4;
  case X86::VR128RegClassID:
    if (Subtarget->is64Bit())
      return 10;
    return 4;
  default:
    return 0;
  }
}
bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
unsigned &Offset) const {
if (!Subtarget->isTargetLinux())

View File

@ -590,6 +590,9 @@ namespace llvm {
/// getFunctionAlignment - Return the Log2 alignment of this function.
virtual unsigned getFunctionAlignment(const Function *F) const;
/// getRegPressureLimit - Return the register pressure limit for the
/// register class RC in the function MF. The X86 definition returns 0 for
/// register classes it has no specific limit for.
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const;
/// getStackCookieLocation - Return true if the target stores stack
/// protector cookies at a fixed offset in some non-standard address
/// space, and populates the address space and offset as