mirror of
https://github.com/RPCS3/llvm.git
synced 2025-02-21 11:11:42 +00:00
Convert -enable-sched-cycles and -enable-sched-hazard to -disable
flags. They are still not enabled in this revision. Added TargetInstrInfo::isZeroCost() to fix a fundamental problem with the scheduler's model of operand latency in the selection DAG. Generalized unit tests to work with sched-cycles. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@123969 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
cd151d2f95
commit
c8bfd1d78f
@ -42,6 +42,8 @@ public:
|
||||
|
||||
unsigned getMaxLookAhead() const { return MaxLookAhead; }
|
||||
|
||||
bool isEnabled() const { return MaxLookAhead != 0; }
|
||||
|
||||
/// atIssueLimit - Return true if no more instructions may be issued in this
|
||||
/// cycle.
|
||||
virtual bool atIssueLimit() const { return false; }
|
||||
|
@ -567,9 +567,9 @@ public:
|
||||
virtual unsigned getInlineAsmLength(const char *Str,
|
||||
const MCAsmInfo &MAI) const;
|
||||
|
||||
/// CreateTargetPreRAHazardRecognizer - Allocate and return a hazard
|
||||
/// recognizer to use for this target when scheduling the machine instructions
|
||||
/// before register allocation.
|
||||
/// CreateTargetHazardRecognizer - Allocate and return a hazard recognizer to
|
||||
/// use for this target when scheduling the machine instructions before
|
||||
/// register allocation.
|
||||
virtual ScheduleHazardRecognizer*
|
||||
CreateTargetHazardRecognizer(const TargetMachine *TM,
|
||||
const ScheduleDAG *DAG) const = 0;
|
||||
@ -610,6 +610,14 @@ public:
|
||||
virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData,
|
||||
const MachineInstr *MI) const;
|
||||
|
||||
/// isZeroCost - Return true for pseudo instructions that don't consume any
|
||||
/// machine resources in their current form. These are common cases that the
|
||||
/// scheduler should consider free, rather than conservatively handling them
|
||||
/// as instructions with no itinerary.
|
||||
bool isZeroCost(unsigned Opcode) const {
|
||||
return Opcode <= TargetOpcode::COPY;
|
||||
}
|
||||
|
||||
/// getOperandLatency - Compute and return the use operand latency of a given
|
||||
/// pair of def and use.
|
||||
/// In most cases, the static scheduling itinerary was enough to determine the
|
||||
@ -686,6 +694,8 @@ public:
|
||||
const MachineBasicBlock *MBB,
|
||||
const MachineFunction &MF) const;
|
||||
|
||||
bool usePreRAHazardRecognizer() const;
|
||||
|
||||
virtual ScheduleHazardRecognizer *
|
||||
CreateTargetHazardRecognizer(const TargetMachine*, const ScheduleDAG*) const;
|
||||
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Target/TargetInstrInfo.h"
|
||||
#include "llvm/Target/TargetInstrItineraries.h"
|
||||
|
||||
using namespace llvm;
|
||||
@ -170,14 +171,17 @@ void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) {
|
||||
if (!ItinData || ItinData->isEmpty())
|
||||
return;
|
||||
|
||||
++IssueCount;
|
||||
|
||||
unsigned cycle = 0;
|
||||
|
||||
// Use the itinerary for the underlying instruction to reserve FU's
|
||||
// in the scoreboard at the appropriate future cycles.
|
||||
const TargetInstrDesc *TID = DAG->getInstrDesc(SU);
|
||||
assert(TID && "The scheduler must filter non-machineinstrs");
|
||||
if (DAG->TII->isZeroCost(TID->Opcode))
|
||||
return;
|
||||
|
||||
++IssueCount;
|
||||
|
||||
unsigned cycle = 0;
|
||||
|
||||
unsigned idx = TID->getSchedClass();
|
||||
for (const InstrStage *IS = ItinData->beginStage(idx),
|
||||
*E = ItinData->endStage(idx); IS != E; ++IS) {
|
||||
|
@ -66,10 +66,9 @@ static RegisterScheduler
|
||||
"which tries to balance ILP and register pressure",
|
||||
createILPListDAGScheduler);
|
||||
|
||||
static cl::opt<bool> EnableSchedCycles(
|
||||
"enable-sched-cycles",
|
||||
cl::desc("Enable cycle-level precision during preRA scheduling"),
|
||||
cl::init(false), cl::Hidden);
|
||||
static cl::opt<bool> DisableSchedCycles(
|
||||
"disable-sched-cycles", cl::Hidden, cl::init(true),
|
||||
cl::desc("Disable cycle-level precision during preRA scheduling"));
|
||||
|
||||
namespace {
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -124,10 +123,10 @@ public:
|
||||
Topo(SUnits) {
|
||||
|
||||
const TargetMachine &tm = mf.getTarget();
|
||||
if (EnableSchedCycles && OptLevel != CodeGenOpt::None)
|
||||
HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
|
||||
else
|
||||
if (DisableSchedCycles || !NeedLatency)
|
||||
HazardRec = new ScheduleHazardRecognizer();
|
||||
else
|
||||
HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
|
||||
}
|
||||
|
||||
~ScheduleDAGRRList() {
|
||||
@ -168,7 +167,7 @@ public:
|
||||
|
||||
private:
|
||||
bool isReady(SUnit *SU) {
|
||||
return !EnableSchedCycles || !AvailableQueue->hasReadyFilter() ||
|
||||
return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
|
||||
AvailableQueue->isReady(SU);
|
||||
}
|
||||
|
||||
@ -237,7 +236,7 @@ void ScheduleDAGRRList::Schedule() {
|
||||
<< " '" << BB->getName() << "' **********\n");
|
||||
|
||||
CurCycle = 0;
|
||||
MinAvailableCycle = EnableSchedCycles ? UINT_MAX : 0;
|
||||
MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
|
||||
NumLiveRegs = 0;
|
||||
LiveRegDefs.resize(TRI->getNumRegs(), NULL);
|
||||
LiveRegGens.resize(TRI->getNumRegs(), NULL);
|
||||
@ -350,7 +349,7 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
|
||||
/// Check to see if any of the pending instructions are ready to issue. If
|
||||
/// so, add them to the available queue.
|
||||
void ScheduleDAGRRList::ReleasePending() {
|
||||
if (!EnableSchedCycles) {
|
||||
if (DisableSchedCycles) {
|
||||
assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
|
||||
return;
|
||||
}
|
||||
@ -385,7 +384,7 @@ void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
|
||||
return;
|
||||
|
||||
AvailableQueue->setCurCycle(NextCycle);
|
||||
if (HazardRec->getMaxLookAhead() == 0) {
|
||||
if (!HazardRec->isEnabled()) {
|
||||
// Bypass lots of virtual calls in case of long latency.
|
||||
CurCycle = NextCycle;
|
||||
}
|
||||
@ -405,7 +404,7 @@ void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
|
||||
/// Move the scheduler state forward until the specified node's dependents are
|
||||
/// ready and can be scheduled with no resource conflicts.
|
||||
void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
|
||||
if (!EnableSchedCycles)
|
||||
if (DisableSchedCycles)
|
||||
return;
|
||||
|
||||
unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth();
|
||||
@ -440,7 +439,7 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
|
||||
/// Record this SUnit in the HazardRecognizer.
|
||||
/// Does not update CurCycle.
|
||||
void ScheduleDAGRRList::EmitNode(SUnit *SU) {
|
||||
if (!EnableSchedCycles || HazardRec->getMaxLookAhead() == 0)
|
||||
if (!HazardRec->isEnabled())
|
||||
return;
|
||||
|
||||
// Check for phys reg copy.
|
||||
@ -525,9 +524,9 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
|
||||
// (1) No available instructions
|
||||
// (2) All pipelines full, so available instructions must have hazards.
|
||||
//
|
||||
// If SchedCycles is disabled, count each inst as one cycle.
|
||||
if (!EnableSchedCycles ||
|
||||
AvailableQueue->empty() || HazardRec->atIssueLimit())
|
||||
// If HazardRec is disabled, count each inst as one cycle.
|
||||
if (!HazardRec->isEnabled() || HazardRec->atIssueLimit()
|
||||
|| AvailableQueue->empty())
|
||||
AdvanceToCycle(CurCycle + 1);
|
||||
}
|
||||
|
||||
@ -585,7 +584,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
|
||||
SU->setHeightDirty();
|
||||
SU->isScheduled = false;
|
||||
SU->isAvailable = true;
|
||||
if (EnableSchedCycles && AvailableQueue->hasReadyFilter()) {
|
||||
if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
|
||||
// Don't make available until backtracking is complete.
|
||||
SU->isPending = true;
|
||||
PendingQueue.push_back(SU);
|
||||
@ -2010,24 +2009,27 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
|
||||
} else if (RStall)
|
||||
return -1;
|
||||
|
||||
// If either node is scheduling for latency, sort them by depth
|
||||
// If either node is scheduling for latency, sort them by height/depth
|
||||
// and latency.
|
||||
if (!checkPref || (left->SchedulingPref == Sched::Latency ||
|
||||
right->SchedulingPref == Sched::Latency)) {
|
||||
int LDepth = (int)left->getDepth();
|
||||
int RDepth = (int)right->getDepth();
|
||||
|
||||
if (EnableSchedCycles) {
|
||||
if (LDepth != RDepth)
|
||||
DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
|
||||
<< ") depth " << LDepth << " vs SU (" << right->NodeNum
|
||||
<< ") depth " << RDepth << ")\n");
|
||||
return LDepth < RDepth ? 1 : -1;
|
||||
}
|
||||
else {
|
||||
if (DisableSchedCycles) {
|
||||
if (LHeight != RHeight)
|
||||
return LHeight > RHeight ? 1 : -1;
|
||||
}
|
||||
else {
|
||||
// If neither instruction stalls (!LStall && !RStall) then
|
||||
// its height is already covered so only its depth matters. We also reach
|
||||
// this if both stall but have the same height.
|
||||
unsigned LDepth = left->getDepth();
|
||||
unsigned RDepth = right->getDepth();
|
||||
if (LDepth != RDepth) {
|
||||
DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
|
||||
<< ") depth " << LDepth << " vs SU (" << right->NodeNum
|
||||
<< ") depth " << RDepth << "\n");
|
||||
return LDepth < RDepth ? 1 : -1;
|
||||
}
|
||||
}
|
||||
if (left->Latency != right->Latency)
|
||||
return left->Latency > right->Latency ? 1 : -1;
|
||||
}
|
||||
@ -2068,7 +2070,7 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
|
||||
if (LScratch != RScratch)
|
||||
return LScratch > RScratch;
|
||||
|
||||
if (EnableSchedCycles) {
|
||||
if (!DisableSchedCycles) {
|
||||
int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
|
||||
if (result != 0)
|
||||
return result > 0;
|
||||
|
@ -24,11 +24,16 @@
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
|
||||
#include "llvm/CodeGen/PseudoSourceValue.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
using namespace llvm;
|
||||
|
||||
static cl::opt<bool> DisableHazardRecognizer(
|
||||
"disable-sched-hazard", cl::Hidden, cl::init(false),
|
||||
cl::desc("Disable hazard detection during preRA scheduling"));
|
||||
|
||||
/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
|
||||
/// after it, replacing it with an unconditional branch to NewDest.
|
||||
void
|
||||
@ -421,7 +426,13 @@ bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI,
|
||||
return false;
|
||||
}
|
||||
|
||||
// Default implementation of CreateTargetPreRAHazardRecognizer.
|
||||
// Provide a global flag for disabling the PreRA hazard recognizer that targets
|
||||
// may choose to honor.
|
||||
bool TargetInstrInfoImpl::usePreRAHazardRecognizer() const {
|
||||
return !DisableHazardRecognizer;
|
||||
}
|
||||
|
||||
// Default implementation of CreateTargetHazardRecognizer.
|
||||
ScheduleHazardRecognizer *TargetInstrInfoImpl::
|
||||
CreateTargetHazardRecognizer(const TargetMachine *TM,
|
||||
const ScheduleDAG *DAG) const {
|
||||
|
@ -41,14 +41,6 @@ static cl::opt<bool>
|
||||
EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
|
||||
cl::desc("Enable ARM 2-addr to 3-addr conv"));
|
||||
|
||||
// Other targets already have a hazard recognizer enabled by default, so this
|
||||
// flag currently only affects ARM. It will be generalized when it becomes a
|
||||
// disabled flag.
|
||||
static cl::opt<bool> EnableHazardRecognizer(
|
||||
"enable-sched-hazard", cl::Hidden,
|
||||
cl::desc("Enable hazard detection during preRA scheduling"),
|
||||
cl::init(false));
|
||||
|
||||
/// ARM_MLxEntry - Record information about MLA / MLS instructions.
|
||||
struct ARM_MLxEntry {
|
||||
unsigned MLxOpc; // MLA / MLS opcode
|
||||
@ -97,7 +89,7 @@ ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
|
||||
ScheduleHazardRecognizer *ARMBaseInstrInfo::
|
||||
CreateTargetHazardRecognizer(const TargetMachine *TM,
|
||||
const ScheduleDAG *DAG) const {
|
||||
if (EnableHazardRecognizer) {
|
||||
if (usePreRAHazardRecognizer()) {
|
||||
const InstrItineraryData *II = TM->getInstrItineraryData();
|
||||
return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
|
||||
}
|
||||
@ -2173,6 +2165,10 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
|
||||
return 1;
|
||||
|
||||
const TargetInstrDesc &DefTID = get(DefNode->getMachineOpcode());
|
||||
|
||||
if (isZeroCost(DefTID.Opcode))
|
||||
return 0;
|
||||
|
||||
if (!ItinData || ItinData->isEmpty())
|
||||
return DefTID.mayLoad() ? 3 : 1;
|
||||
|
||||
|
@ -13,19 +13,19 @@ entry:
|
||||
ret float %retval
|
||||
}
|
||||
; VFP2: test1:
|
||||
; VFP2: vneg.f32 s1, s0
|
||||
; VFP2: vneg.f32 s{{.*}}, s{{.*}}
|
||||
|
||||
; NFP1: test1:
|
||||
; NFP1: vneg.f32 d1, d0
|
||||
; NFP1: vneg.f32 d{{.*}}, d{{.*}}
|
||||
|
||||
; NFP0: test1:
|
||||
; NFP0: vneg.f32 s1, s0
|
||||
; NFP0: vneg.f32 s{{.*}}, s{{.*}}
|
||||
|
||||
; CORTEXA8: test1:
|
||||
; CORTEXA8: vneg.f32 d1, d0
|
||||
; CORTEXA8: vneg.f32 d{{.*}}, d{{.*}}
|
||||
|
||||
; CORTEXA9: test1:
|
||||
; CORTEXA9: vneg.f32 s1, s0
|
||||
; CORTEXA9: vneg.f32 s{{.*}}, s{{.*}}
|
||||
|
||||
define float @test2(float* %a) {
|
||||
entry:
|
||||
@ -37,17 +37,17 @@ entry:
|
||||
ret float %retval
|
||||
}
|
||||
; VFP2: test2:
|
||||
; VFP2: vneg.f32 s1, s0
|
||||
; VFP2: vneg.f32 s{{.*}}, s{{.*}}
|
||||
|
||||
; NFP1: test2:
|
||||
; NFP1: vneg.f32 d1, d0
|
||||
; NFP1: vneg.f32 d{{.*}}, d{{.*}}
|
||||
|
||||
; NFP0: test2:
|
||||
; NFP0: vneg.f32 s1, s0
|
||||
; NFP0: vneg.f32 s{{.*}}, s{{.*}}
|
||||
|
||||
; CORTEXA8: test2:
|
||||
; CORTEXA8: vneg.f32 d1, d0
|
||||
; CORTEXA8: vneg.f32 d{{.*}}, d{{.*}}
|
||||
|
||||
; CORTEXA9: test2:
|
||||
; CORTEXA9: vneg.f32 s1, s0
|
||||
; CORTEXA9: vneg.f32 s{{.*}}, s{{.*}}
|
||||
|
||||
|
@ -11,7 +11,7 @@ entry:
|
||||
; NEON: vnmla.f32
|
||||
|
||||
; A8: t1:
|
||||
; A8: vnmul.f32 s0, s1, s0
|
||||
; A8: vnmul.f32 s0, s{{[01]}}, s{{[01]}}
|
||||
; A8: vsub.f32 d0, d0, d1
|
||||
%0 = fmul float %a, %b
|
||||
%1 = fsub float -0.0, %0
|
||||
@ -28,7 +28,7 @@ entry:
|
||||
; NEON: vnmla.f32
|
||||
|
||||
; A8: t2:
|
||||
; A8: vnmul.f32 s0, s1, s0
|
||||
; A8: vnmul.f32 s0, s{{[01]}}, s{{[01]}}
|
||||
; A8: vsub.f32 d0, d0, d1
|
||||
%0 = fmul float %a, %b
|
||||
%1 = fmul float -1.0, %0
|
||||
@ -45,7 +45,7 @@ entry:
|
||||
; NEON: vnmla.f64
|
||||
|
||||
; A8: t3:
|
||||
; A8: vnmul.f64 d16, d16, d17
|
||||
; A8: vnmul.f64 d16, d1{{[67]}}, d1{{[67]}}
|
||||
; A8: vsub.f64 d16, d16, d17
|
||||
%0 = fmul double %a, %b
|
||||
%1 = fsub double -0.0, %0
|
||||
@ -62,7 +62,7 @@ entry:
|
||||
; NEON: vnmla.f64
|
||||
|
||||
; A8: t4:
|
||||
; A8: vnmul.f64 d16, d16, d17
|
||||
; A8: vnmul.f64 d16, d1{{[67]}}, d1{{[67]}}
|
||||
; A8: vsub.f64 d16, d16, d17
|
||||
%0 = fmul double %a, %b
|
||||
%1 = fmul double -1.0, %0
|
||||
|
@ -3,7 +3,7 @@
|
||||
define float @t1(float %x) nounwind readnone optsize {
|
||||
entry:
|
||||
; CHECK: t1:
|
||||
; CHECK: vmov.f32 s1, #4.000000e+00
|
||||
; CHECK: vmov.f32 s{{.*}}, #4.000000e+00
|
||||
%0 = fadd float %x, 4.000000e+00
|
||||
ret float %0
|
||||
}
|
||||
@ -27,7 +27,7 @@ entry:
|
||||
define float @t4(float %x) nounwind readnone optsize {
|
||||
entry:
|
||||
; CHECK: t4:
|
||||
; CHECK: vmov.f32 s1, #-2.400000e+01
|
||||
; CHECK: vmov.f32 s{{.*}}, #-2.400000e+01
|
||||
%0 = fmul float %x, -2.400000e+01
|
||||
ret float %0
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc < %s -march=arm | FileCheck %s -check-prefix=GENERIC
|
||||
; RUN: llc < %s -march=arm -pre-RA-sched=source | FileCheck %s -check-prefix=GENERIC
|
||||
; RUN: llc < %s -mtriple=armv6-apple-darwin | FileCheck %s -check-prefix=DARWIN_V6
|
||||
; RUN: llc < %s -mtriple=armv6-apple-darwin -arm-strict-align | FileCheck %s -check-prefix=GENERIC
|
||||
; RUN: llc < %s -mtriple=armv6-linux | FileCheck %s -check-prefix=GENERIC
|
||||
|
Loading…
x
Reference in New Issue
Block a user