Improve machine schedulers for in-order processors

This patch enables schedulers to specify instructions that 
cannot be issued with any other instructions.
It also fixes BeginGroup/EndGroup.

Reviewed by: Andrew Trick
Differential Revision: https://reviews.llvm.org/D30744



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@298885 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Javed Absar 2017-03-27 20:46:37 +00:00
parent 3903b47a90
commit 47652291c2
7 changed files with 154 additions and 1 deletions

View File

@ -91,6 +91,13 @@ public:
/// \brief Maximum number of micro-ops that may be scheduled per cycle.
unsigned getIssueWidth() const { return SchedModel.IssueWidth; }
/// \brief Return true if new group must begin.
///
/// \p SC may carry the scheduling class already resolved for \p MI; when it
/// is left null the class is resolved from \p MI.
bool mustBeginGroup(const MachineInstr *MI,
const MCSchedClassDesc *SC = nullptr) const;
/// \brief Return true if current group must end.
///
/// \p SC may carry the scheduling class already resolved for \p MI; when it
/// is left null the class is resolved from \p MI.
bool mustEndGroup(const MachineInstr *MI,
const MCSchedClassDesc *SC = nullptr) const;
/// \brief Return the number of issue slots required for this MI.
unsigned getNumMicroOps(const MachineInstr *MI,
const MCSchedClassDesc *SC = nullptr) const;
@ -176,6 +183,7 @@ public:
bool UseDefaultDefLatency = true) const;
unsigned computeInstrLatency(unsigned Opcode) const;
/// \brief Output dependency latency of a pair of defs of the same register.
///
/// This is typically one cycle.

View File

@ -255,6 +255,9 @@ class ProcWriteResources<list<ProcResourceKind> resources> {
// Allow a processor to mark some scheduling classes as unsupported
// for stronger verification.
bit Unsupported = 0;
// Allow a processor to mark some scheduling classes as single-issue.
// SingleIssue is an alias for Begin/End Group.
bit SingleIssue = 0;
SchedMachineModel SchedModel = ?;
}

View File

@ -1173,6 +1173,12 @@ void ScheduleDAGMILive::schedule() {
dbgs() << " Pressure Diff : ";
getPressureDiff(&SU).dump(*TRI);
}
dbgs() << " Single Issue : ";
if (SchedModel.mustBeginGroup(SU.getInstr()) &&
SchedModel.mustEndGroup(SU.getInstr()))
dbgs() << "true;";
else
dbgs() << "false;";
dbgs() << '\n';
}
if (ExitSU.getInstr() != nullptr)
@ -1910,12 +1916,22 @@ bool SchedBoundary::checkHazard(SUnit *SU) {
&& HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
return true;
}
unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
<< SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
return true;
}
if (CurrMOps > 0 &&
((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
(!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "
<< (isTop()? "begin" : "end") << " group\n");
return true;
}
if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
for (TargetSchedModel::ProcResIter
@ -2211,6 +2227,18 @@ void SchedBoundary::bumpNode(SUnit *SU) {
// one cycle. Since we commonly reach the max MOps here, opportunistically
// bump the cycle to avoid uselessly checking everything in the readyQ.
CurrMOps += IncMOps;
// Bump the cycle count for issue group constraints.
// This must be done after NextCycle has been adjusted for all other stalls.
// Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
// CurrCycle to X.
if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
(!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
DEBUG(dbgs() << " Bump cycle to "
<< (isTop() ? "end" : "begin") << " group\n");
bumpCycle(++NextCycle);
}
while (CurrMOps >= SchedModel->getIssueWidth()) {
DEBUG(dbgs() << " *** Max MOps " << CurrMOps
<< " at cycle " << CurrCycle << '\n');

View File

@ -84,6 +84,29 @@ void TargetSchedModel::init(const MCSchedModel &sm,
}
}
/// Returns true only if instruction is specified as single issue.
bool TargetSchedModel::mustBeginGroup(const MachineInstr *MI,
const MCSchedClassDesc *SC) const {
if (hasInstrSchedModel()) {
if (!SC)
SC = resolveSchedClass(MI);
if (SC->isValid())
return SC->BeginGroup;
}
return false;
}
bool TargetSchedModel::mustEndGroup(const MachineInstr *MI,
const MCSchedClassDesc *SC) const {
if (hasInstrSchedModel()) {
if (!SC)
SC = resolveSchedClass(MI);
if (SC->isValid())
return SC->EndGroup;
}
return false;
}
unsigned TargetSchedModel::getNumMicroOps(const MachineInstr *MI,
const MCSchedClassDesc *SC) const {
if (hasInstrItineraries()) {

View File

@ -74,7 +74,7 @@ def : WriteRes<WriteCMPsr, [R52UnitALU]> { let Latency = 0; }
// Div - may stall 0-9 cycles depending on input (i.e. WRI+(0-9)/2)
def : WriteRes<WriteDIV, [R52UnitDiv]> {
let Latency = 8; let ResourceCycles = [8]; // not pipelined
let Latency = 8; let ResourceCycles = [8]; // non-pipelined
}
// Branches - LR written in Late EX2
@ -717,16 +717,19 @@ def R52WriteVLD2Mem : SchedWriteRes<[R52UnitLd]> {
let Latency = 6;
let NumMicroOps = 3;
let ResourceCycles = [2];
let SingleIssue = 1;
}
// Scheduling write resource on R52UnitLd with latency 7 and five micro-ops.
// NOTE(review): presumably models the memory access of NEON VLD3 loads;
// confirm against the instructions that reference this def.
def R52WriteVLD3Mem : SchedWriteRes<[R52UnitLd]> {
let Latency = 7;
let NumMicroOps = 5;
let ResourceCycles = [3];
// SingleIssue is an alias for setting both BeginGroup and EndGroup.
let SingleIssue = 1;
}
// Scheduling write resource on R52UnitLd with latency 8 and seven micro-ops.
// NOTE(review): presumably models the memory access of NEON VLD4 loads;
// confirm against the instructions that reference this def.
def R52WriteVLD4Mem : SchedWriteRes<[R52UnitLd]> {
let Latency = 8;
let NumMicroOps = 7;
let ResourceCycles = [4];
// SingleIssue is an alias for setting both BeginGroup and EndGroup.
let SingleIssue = 1;
}
def R52WriteVST1Mem : SchedWriteRes<[R52UnitLd]> {
let Latency = 5;

View File

@ -0,0 +1,86 @@
# RUN: llc -o /dev/null %s -mtriple=arm-eabi -mcpu=cortex-r52 -run-pass machine-scheduler -enable-misched -debug-only=misched -misched-topdown 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=TOPDOWN
# RUN: llc -o /dev/null %s -mtriple=arm-eabi -mcpu=cortex-r52 -run-pass machine-scheduler -enable-misched -debug-only=misched -misched-bottomup 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=BOTTOMUP
# REQUIRES: asserts
--- |
; ModuleID = 'foo.ll'
source_filename = "foo.ll"
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "arm---eabi"
%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
; Function Attrs: nounwind
define <8 x i8> @foo(i8* %A) {
%tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8.p0i8(i8* %A, i32 8)
%tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 1
%tmp4 = add <8 x i8> %tmp2, %tmp3
ret <8 x i8> %tmp4
}
declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8.p0i8(i8*, i32)
# CHECK: ********** MI Scheduling **********
# CHECK: ScheduleDAGMILive::schedule starting
# CHECK: SU(1): %vreg1<def> = VLD4d8Pseudo %vreg0, 8, pred:14, pred:%noreg; mem:LD32[%A](align=8) QQPR:%vreg1 GPR:%vreg0
# CHECK: Latency : 8
# CHECK: Single Issue : true;
# CHECK: SU(2): %vreg4<def> = VADDv8i8 %vreg1:dsub_0, %vreg1:dsub_1, pred:14, pred:%noreg; DPR:%vreg4 QQPR:%vreg1
# CHECK: Latency : 5
# CHECK: Single Issue : false;
# CHECK: SU(3): %vreg5<def>, %vreg6<def> = VMOVRRD %vreg4, pred:14, pred:%noreg; GPR:%vreg5,%vreg6 DPR:%vreg4
# CHECK: Latency : 4
# CHECK: Single Issue : false;
# TOPDOWN: Scheduling SU(1) %vreg1<def> = VLD4d8Pseudo
# TOPDOWN: Bump cycle to end group
# TOPDOWN: Scheduling SU(2) %vreg4<def> = VADDv8i8
# BOTTOMUP: Scheduling SU(2) %vreg4<def> = VADDv8i8
# BOTTOMUP: Scheduling SU(1) %vreg1<def> = VLD4d8Pseudo
# BOTTOMUP: Bump cycle to begin group
...
---
name: foo
alignment: 2
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
- { id: 0, class: gpr }
- { id: 1, class: qqpr }
- { id: 2, class: dpr }
- { id: 3, class: dpr }
- { id: 4, class: dpr }
- { id: 5, class: gpr }
- { id: 6, class: gpr }
liveins:
- { reg: '%r0', virtual-reg: '%0' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 0
adjustsStack: false
hasCalls: false
maxCallFrameSize: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
body: |
bb.0 (%ir-block.0):
liveins: %r0
%0 = COPY %r0
%1 = VLD4d8Pseudo %0, 8, 14, _ :: (load 32 from %ir.A, align 8)
%4 = VADDv8i8 %1.dsub_0, %1.dsub_1, 14, _
%5, %6 = VMOVRRD %4, 14, _
%r0 = COPY %5
%r1 = COPY %6
BX_RET 14, _, implicit %r0, implicit killed %r1
...

View File

@ -917,6 +917,8 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
SCDesc.NumMicroOps += WriteRes->getValueAsInt("NumMicroOps");
SCDesc.BeginGroup |= WriteRes->getValueAsBit("BeginGroup");
SCDesc.EndGroup |= WriteRes->getValueAsBit("EndGroup");
SCDesc.BeginGroup |= WriteRes->getValueAsBit("SingleIssue");
SCDesc.EndGroup |= WriteRes->getValueAsBit("SingleIssue");
// Create an entry for each ProcResource listed in WriteRes.
RecVec PRVec = WriteRes->getValueAsListOfDefs("ProcResources");