mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-23 11:49:50 +00:00
Improve machine schedulers for in-order processors
This patch enables schedulers to specify instructions that cannot be issued with any other instructions. It also fixes BeginGroup/EndGroup. Reviewed by: Andrew Trick Differential Revision: https://reviews.llvm.org/D30744 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@298885 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
3903b47a90
commit
47652291c2
@ -91,6 +91,13 @@ public:
|
||||
/// \brief Maximum number of micro-ops that may be scheduled per cycle.
|
||||
unsigned getIssueWidth() const { return SchedModel.IssueWidth; }
|
||||
|
||||
/// \brief Return true if new group must begin.
|
||||
bool mustBeginGroup(const MachineInstr *MI,
|
||||
const MCSchedClassDesc *SC = nullptr) const;
|
||||
/// \brief Return true if current group must end.
|
||||
bool mustEndGroup(const MachineInstr *MI,
|
||||
const MCSchedClassDesc *SC = nullptr) const;
|
||||
|
||||
/// \brief Return the number of issue slots required for this MI.
|
||||
unsigned getNumMicroOps(const MachineInstr *MI,
|
||||
const MCSchedClassDesc *SC = nullptr) const;
|
||||
@ -176,6 +183,7 @@ public:
|
||||
bool UseDefaultDefLatency = true) const;
|
||||
unsigned computeInstrLatency(unsigned Opcode) const;
|
||||
|
||||
|
||||
/// \brief Output dependency latency of a pair of defs of the same register.
|
||||
///
|
||||
/// This is typically one cycle.
|
||||
|
@ -255,6 +255,9 @@ class ProcWriteResources<list<ProcResourceKind> resources> {
|
||||
// Allow a processor to mark some scheduling classes as unsupported
|
||||
// for stronger verification.
|
||||
bit Unsupported = 0;
|
||||
// Allow a processor to mark some scheduling classes as single-issue.
|
||||
// SingleIssue is an alias for Begin/End Group.
|
||||
bit SingleIssue = 0;
|
||||
SchedMachineModel SchedModel = ?;
|
||||
}
|
||||
|
||||
|
@ -1173,6 +1173,12 @@ void ScheduleDAGMILive::schedule() {
|
||||
dbgs() << " Pressure Diff : ";
|
||||
getPressureDiff(&SU).dump(*TRI);
|
||||
}
|
||||
dbgs() << " Single Issue : ";
|
||||
if (SchedModel.mustBeginGroup(SU.getInstr()) &&
|
||||
SchedModel.mustEndGroup(SU.getInstr()))
|
||||
dbgs() << "true;";
|
||||
else
|
||||
dbgs() << "false;";
|
||||
dbgs() << '\n';
|
||||
}
|
||||
if (ExitSU.getInstr() != nullptr)
|
||||
@ -1910,12 +1916,22 @@ bool SchedBoundary::checkHazard(SUnit *SU) {
|
||||
&& HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
|
||||
return true;
|
||||
}
|
||||
|
||||
unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
|
||||
if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
|
||||
DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
|
||||
<< SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
|
||||
return true;
|
||||
}
|
||||
|
||||
if (CurrMOps > 0 &&
|
||||
((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
|
||||
(!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
|
||||
DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "
|
||||
<< (isTop()? "begin" : "end") << " group\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
|
||||
const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
|
||||
for (TargetSchedModel::ProcResIter
|
||||
@ -2211,6 +2227,18 @@ void SchedBoundary::bumpNode(SUnit *SU) {
|
||||
// one cycle. Since we commonly reach the max MOps here, opportunistically
|
||||
// bump the cycle to avoid uselessly checking everything in the readyQ.
|
||||
CurrMOps += IncMOps;
|
||||
|
||||
// Bump the cycle count for issue group constraints.
|
||||
// This must be done after NextCycle has been adjusted for all other stalls.
|
||||
// Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
|
||||
// CurrCycle to X.
|
||||
if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
|
||||
(!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
|
||||
DEBUG(dbgs() << " Bump cycle to "
|
||||
<< (isTop() ? "end" : "begin") << " group\n");
|
||||
bumpCycle(++NextCycle);
|
||||
}
|
||||
|
||||
while (CurrMOps >= SchedModel->getIssueWidth()) {
|
||||
DEBUG(dbgs() << " *** Max MOps " << CurrMOps
|
||||
<< " at cycle " << CurrCycle << '\n');
|
||||
|
@ -84,6 +84,29 @@ void TargetSchedModel::init(const MCSchedModel &sm,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if the scheduling class of \p MI has its BeginGroup bit set
/// (the subtarget emitter sets it for both BeginGroup and SingleIssue).
/// \p SC may be passed to skip the class lookup; when null it is resolved
/// from \p MI. Returns false when there is no per-instruction scheduling
/// model or the resolved class is not valid.
|
||||
bool TargetSchedModel::mustBeginGroup(const MachineInstr *MI,
|
||||
const MCSchedClassDesc *SC) const {
|
||||
if (hasInstrSchedModel()) {
|
||||
if (!SC) // Caller did not supply a class; look it up from MI.
|
||||
SC = resolveSchedClass(MI);
|
||||
if (SC->isValid()) // Only a valid class carries a meaningful group bit.
|
||||
return SC->BeginGroup;
|
||||
}
|
||||
return false; // No instruction-level model, or class is invalid.
|
||||
}
|
||||
|
||||
/// Returns true if the scheduling class of \p MI has its EndGroup bit set
/// (the subtarget emitter sets it for both EndGroup and SingleIssue).
/// \p SC may be passed to skip the class lookup; when null it is resolved
/// from \p MI. Returns false when there is no per-instruction scheduling
/// model or the resolved class is not valid.
bool TargetSchedModel::mustEndGroup(const MachineInstr *MI,
|
||||
const MCSchedClassDesc *SC) const {
|
||||
if (hasInstrSchedModel()) {
|
||||
if (!SC) // Caller did not supply a class; look it up from MI.
|
||||
SC = resolveSchedClass(MI);
|
||||
if (SC->isValid()) // Only a valid class carries a meaningful group bit.
|
||||
return SC->EndGroup;
|
||||
}
|
||||
return false; // No instruction-level model, or class is invalid.
|
||||
}
|
||||
|
||||
unsigned TargetSchedModel::getNumMicroOps(const MachineInstr *MI,
|
||||
const MCSchedClassDesc *SC) const {
|
||||
if (hasInstrItineraries()) {
|
||||
|
@ -74,7 +74,7 @@ def : WriteRes<WriteCMPsr, [R52UnitALU]> { let Latency = 0; }
|
||||
|
||||
// Div - may stall 0-9 cycles depending on input (i.e. WRI+(0-9)/2)
|
||||
def : WriteRes<WriteDIV, [R52UnitDiv]> {
|
||||
let Latency = 8; let ResourceCycles = [8]; // not pipelined
|
||||
let Latency = 8; let ResourceCycles = [8]; // non-pipelined
|
||||
}
|
||||
|
||||
// Branches - LR written in Late EX2
|
||||
@ -717,16 +717,19 @@ def R52WriteVLD2Mem : SchedWriteRes<[R52UnitLd]> {
|
||||
let Latency = 6;
|
||||
let NumMicroOps = 3;
|
||||
let ResourceCycles = [2];
|
||||
let SingleIssue = 1;
|
||||
}
|
||||
def R52WriteVLD3Mem : SchedWriteRes<[R52UnitLd]> {
|
||||
let Latency = 7;
|
||||
let NumMicroOps = 5;
|
||||
let ResourceCycles = [3];
|
||||
let SingleIssue = 1;
|
||||
}
|
||||
def R52WriteVLD4Mem : SchedWriteRes<[R52UnitLd]> {
|
||||
let Latency = 8;
|
||||
let NumMicroOps = 7;
|
||||
let ResourceCycles = [4];
|
||||
let SingleIssue = 1;
|
||||
}
|
||||
def R52WriteVST1Mem : SchedWriteRes<[R52UnitLd]> {
|
||||
let Latency = 5;
|
||||
|
86
test/CodeGen/ARM/single-issue-r52.mir
Normal file
86
test/CodeGen/ARM/single-issue-r52.mir
Normal file
@ -0,0 +1,86 @@
|
||||
# RUN: llc -o /dev/null %s -mtriple=arm-eabi -mcpu=cortex-r52 -run-pass machine-scheduler -enable-misched -debug-only=misched -misched-topdown 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=TOPDOWN
|
||||
# RUN: llc -o /dev/null %s -mtriple=arm-eabi -mcpu=cortex-r52 -run-pass machine-scheduler -enable-misched -debug-only=misched -misched-bottomup 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=BOTTOMUP
|
||||
# REQUIRES: asserts
|
||||
--- |
|
||||
; ModuleID = 'foo.ll'
|
||||
source_filename = "foo.ll"
|
||||
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
|
||||
target triple = "arm---eabi"
|
||||
|
||||
%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
|
||||
; Function Attrs: nounwind
|
||||
define <8 x i8> @foo(i8* %A) {
|
||||
%tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8.p0i8(i8* %A, i32 8)
|
||||
%tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0
|
||||
%tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 1
|
||||
%tmp4 = add <8 x i8> %tmp2, %tmp3
|
||||
ret <8 x i8> %tmp4
|
||||
}
|
||||
declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8.p0i8(i8*, i32)
|
||||
|
||||
# CHECK: ********** MI Scheduling **********
|
||||
# CHECK: ScheduleDAGMILive::schedule starting
|
||||
# CHECK: SU(1): %vreg1<def> = VLD4d8Pseudo %vreg0, 8, pred:14, pred:%noreg; mem:LD32[%A](align=8) QQPR:%vreg1 GPR:%vreg0
|
||||
# CHECK: Latency : 8
|
||||
# CHECK: Single Issue : true;
|
||||
# CHECK: SU(2): %vreg4<def> = VADDv8i8 %vreg1:dsub_0, %vreg1:dsub_1, pred:14, pred:%noreg; DPR:%vreg4 QQPR:%vreg1
|
||||
# CHECK: Latency : 5
|
||||
# CHECK: Single Issue : false;
|
||||
# CHECK: SU(3): %vreg5<def>, %vreg6<def> = VMOVRRD %vreg4, pred:14, pred:%noreg; GPR:%vreg5,%vreg6 DPR:%vreg4
|
||||
# CHECK: Latency : 4
|
||||
# CHECK: Single Issue : false;
|
||||
|
||||
# TOPDOWN: Scheduling SU(1) %vreg1<def> = VLD4d8Pseudo
|
||||
# TOPDOWN: Bump cycle to end group
|
||||
# TOPDOWN: Scheduling SU(2) %vreg4<def> = VADDv8i8
|
||||
|
||||
# BOTTOMUP: Scheduling SU(2) %vreg4<def> = VADDv8i8
|
||||
# BOTTOMUP: Scheduling SU(1) %vreg1<def> = VLD4d8Pseudo
|
||||
# BOTTOMUP: Bump cycle to begin group
|
||||
|
||||
...
|
||||
---
|
||||
name: foo
|
||||
alignment: 2
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: gpr }
|
||||
- { id: 1, class: qqpr }
|
||||
- { id: 2, class: dpr }
|
||||
- { id: 3, class: dpr }
|
||||
- { id: 4, class: dpr }
|
||||
- { id: 5, class: gpr }
|
||||
- { id: 6, class: gpr }
|
||||
liveins:
|
||||
- { reg: '%r0', virtual-reg: '%0' }
|
||||
frameInfo:
|
||||
isFrameAddressTaken: false
|
||||
isReturnAddressTaken: false
|
||||
hasStackMap: false
|
||||
hasPatchPoint: false
|
||||
stackSize: 0
|
||||
offsetAdjustment: 0
|
||||
maxAlignment: 0
|
||||
adjustsStack: false
|
||||
hasCalls: false
|
||||
maxCallFrameSize: 0
|
||||
hasOpaqueSPAdjustment: false
|
||||
hasVAStart: false
|
||||
hasMustTailInVarArgFunc: false
|
||||
body: |
|
||||
bb.0 (%ir-block.0):
|
||||
liveins: %r0
|
||||
|
||||
%0 = COPY %r0
|
||||
%1 = VLD4d8Pseudo %0, 8, 14, _ :: (load 32 from %ir.A, align 8)
|
||||
%4 = VADDv8i8 %1.dsub_0, %1.dsub_1, 14, _
|
||||
%5, %6 = VMOVRRD %4, 14, _
|
||||
%r0 = COPY %5
|
||||
%r1 = COPY %6
|
||||
BX_RET 14, _, implicit %r0, implicit killed %r1
|
||||
|
||||
...
|
@ -917,6 +917,8 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
|
||||
SCDesc.NumMicroOps += WriteRes->getValueAsInt("NumMicroOps");
|
||||
SCDesc.BeginGroup |= WriteRes->getValueAsBit("BeginGroup");
|
||||
SCDesc.EndGroup |= WriteRes->getValueAsBit("EndGroup");
|
||||
SCDesc.BeginGroup |= WriteRes->getValueAsBit("SingleIssue");
|
||||
SCDesc.EndGroup |= WriteRes->getValueAsBit("SingleIssue");
|
||||
|
||||
// Create an entry for each ProcResource listed in WriteRes.
|
||||
RecVec PRVec = WriteRes->getValueAsListOfDefs("ProcResources");
|
||||
|
Loading…
Reference in New Issue
Block a user