mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-19 10:53:55 +00:00
[Hexagon] Handle instruction latency for 0 or 2 cycles
The Hexagon schedulers need to handle instructions with a latency of 0 or 2 more accurately. The problem, in v60, is that a dependence between two instructions with a 2 cycle latency can use a .cur version of the source to achieve a 0 cycle latency when the use is in the same packet. Any other use must be at least 2 packets later, or a stall occurs. In other words, the compiler does not want to schedule the dependent instructions 1 cycle later. To achieve this, the latency adjustment code allows only a single dependence to have a zero latency. All other instructions have the other value, which is typically 2 cycles. We use a heuristic to determine which instruction gets the 0 latency. The Hexagon machine scheduler was also changed to increase the cost associated with 0 latency dependences that can be scheduled in the same packet. Patch by Brendon Cahoon. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275625 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
35290cc53d
commit
1b5d21e1e4
@ -2515,6 +2515,28 @@ bool HexagonInstrInfo::isTC4x(const MachineInstr *MI) const {
|
||||
}
|
||||
|
||||
|
||||
// Schedule this ASAP.
|
||||
bool HexagonInstrInfo::isToBeScheduledASAP(const MachineInstr *MI1,
|
||||
const MachineInstr *MI2) const {
|
||||
if (!MI1 || !MI2)
|
||||
return false;
|
||||
if (mayBeCurLoad(MI1)) {
|
||||
// if (result of SU is used in Next) return true;
|
||||
unsigned DstReg = MI1->getOperand(0).getReg();
|
||||
int N = MI2->getNumOperands();
|
||||
for (int I = 0; I < N; I++)
|
||||
if (MI2->getOperand(I).isReg() && DstReg == MI2->getOperand(I).getReg())
|
||||
return true;
|
||||
}
|
||||
if (mayBeNewStore(MI2))
|
||||
if (MI2->getOpcode() == Hexagon::V6_vS32b_pi)
|
||||
if (MI1->getOperand(0).isReg() && MI2->getOperand(3).isReg() &&
|
||||
MI1->getOperand(0).getReg() == MI2->getOperand(3).getReg())
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
bool HexagonInstrInfo::isV60VectorInstruction(const MachineInstr *MI) const {
|
||||
if (!MI)
|
||||
return false;
|
||||
@ -2839,6 +2861,16 @@ bool HexagonInstrInfo::isZeroExtendingLoad(const MachineInstr &MI) const {
|
||||
}
|
||||
|
||||
|
||||
// Add latency to instruction.
|
||||
bool HexagonInstrInfo::addLatencyToSchedule(const MachineInstr *MI1,
|
||||
const MachineInstr *MI2) const {
|
||||
if (isV60VectorInstruction(MI1) && isV60VectorInstruction(MI2))
|
||||
if (!isVecUsableNextPacket(MI1, MI2))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/// \brief Can these instructions execute at the same time in a bundle.
|
||||
bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr *First,
|
||||
const MachineInstr *Second) const {
|
||||
|
@ -314,6 +314,8 @@ public:
|
||||
bool isTC2(const MachineInstr *MI) const;
|
||||
bool isTC2Early(const MachineInstr *MI) const;
|
||||
bool isTC4x(const MachineInstr *MI) const;
|
||||
bool isToBeScheduledASAP(const MachineInstr *MI1,
|
||||
const MachineInstr *MI2) const;
|
||||
bool isV60VectorInstruction(const MachineInstr *MI) const;
|
||||
bool isValidAutoIncImm(const EVT VT, const int Offset) const;
|
||||
bool isValidOffset(unsigned Opcode, int Offset, bool Extend = true) const;
|
||||
@ -323,6 +325,8 @@ public:
|
||||
const MachineInstr *ConsMI) const;
|
||||
bool isZeroExtendingLoad(const MachineInstr &MI) const;
|
||||
|
||||
bool addLatencyToSchedule(const MachineInstr *MI1,
|
||||
const MachineInstr *MI2) const;
|
||||
bool canExecuteInBundle(const MachineInstr *First,
|
||||
const MachineInstr *Second) const;
|
||||
bool hasEHLabel(const MachineBasicBlock *B) const;
|
||||
|
@ -609,6 +609,28 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
|
||||
auto &QST = DAG->MF.getSubtarget<HexagonSubtarget>();
|
||||
auto &QII = *QST.getInstrInfo();
|
||||
|
||||
// Give preference to a zero latency instruction if the dependent
|
||||
// instruction is in the current packet.
|
||||
if (Q.getID() == TopQID) {
|
||||
for (const SDep &PI : SU->Preds) {
|
||||
if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() &&
|
||||
PI.getLatency() == 0 &&
|
||||
Top.ResourceModel->isInPacket(PI.getSUnit())) {
|
||||
ResCount += PriorityTwo;
|
||||
DEBUG(if (verbose) dbgs() << "Z|");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (const SDep &SI : SU->Succs) {
|
||||
if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() &&
|
||||
SI.getLatency() == 0 &&
|
||||
Bot.ResourceModel->isInPacket(SI.getSUnit())) {
|
||||
ResCount += PriorityTwo;
|
||||
DEBUG(if (verbose) dbgs() << "Z|");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Give less preference to an instruction that will cause a stall with
|
||||
// an instruction in the previous packet.
|
||||
if (QII.isV60VectorInstruction(Instr)) {
|
||||
|
@ -51,6 +51,16 @@ static cl::opt<bool> EnableHexagonHVX("enable-hexagon-hvx",
|
||||
cl::Hidden, cl::ZeroOrMore, cl::init(false),
|
||||
cl::desc("Enable Hexagon Vector eXtensions"));
|
||||
|
||||
// Gates the TClass latency adjustment in HexagonSubtarget::updateLatency.
static cl::opt<bool> EnableTCLatencySched("enable-tc-latency-sched",
  cl::Hidden, cl::ZeroOrMore, cl::init(false),
  cl::desc("Enable TClass latency scheduling"));

// Gates the zero-latency (.cur) pairing in
// HexagonSubtarget::adjustSchedDependency.
static cl::opt<bool> EnableDotCurSched("enable-cur-sched",
  cl::Hidden, cl::ZeroOrMore, cl::init(true),
  cl::desc("Enable the scheduler to generate .cur"));

// Gates the vector forwarding latency adjustment in
// HexagonSubtarget::updateLatency.
// NOTE(review): the option string "enable-evec-frwd-sched" looks like a typo
// for "enable-vec-frwd-sched"; it is kept unchanged so existing command lines
// keep working.
static cl::opt<bool> EnableVecFrwdSched("enable-evec-frwd-sched",
  cl::Hidden, cl::ZeroOrMore, cl::init(true),
  cl::desc("Enable Vec frwd scheduling"));
|
||||
|
||||
static cl::opt<bool> DisableHexagonMISched("disable-hexagon-misched",
|
||||
cl::Hidden, cl::ZeroOrMore, cl::init(false),
|
||||
cl::desc("Disable Hexagon MI Scheduling"));
|
||||
@ -185,3 +195,152 @@ bool HexagonSubtarget::enableSubRegLiveness() const {
|
||||
return EnableSubregLiveness;
|
||||
}
|
||||
|
||||
// This helper function is responsible for increasing the latency only.
|
||||
void HexagonSubtarget::updateLatency(MachineInstr *SrcInst,
|
||||
MachineInstr *DstInst, SDep &Dep) const {
|
||||
if (!hasV60TOps())
|
||||
return;
|
||||
|
||||
auto &QII = static_cast<const HexagonInstrInfo&>(*getInstrInfo());
|
||||
|
||||
if (EnableVecFrwdSched && QII.addLatencyToSchedule(SrcInst, DstInst)) {
|
||||
// Vec frwd scheduling.
|
||||
Dep.setLatency(Dep.getLatency() + 1);
|
||||
} else if (useBSBScheduling() &&
|
||||
QII.isLateInstrFeedsEarlyInstr(SrcInst, DstInst)) {
|
||||
// BSB scheduling.
|
||||
Dep.setLatency(Dep.getLatency() + 1);
|
||||
} else if (EnableTCLatencySched) {
|
||||
// TClass latency scheduling.
|
||||
// Check if SrcInst produces in 2C an operand of DstInst taken in stage 2B.
|
||||
if (QII.isTC1(SrcInst) || QII.isTC2(SrcInst))
|
||||
if (!QII.isTC1(DstInst) && !QII.isTC2(DstInst))
|
||||
Dep.setLatency(Dep.getLatency() + 1);
|
||||
}
|
||||
}
|
||||
|
||||
// Return true if these are the best two instructions to schedule
|
||||
// together with a zero latency. Only one dependence should have a zero
|
||||
// latency. If there are multiple choices, choose the best, and change
|
||||
// ther others, if needed.
|
||||
bool HexagonSubtarget::isBestZeroLatency(SUnit *Src, SUnit *Dst,
|
||||
const HexagonInstrInfo *TII) const {
|
||||
MachineInstr *SrcInst = Src->getInstr();
|
||||
MachineInstr *DstInst = Dst->getInstr();
|
||||
// Check if the instructions can be scheduled together.
|
||||
assert((TII->isToBeScheduledASAP(SrcInst, DstInst) ||
|
||||
TII->canExecuteInBundle(SrcInst, DstInst)) &&
|
||||
"Unable to schedule instructions together.");
|
||||
|
||||
if (SrcInst->isPHI() || DstInst->isPHI())
|
||||
return false;
|
||||
|
||||
// Look for the best candidate to schedule together. If there are
|
||||
// multiple choices, then the best candidate is the one with the
|
||||
// greatest height, i.e., longest critical path.
|
||||
SUnit *Best = Dst;
|
||||
SUnit *PrevBest = nullptr;
|
||||
for (const SDep &SI : Src->Succs) {
|
||||
if (!SI.isAssignedRegDep())
|
||||
continue;
|
||||
if (SI.getLatency() == 0)
|
||||
PrevBest = SI.getSUnit();
|
||||
MachineInstr *Inst = SI.getSUnit()->getInstr();
|
||||
if (!TII->isToBeScheduledASAP(SrcInst, Inst) ||
|
||||
!TII->canExecuteInBundle(SrcInst, Inst))
|
||||
continue;
|
||||
if (SI.getSUnit()->getHeight() > Best->getHeight())
|
||||
Best = SI.getSUnit();
|
||||
}
|
||||
|
||||
// Reassign the latency for the previous best, which requires setting
|
||||
// the dependence edge in both directions.
|
||||
if (Best != PrevBest) {
|
||||
for (SDep &SI : Src->Succs) {
|
||||
if (SI.getSUnit() != PrevBest)
|
||||
continue;
|
||||
SI.setLatency(1);
|
||||
updateLatency(SrcInst, DstInst, SI);
|
||||
// Update the latency of the predecessor edge too.
|
||||
for (SDep &PI : PrevBest->Preds) {
|
||||
if (PI.getSUnit() != Src || !PI.isAssignedRegDep())
|
||||
continue;
|
||||
PI.setLatency(1);
|
||||
updateLatency(SrcInst, DstInst, PI);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return Best == Dst;
|
||||
}
|
||||
|
||||
// Update the latency of a Phi when the Phi bridges two instructions that
|
||||
// require a multi-cycle latency.
|
||||
void HexagonSubtarget::changePhiLatency(MachineInstr *SrcInst, SUnit *Dst,
|
||||
SDep &Dep) const {
|
||||
if (!SrcInst->isPHI() || Dst->NumPreds == 0 || Dep.getLatency() != 0)
|
||||
return;
|
||||
|
||||
for (const SDep &PI : Dst->Preds) {
|
||||
if (PI.getLatency() != 0)
|
||||
continue;
|
||||
Dep.setLatency(2);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief Perform target specific adjustments to the latency of a schedule
|
||||
/// dependency.
|
||||
void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst,
|
||||
SDep &Dep) const {
|
||||
MachineInstr *SrcInst = Src->getInstr();
|
||||
MachineInstr *DstInst = Dst->getInstr();
|
||||
if (!Src->isInstr() || !Dst->isInstr())
|
||||
return;
|
||||
|
||||
const HexagonInstrInfo *QII = static_cast<const HexagonInstrInfo *>(getInstrInfo());
|
||||
|
||||
// Instructions with .new operands have zero latency.
|
||||
if (QII->canExecuteInBundle(SrcInst, DstInst) &&
|
||||
isBestZeroLatency(Src, Dst, QII)) {
|
||||
Dep.setLatency(0);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!hasV60TOps())
|
||||
return;
|
||||
|
||||
// Don't adjust the latency of post-increment part of the instruction.
|
||||
if (QII->isPostIncrement(SrcInst) && Dep.isAssignedRegDep()) {
|
||||
if (SrcInst->mayStore())
|
||||
return;
|
||||
if (Dep.getReg() != SrcInst->getOperand(0).getReg())
|
||||
return;
|
||||
} else if (QII->isPostIncrement(DstInst) && Dep.getKind() == SDep::Anti) {
|
||||
if (DstInst->mayStore())
|
||||
return;
|
||||
if (Dep.getReg() != DstInst->getOperand(0).getReg())
|
||||
return;
|
||||
} else if (QII->isPostIncrement(DstInst) && DstInst->mayStore() &&
|
||||
Dep.isAssignedRegDep()) {
|
||||
MachineOperand &Op = DstInst->getOperand(DstInst->getNumOperands() - 1);
|
||||
if (Op.isReg() && Dep.getReg() != Op.getReg())
|
||||
return;
|
||||
}
|
||||
|
||||
// Check if we need to change any the latency values when Phis are added.
|
||||
if (useBSBScheduling() && SrcInst->isPHI()) {
|
||||
changePhiLatency(SrcInst, Dst, Dep);
|
||||
return;
|
||||
}
|
||||
|
||||
// Try to schedule uses near definitions to generate .cur.
|
||||
if (EnableDotCurSched && QII->isToBeScheduledASAP(SrcInst, DstInst) &&
|
||||
isBestZeroLatency(Src, Dst, QII)) {
|
||||
Dep.setLatency(0);
|
||||
return;
|
||||
}
|
||||
|
||||
updateLatency(SrcInst, DstInst, Dep);
|
||||
}
|
||||
|
||||
|
@ -127,6 +127,16 @@ public:
|
||||
void getPostRAMutations(
|
||||
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
|
||||
const override;
|
||||
|
||||
/// \brief Perform target specific adjustments to the latency of a schedule
|
||||
/// dependency.
|
||||
void adjustSchedDependency(SUnit *def, SUnit *use, SDep& dep) const override;
|
||||
|
||||
private:
|
||||
// Helper function responsible for increasing the latency only.
|
||||
void updateLatency(MachineInstr *SrcInst, MachineInstr *DstInst, SDep &Dep) const;
|
||||
bool isBestZeroLatency(SUnit *Src, SUnit *Dst, const HexagonInstrInfo *TII) const;
|
||||
void changePhiLatency(MachineInstr *SrcInst, SUnit *Dst, SDep &Dep) const;
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
Loading…
x
Reference in New Issue
Block a user