From 020f4106f820648fd7e91956859844a80de13974 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Wed, 14 Dec 2011 20:00:08 +0000 Subject: [PATCH] Model ARM predicated write as read-mod-write. e.g. r0 = mov #0 r0 = moveq #1 Then the second instruction has an implicit data dependency on the first instruction. Sadly I have yet to come up with a small test case that demonstrates the post-ra scheduler taking advantage of this. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@146583 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetInstrInfo.h | 5 +-- lib/CodeGen/ScheduleDAGInstrs.cpp | 4 +- lib/Target/ARM/ARMBaseInstrInfo.cpp | 55 ++++++++++++++++++++------- lib/Target/ARM/ARMBaseInstrInfo.h | 4 ++ 4 files changed, 49 insertions(+), 19 deletions(-) diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h index 957a89af820..84092297a62 100644 --- a/include/llvm/Target/TargetInstrInfo.h +++ b/include/llvm/Target/TargetInstrInfo.h @@ -652,9 +652,8 @@ public: /// a given pair of defs which both target the same register. This is usually /// one. 
virtual unsigned getOutputLatency(const InstrItineraryData *ItinData, - const MachineInstr *DefMI1, - const MachineInstr *DefMI2, - unsigned Reg) const { + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *DepMI) const { return 1; } diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 47c533932d6..4418f4023a3 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -281,8 +281,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { if (Kind == SDep::Anti) DefSU->addPred(SDep(SU, Kind, 0, /*Reg=*/Reg)); else { - unsigned AOLat = TII->getOutputLatency(InstrItins, MI, - DefSU->getInstr(), Reg); + unsigned AOLat = TII->getOutputLatency(InstrItins, MI, j, + DefSU->getInstr()); DefSU->addPred(SDep(SU, Kind, AOLat, /*Reg=*/Reg)); } } diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 5ee2dc8a2dd..8bf5475fcd7 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -2360,7 +2360,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, } static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI, - const MachineInstr *MI, + const MachineInstr *MI, unsigned Reg, unsigned &DefIdx, unsigned &Dist) { Dist = 0; @@ -2370,7 +2370,6 @@ static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI, assert(II->isInsideBundle() && "Empty bundle?"); int Idx = -1; - unsigned Reg = MI->getOperand(DefIdx).getReg(); while (II->isInsideBundle()) { Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI); if (Idx != -1) @@ -2385,7 +2384,7 @@ static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI, } static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI, - const MachineInstr *MI, + const MachineInstr *MI, unsigned Reg, unsigned &UseIdx, unsigned &Dist) { Dist = 0; @@ -2395,7 +2394,6 @@ static const MachineInstr *getBundledUseMI(const 
TargetRegisterInfo *TRI, // FIXME: This doesn't properly handle multiple uses. int Idx = -1; - unsigned Reg = MI->getOperand(UseIdx).getReg(); while (II != E && II->isInsideBundle()) { Idx = II->findRegisterUseOperandIdx(Reg, false, TRI); if (Idx != -1) @@ -2405,7 +2403,11 @@ static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI, ++II; } - assert(Idx != -1 && "Cannot find bundled definition!"); + if (Idx == -1) { + Dist = 0; + return 0; + } + UseIdx = Idx; return II; } @@ -2424,7 +2426,8 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, const MCInstrDesc *DefMCID = &DefMI->getDesc(); const MCInstrDesc *UseMCID = &UseMI->getDesc(); const MachineOperand &DefMO = DefMI->getOperand(DefIdx); - if (DefMO.getReg() == ARM::CPSR) { + unsigned Reg = DefMO.getReg(); + if (Reg == ARM::CPSR) { if (DefMI->getOpcode() == ARM::FMSTAT) { // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?) return Subtarget.isCortexA9() ? 1 : 20; @@ -2436,11 +2439,16 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, // Otherwise it takes the instruction latency (generally one). int Latency = getInstrLatency(ItinData, DefMI); - // For Thumb2, prefer scheduling CPSR setting instruction close to its uses. - // Instructions which are otherwise scheduled between them may incur a code - // size penalty (not able to use the CPSR setting 16-bit instructions). - if (Latency > 0 && Subtarget.isThumb2()) - --Latency; + + // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to + // its uses. Instructions which are otherwise scheduled between them may + // incur a code size penalty (not able to use the CPSR setting 16-bit + // instructions). 
+ if (Latency > 0 && Subtarget.isThumb2()) { + const MachineFunction *MF = DefMI->getParent()->getParent(); + if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize)) + --Latency; + } return Latency; } @@ -2451,7 +2459,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, unsigned DefAdj = 0; if (DefMI->isBundle()) { - DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, DefIdx, DefAdj); + DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj); if (DefMI->isCopyLike() || DefMI->isInsertSubreg() || DefMI->isRegSequence() || DefMI->isImplicitDef()) return 1; @@ -2459,8 +2467,14 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, } unsigned UseAdj = 0; if (UseMI->isBundle()) { - UseMI = getBundledUseMI(&getRegisterInfo(), UseMI, UseIdx, UseAdj); - UseMCID = &UseMI->getDesc(); + unsigned NewUseIdx; + const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI, + Reg, NewUseIdx, UseAdj); + if (NewUseMI) { + UseMI = NewUseMI; + UseIdx = NewUseIdx; + UseMCID = &UseMI->getDesc(); + } } int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign, @@ -2797,6 +2811,19 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, return Latency; } +unsigned +ARMBaseInstrInfo::getOutputLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *DepMI) const { + unsigned Reg = DefMI->getOperand(DefIdx).getReg(); + if (DepMI->readsRegister(Reg, &getRegisterInfo()) || !isPredicated(DepMI)) + return 1; + + // If the second MI is predicated, then there is an implicit use dependency. 
+ return getOperandLatency(ItinData, DefMI, DefIdx, DepMI, + DepMI->getNumOperands()); +} + int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr *MI, unsigned *PredCost) const { diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 4ce74617629..68e8208eedd 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -210,6 +210,10 @@ public: SDNode *DefNode, unsigned DefIdx, SDNode *UseNode, unsigned UseIdx) const; + virtual unsigned getOutputLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *DepMI) const; + /// VFP/NEON execution domains. std::pair getExecutionDomain(const MachineInstr *MI) const;