From 81a682a4c004b0f44452ef824637a1b6face178f Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Thu, 23 Feb 2012 01:52:38 +0000 Subject: [PATCH] PostRASched: Convert physreg def/use tracking to Jakob's SparseSet. Added array subscript to SparseSet for convenience. Slight reorg to make it easier to manage the def/use sets. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@151228 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/ADT/SparseSet.h | 7 ++ lib/CodeGen/ScheduleDAGInstrs.cpp | 172 ++++++++++++++++-------------- lib/CodeGen/ScheduleDAGInstrs.h | 24 +++-- 3 files changed, 115 insertions(+), 88 deletions(-) diff --git a/include/llvm/ADT/SparseSet.h b/include/llvm/ADT/SparseSet.h index 7801d8d9bd1..9b276d55331 100644 --- a/include/llvm/ADT/SparseSet.h +++ b/include/llvm/ADT/SparseSet.h @@ -213,6 +213,13 @@ public: return std::make_pair(end() - 1, true); } + /// array subscript - If an element already exists with this key, return it. + /// Otherwise, automatically construct a new value from Key, insert it, + /// and return the newly inserted element. + ValueT &operator[](unsigned Key) { + return *insert(ValueT(Key)).first; + } + /// erase - Erases an existing element identified by a valid iterator. /// /// This invalidates all iterators, but erase() returns an iterator pointing diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 5c1247f987a..29e4df670c3 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -39,9 +39,7 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, LiveIntervals *lis) : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), InstrItins(mf.getTarget().getInstrItineraryData()), IsPostRA(IsPostRAFlag), - LIS(lis), UnitLatencies(false), - Defs(TRI->getNumRegs()), Uses(TRI->getNumRegs()), - LoopRegs(MLI, MDT), FirstDbgValue(0) { + LIS(lis), UnitLatencies(false), LoopRegs(MLI, MDT), FirstDbgValue(0) { assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals"); DbgValues.clear(); assert(!(IsPostRA && MRI.getNumVirtRegs()) && @@ -173,7 +171,7 @@ void ScheduleDAGInstrs::AddSchedBarrierDeps() { if (Reg == 0) continue; if (TRI->isPhysicalRegister(Reg)) - Uses[Reg].push_back(&ExitSU); + Uses[Reg].SUnits.push_back(&ExitSU); else assert(!IsPostRA && "Virtual register encountered after regalloc."); } @@ -187,22 +185,68 @@ void ScheduleDAGInstrs::AddSchedBarrierDeps() { E = (*SI)->livein_end(); I != E; ++I) { unsigned Reg = *I; if (Seen.insert(Reg)) - Uses[Reg].push_back(&ExitSU); + Uses[Reg].SUnits.push_back(&ExitSU); } } } +/// MO is an operand of SU's instruction that defines a physical register. Add +/// data dependencies from SU to any uses of the physical register. +void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, + const MachineOperand &MO) { + assert(MO.isDef() && "expect physreg def"); + + // Ask the target if address-backscheduling is desirable, and if so how much. + const TargetSubtargetInfo &ST = TM.getSubtarget(); + unsigned SpecialAddressLatency = ST.getSpecialAddressLatency(); + unsigned DataLatency = SU->Latency; + + for (const unsigned *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) { + Reg2SUnitsMap::iterator UsesI = Uses.find(*Alias); + if (UsesI == Uses.end()) + continue; + std::vector &UseList = UsesI->SUnits; + for (unsigned i = 0, e = UseList.size(); i != e; ++i) { + SUnit *UseSU = UseList[i]; + if (UseSU == SU) + continue; + unsigned LDataLatency = DataLatency; + // Optionally add in a special extra latency for nodes that + // feed addresses. + // TODO: Perhaps we should get rid of + // SpecialAddressLatency and just move this into + // adjustSchedDependency for the targets that care about it. + if (SpecialAddressLatency != 0 && !UnitLatencies && + UseSU != &ExitSU) { + MachineInstr *UseMI = UseSU->getInstr(); + const MCInstrDesc &UseMCID = UseMI->getDesc(); + int RegUseIndex = UseMI->findRegisterUseOperandIdx(*Alias); + assert(RegUseIndex >= 0 && "UseMI doesn't use register!"); + if (RegUseIndex >= 0 && + (UseMI->mayLoad() || UseMI->mayStore()) && + (unsigned)RegUseIndex < UseMCID.getNumOperands() && + UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass()) + LDataLatency += SpecialAddressLatency; + } + // Adjust the dependence latency using operand def/use + // information (if any), and then allow the target to + // perform its own adjustments. + const SDep& dep = SDep(SU, SDep::Data, LDataLatency, *Alias); + if (!UnitLatencies) { + ComputeOperandLatency(SU, UseSU, const_cast(dep)); + ST.adjustSchedDependency(SU, UseSU, const_cast(dep)); + } + UseSU->addPred(dep); + } + } +} + /// addPhysRegDeps - Add register dependencies (data, anti, and output) from /// this SUnit to following instructions in the same scheduling region that /// depend the physical register referenced at OperIdx. void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { const MachineInstr *MI = SU->getInstr(); const MachineOperand &MO = MI->getOperand(OperIdx); - unsigned Reg = MO.getReg(); - - // Ask the target if address-backscheduling is desirable, and if so how much. - const TargetSubtargetInfo &ST = TM.getSubtarget(); - unsigned SpecialAddressLatency = ST.getSpecialAddressLatency(); // Optionally add output and anti dependencies. For anti // dependencies we use a latency of 0 because for a multi-issue @@ -211,8 +255,11 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { // TODO: Using a latency of 1 here for output dependencies assumes // there's no cost for reusing registers. SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output; - for (const unsigned *Alias = TRI->getOverlaps(Reg); *Alias; ++Alias) { - std::vector &DefList = Defs[*Alias]; + for (const unsigned *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) { + Reg2SUnitsMap::iterator DefI = Defs.find(*Alias); + if (DefI == Defs.end()) + continue; + std::vector &DefList = DefI->SUnits; for (unsigned i = 0, e = DefList.size(); i != e; ++i) { SUnit *DefSU = DefList[i]; if (DefSU == &ExitSU) @@ -231,73 +278,32 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { } } - // Retrieve the UseList to add data dependencies and update uses. - std::vector &UseList = Uses[Reg]; - if (MO.isDef()) { - // Update DefList. Defs are pushed in the order they are visited and - // never reordered. - std::vector &DefList = Defs[Reg]; + if (!MO.isDef()) { + // Either insert a new Reg2SUnits entry with an empty SUnits list, or + // retrieve the existing SUnits list for this register's uses. + // Push this SUnit on the use list. + Uses[MO.getReg()].SUnits.push_back(SU); + } + else { + addPhysRegDataDeps(SU, MO); - // Add any data dependencies. - unsigned DataLatency = SU->Latency; - for (unsigned i = 0, e = UseList.size(); i != e; ++i) { - SUnit *UseSU = UseList[i]; - if (UseSU == SU) - continue; - unsigned LDataLatency = DataLatency; - // Optionally add in a special extra latency for nodes that - // feed addresses. - // TODO: Do this for register aliases too. - // TODO: Perhaps we should get rid of - // SpecialAddressLatency and just move this into - // adjustSchedDependency for the targets that care about it. - if (SpecialAddressLatency != 0 && !UnitLatencies && - UseSU != &ExitSU) { - MachineInstr *UseMI = UseSU->getInstr(); - const MCInstrDesc &UseMCID = UseMI->getDesc(); - int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg); - assert(RegUseIndex >= 0 && "UseMI doesn's use register!"); - if (RegUseIndex >= 0 && - (UseMI->mayLoad() || UseMI->mayStore()) && - (unsigned)RegUseIndex < UseMCID.getNumOperands() && - UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass()) - LDataLatency += SpecialAddressLatency; - } - // Adjust the dependence latency using operand def/use - // information (if any), and then allow the target to - // perform its own adjustments. - const SDep& dep = SDep(SU, SDep::Data, LDataLatency, Reg); - if (!UnitLatencies) { - ComputeOperandLatency(SU, UseSU, const_cast(dep)); - ST.adjustSchedDependency(SU, UseSU, const_cast(dep)); - } - UseSU->addPred(dep); - } - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - std::vector &UseList = Uses[*Alias]; - for (unsigned i = 0, e = UseList.size(); i != e; ++i) { - SUnit *UseSU = UseList[i]; - if (UseSU == SU) - continue; - const SDep& dep = SDep(SU, SDep::Data, DataLatency, *Alias); - if (!UnitLatencies) { - ComputeOperandLatency(SU, UseSU, const_cast(dep)); - ST.adjustSchedDependency(SU, UseSU, const_cast(dep)); - } - UseSU->addPred(dep); - } - } + // Either insert a new Reg2SUnits entry with an empty SUnits list, or + // retrieve the existing SUnits list for this register's defs. + std::vector &DefList = Defs[MO.getReg()].SUnits; // If a def is going to wrap back around to the top of the loop, // backschedule it. if (!UnitLatencies && DefList.empty()) { - LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(Reg); + LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(MO.getReg()); if (I != LoopRegs.Deps.end()) { const MachineOperand *UseMO = I->second.first; unsigned Count = I->second.second; const MachineInstr *UseMI = UseMO->getParent(); unsigned UseMOIdx = UseMO - &UseMI->getOperand(0); const MCInstrDesc &UseMCID = UseMI->getDesc(); + const TargetSubtargetInfo &ST = + TM.getSubtarget(); + unsigned SpecialAddressLatency = ST.getSpecialAddressLatency(); // TODO: If we knew the total depth of the region here, we could // handle the case where the whole loop is inside the region but // is large enough that the isScheduleHigh trick isn't needed. @@ -332,7 +338,11 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { } } - UseList.clear(); + // clear this register's use list + Reg2SUnitsMap::iterator UsesI = Uses.find(MO.getReg()); + if (UsesI != Uses.end()) + UsesI->SUnits.clear(); + if (!MO.isDead()) DefList.clear(); @@ -345,9 +355,8 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { while (!DefList.empty() && DefList.back()->isCall) DefList.pop_back(); } + // Defs are pushed in the order they are visited and never reordered. DefList.push_back(SU); - } else { - UseList.push_back(SU); } } @@ -482,13 +491,10 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { DbgValues.clear(); FirstDbgValue = NULL; - // Model data dependencies between instructions being scheduled and the - // ExitSU. - AddSchedBarrierDeps(); - - for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) { - assert(Defs[i].empty() && "Only BuildGraph should push/pop Defs"); - } + assert(Defs.empty() && Uses.empty() && + "Only BuildGraph should update Defs/Uses"); + Defs.setUniverse(TRI->getNumRegs()); + Uses.setUniverse(TRI->getNumRegs()); assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs"); // FIXME: Allow SparseSet to reserve space for the creation of virtual @@ -496,6 +502,10 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // because we want to assert that vregs are not created during DAG building. VRegDefs.setUniverse(MRI.getNumVirtRegs()); + // Model data dependencies between instructions being scheduled and the + // ExitSU. + AddSchedBarrierDeps(); + // Walk the list of instructions, from bottom moving up. MachineInstr *PrevMI = NULL; for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin; @@ -685,10 +695,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { if (PrevMI) FirstDbgValue = PrevMI; - for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) { - Defs[i].clear(); - Uses[i].clear(); - } + Defs.clear(); + Uses.clear(); VRegDefs.clear(); PendingLoads.clear(); MISUnitMap.clear(); diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h index 3b450b0de90..ea61f2f6617 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.h +++ b/lib/CodeGen/ScheduleDAGInstrs.h @@ -118,12 +118,15 @@ namespace llvm { /// the def-side latency only. bool UnitLatencies; - /// Defs, Uses - Remember where defs and uses of each register are as we - /// iterate upward through the instructions. This is allocated here instead - /// of inside BuildSchedGraph to avoid the need for it to be initialized and - /// destructed for each block. - std::vector > Defs; - std::vector > Uses; + /// An individual mapping from physical register number to an SUnit vector. + struct Reg2SUnits { + unsigned PhysReg; + std::vector SUnits; + + explicit Reg2SUnits(unsigned reg): PhysReg(reg) {} + + unsigned getSparseSetKey() const { return PhysReg; } + }; /// An individual mapping from virtual register number to SUnit. struct VReg2SUnit { @@ -139,8 +142,16 @@ namespace llvm { // Use SparseSet as a SparseMap by relying on the fact that it never // compares ValueT's, only unsigned keys. This allows the set to be cleared // between scheduling regions in constant time. + typedef SparseSet Reg2SUnitsMap; typedef SparseSet VReg2SUnitMap; + /// Defs, Uses - Remember where defs and uses of each register are as we + /// iterate upward through the instructions. This is allocated here instead + /// of inside BuildSchedGraph to avoid the need for it to be initialized and + /// destructed for each block. + Reg2SUnitsMap Defs; + Reg2SUnitsMap Uses; + // Track the last instructon in this region defining each virtual register. VReg2SUnitMap VRegDefs; @@ -247,6 +258,7 @@ namespace llvm { } void initSUnits(); + void addPhysRegDataDeps(SUnit *SU, const MachineOperand &MO); void addPhysRegDeps(SUnit *SU, unsigned OperIdx); void addVRegDefDeps(SUnit *SU, unsigned OperIdx); void addVRegUseDeps(SUnit *SU, unsigned OperIdx);