From bb92764db48a84680706439ee7f00d43be61fdb0 Mon Sep 17 00:00:00 2001 From: Andrea Di Biagio Date: Mon, 10 Jun 2019 12:50:08 +0000 Subject: [PATCH] [MCA] Further refactor the bottleneck analysis view. NFCI. llvm-svn: 362933 --- lib/MCA/HardwareUnits/Scheduler.cpp | 3 +- tools/llvm-mca/Views/BottleneckAnalysis.cpp | 216 ++++++++++++-------- tools/llvm-mca/Views/BottleneckAnalysis.h | 67 +++--- tools/llvm-mca/llvm-mca.cpp | 2 +- 4 files changed, 175 insertions(+), 113 deletions(-) diff --git a/lib/MCA/HardwareUnits/Scheduler.cpp b/lib/MCA/HardwareUnits/Scheduler.cpp index c7091203595..0f0f2ffb832 100644 --- a/lib/MCA/HardwareUnits/Scheduler.cpp +++ b/lib/MCA/HardwareUnits/Scheduler.cpp @@ -198,7 +198,8 @@ InstRef Scheduler::select() { Strategy->compare(IR, ReadySet[QueueIndex])) { Instruction &IS = *IR.getInstruction(); uint64_t BusyResourceMask = Resources->checkAvailability(IS.getDesc()); - IS.setCriticalResourceMask(BusyResourceMask); + if (BusyResourceMask) + IS.setCriticalResourceMask(BusyResourceMask); BusyResourceUnits |= BusyResourceMask; if (!BusyResourceMask) QueueIndex = I; diff --git a/tools/llvm-mca/Views/BottleneckAnalysis.cpp b/tools/llvm-mca/Views/BottleneckAnalysis.cpp index cde896fbb5f..1c38cb90831 100644 --- a/tools/llvm-mca/Views/BottleneckAnalysis.cpp +++ b/tools/llvm-mca/Views/BottleneckAnalysis.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "Views/BottleneckAnalysis.h" +#include "llvm/MC/MCInst.h" #include "llvm/MCA/Support.h" #include "llvm/Support/Format.h" @@ -40,43 +41,38 @@ PressureTracker::PressureTracker(const MCSchedModel &Model) } ResourceUsers.resize(NextResourceUsersIdx); - std::fill(ResourceUsers.begin(), ResourceUsers.end(), ~0U); + std::fill(ResourceUsers.begin(), ResourceUsers.end(), + std::make_pair(~0U, 0U)); } -void PressureTracker::getUniqueUsers( - uint64_t ResourceMask, SmallVectorImpl &UniqueUsers) const { +void PressureTracker::getResourceUsers(uint64_t ResourceMask, + SmallVectorImpl &Users) const { unsigned Index = getResourceStateIndex(ResourceMask); unsigned ProcResID = ResIdx2ProcResID[Index]; const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID); for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) { - unsigned From = getResourceUser(ProcResID, I); - if (find(UniqueUsers, From) == UniqueUsers.end()) - UniqueUsers.emplace_back(From); + const User U = getResourceUser(ProcResID, I); + if (U.second && IPI.find(U.first) != IPI.end()) + Users.emplace_back(U); } } -void PressureTracker::handleInstructionEvent(const HWInstructionEvent &Event) { +void PressureTracker::onInstructionDispatched(unsigned IID) { + IPI.insert(std::make_pair(IID, InstructionPressureInfo())); +} + +void PressureTracker::onInstructionExecuted(unsigned IID) { IPI.erase(IID); } + +void PressureTracker::handleInstructionIssuedEvent( + const HWInstructionIssuedEvent &Event) { unsigned IID = Event.IR.getSourceIndex(); - switch (Event.Type) { - default: - break; - case HWInstructionEvent::Dispatched: - IPI.insert(std::make_pair(IID, InstructionPressureInfo())); - break; - case HWInstructionEvent::Executed: - IPI.erase(IID); - break; - case HWInstructionEvent::Issued: { - const auto &IIE = static_cast(Event); - using ResourceRef = HWInstructionIssuedEvent::ResourceRef; - using ResourceUse = std::pair; - for (const ResourceUse &Use : IIE.UsedResources) { - const ResourceRef &RR = Use.first; - unsigned Index = ProcResID2ResourceUsersIndex[RR.first]; - Index += countTrailingZeros(RR.second); - ResourceUsers[Index] = IID; - } - } + using ResourceRef = HWInstructionIssuedEvent::ResourceRef; + using ResourceUse = std::pair; + for (const ResourceUse &Use : Event.UsedResources) { + const ResourceRef &RR = Use.first; + unsigned Index = ProcResID2ResourceUsersIndex[RR.first]; + Index += countTrailingZeros(RR.second); + ResourceUsers[Index] = std::make_pair(IID, Use.second.getNumerator()); } } @@ -125,7 +121,8 @@ void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) { if (!BusyResources) continue; - IPI[IR.getSourceIndex()].ResourcePressureCycles++; + unsigned IID = IR.getSourceIndex(); + IPI[IID].ResourcePressureCycles++; } break; } @@ -146,51 +143,59 @@ void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) { } #ifndef NDEBUG -void DependencyGraph::dumpRegDeps(raw_ostream &OS, MCInstPrinter &MCIP) const { +void DependencyGraph::dumpDependencyEdge(raw_ostream &OS, unsigned FromIID, + const DependencyEdge &DE, + MCInstPrinter &MCIP) const { + bool LoopCarried = FromIID >= DE.IID; + OS << " FROM: " << FromIID << " TO: " << DE.IID + << (LoopCarried ? " (loop carried)" : " "); + if (DE.Type == DT_REGISTER) { + OS << " - REGISTER: "; + MCIP.printRegName(OS, DE.ResourceOrRegID); + } else if (DE.Type == DT_MEMORY) { + OS << " - MEMORY"; + } else { + assert(DE.Type == DT_RESOURCE && "Unexpected unsupported dependency type!"); + OS << " - RESOURCE MASK: " << DE.ResourceOrRegID; + } + OS << " - CYCLES: " << DE.Cycles << '\n'; +} + +void DependencyGraph::dump(raw_ostream &OS, MCInstPrinter &MCIP) const { OS << "\nREG DEPS\n"; for (unsigned I = 0, E = Nodes.size(); I < E; ++I) { const DGNode &Node = Nodes[I]; - for (const DependencyEdge &DE : Node.RegDeps) { - bool LoopCarried = I >= DE.IID; - OS << " FROM: " << I << " TO: " << DE.IID - << (LoopCarried ? " (loop carried)" : " ") - << " - REGISTER: "; - MCIP.printRegName(OS, DE.ResourceOrRegID); - OS << " - CYCLES: " << DE.Cycles << '\n'; + for (const DependencyEdge &DE : Node.OutgoingEdges) { + if (DE.Type == DT_REGISTER) + dumpDependencyEdge(OS, I, DE, MCIP); } } -} -void DependencyGraph::dumpMemDeps(raw_ostream &OS) const { OS << "\nMEM DEPS\n"; for (unsigned I = 0, E = Nodes.size(); I < E; ++I) { const DGNode &Node = Nodes[I]; - for (const DependencyEdge &DE : Node.MemDeps) { - bool LoopCarried = I >= DE.IID; - OS << " FROM: " << I << " TO: " << DE.IID - << (LoopCarried ? " (loop carried)" : " ") - << " - MEMORY - CYCLES: " << DE.Cycles << '\n'; + for (const DependencyEdge &DE : Node.OutgoingEdges) { + if (DE.Type == DT_MEMORY) + dumpDependencyEdge(OS, I, DE, MCIP); } } -} -void DependencyGraph::dumpResDeps(raw_ostream &OS) const { OS << "\nRESOURCE DEPS\n"; for (unsigned I = 0, E = Nodes.size(); I < E; ++I) { const DGNode &Node = Nodes[I]; - for (const DependencyEdge &DE : Node.ResDeps) { - bool LoopCarried = I >= DE.IID; - OS << " FROM: " << I << " TO: " << DE.IID - << (LoopCarried ? "(loop carried)" : " ") - << " - RESOURCE MASK: " << DE.ResourceOrRegID; - OS << " - CYCLES: " << DE.Cycles << '\n'; + for (const DependencyEdge &DE : Node.OutgoingEdges) { + if (DE.Type == DT_RESOURCE) + dumpDependencyEdge(OS, I, DE, MCIP); } } } #endif // NDEBUG -void DependencyGraph::addDepImpl(SmallVectorImpl &Vec, - DependencyEdge &&Dep) { +void DependencyGraph::addDependency(unsigned From, DependencyEdge &&Dep) { + DGNode &NodeFrom = Nodes[From]; + DGNode &NodeTo = Nodes[Dep.IID]; + SmallVectorImpl &Vec = NodeFrom.OutgoingEdges; + auto It = find_if(Vec, [Dep](DependencyEdge &DE) { return DE.IID == Dep.IID && DE.ResourceOrRegID == Dep.ResourceOrRegID; }); @@ -201,38 +206,102 @@ void DependencyGraph::addDepImpl(SmallVectorImpl &Vec, } Vec.emplace_back(Dep); - Nodes[Dep.IID].NumPredecessors++; + NodeTo.NumPredecessors++; } BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti, - ArrayRef Sequence) - : STI(sti), Tracker(STI.getSchedModel()), DG(Sequence.size()), - Source(Sequence), TotalCycles(0), - PressureIncreasedBecauseOfResources(false), + MCInstPrinter &Printer, + ArrayRef S) + : STI(sti), MCIP(Printer), Tracker(STI.getSchedModel()), DG(S.size() * 3), + Source(S), TotalCycles(0), PressureIncreasedBecauseOfResources(false), PressureIncreasedBecauseOfRegisterDependencies(false), PressureIncreasedBecauseOfMemoryDependencies(false), SeenStallCycles(false), BPI() {} +void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To, + unsigned RegID, unsigned Cy) { + bool IsLoopCarried = From >= To; + unsigned SourceSize = Source.size(); + if (IsLoopCarried) { + DG.addRegisterDep(From, To + SourceSize, RegID, Cy); + DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cy); + return; + } + DG.addRegisterDep(From + SourceSize, To + SourceSize, RegID, Cy); +} + +void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To, unsigned Cy) { + bool IsLoopCarried = From >= To; + unsigned SourceSize = Source.size(); + if (IsLoopCarried) { + DG.addMemoryDep(From, To + SourceSize, Cy); + DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cy); + return; + } + DG.addMemoryDep(From + SourceSize, To + SourceSize, Cy); +} + +void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To, + uint64_t Mask, unsigned Cy) { + bool IsLoopCarried = From >= To; + unsigned SourceSize = Source.size(); + if (IsLoopCarried) { + DG.addResourceDep(From, To + SourceSize, Mask, Cy); + DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cy); + return; + } + DG.addResourceDep(From + SourceSize, To + SourceSize, Mask, Cy); +} + void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) { - Tracker.handleInstructionEvent(Event); + const unsigned IID = Event.IR.getSourceIndex(); + if (Event.Type == HWInstructionEvent::Dispatched) { + Tracker.onInstructionDispatched(IID); + return; + } + if (Event.Type == HWInstructionEvent::Executed) { + Tracker.onInstructionExecuted(IID); + return; + } + if (Event.Type != HWInstructionEvent::Issued) return; - const unsigned IID = Event.IR.getSourceIndex(); const Instruction &IS = *Event.IR.getInstruction(); - unsigned Cycles = Tracker.getRegisterPressureCycles(IID); unsigned To = IID % Source.size(); + + unsigned Cycles = Tracker.getResourcePressureCycles(IID); + if (Cycles) { + uint64_t ResourceMask = IS.getCriticalResourceMask(); + SmallVector, 4> Users; + while (ResourceMask) { + uint64_t Current = ResourceMask & (-ResourceMask); + Tracker.getResourceUsers(Current, Users); + for (const std::pair &U : Users) { + unsigned Cost = std::min(U.second, Cycles); + addResourceDep(U.first % Source.size(), To, Current, Cost); + } + Users.clear(); + ResourceMask ^= Current; + } + } + + Cycles = Tracker.getRegisterPressureCycles(IID); if (Cycles) { const CriticalDependency &RegDep = IS.getCriticalRegDep(); unsigned From = RegDep.IID % Source.size(); - DG.addRegDep(From, To, RegDep.RegID, Cycles); + addRegisterDep(From, To, RegDep.RegID, Cycles); } + Cycles = Tracker.getMemoryPressureCycles(IID); if (Cycles) { const CriticalDependency &MemDep = IS.getCriticalMemDep(); unsigned From = MemDep.IID % Source.size(); - DG.addMemDep(From, To, Cycles); + addMemoryDep(From, To, Cycles); } + + Tracker.handleInstructionIssuedEvent( + static_cast(Event)); } void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) { @@ -245,28 +314,9 @@ void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) { default: break; - case HWPressureEvent::RESOURCES: { + case HWPressureEvent::RESOURCES: PressureIncreasedBecauseOfResources = true; - - SmallVector UniqueUsers; - for (const InstRef &IR : Event.AffectedInstructions) { - const Instruction &IS = *IR.getInstruction(); - unsigned To = IR.getSourceIndex() % Source.size(); - unsigned BusyResources = - IS.getCriticalResourceMask() & Event.ResourceMask; - while (BusyResources) { - uint64_t Current = BusyResources & (-BusyResources); - Tracker.getUniqueUsers(Current, UniqueUsers); - for (unsigned User : UniqueUsers) - DG.addResourceDep(User % Source.size(), To, Current, 1); - BusyResources ^= Current; - } - UniqueUsers.clear(); - } - break; - } - case HWPressureEvent::REGISTER_DEPS: PressureIncreasedBecauseOfRegisterDependencies = true; break; diff --git a/tools/llvm-mca/Views/BottleneckAnalysis.h b/tools/llvm-mca/Views/BottleneckAnalysis.h index 4c4dc193e13..c208847fe9f 100644 --- a/tools/llvm-mca/Views/BottleneckAnalysis.h +++ b/tools/llvm-mca/Views/BottleneckAnalysis.h @@ -63,7 +63,8 @@ class PressureTracker { // There is one entry for every processor resource unit declared by the // processor model. An all_ones value is treated like an invalid instruction // identifier. - SmallVector ResourceUsers; + using User = std::pair; + SmallVector ResourceUsers; struct InstructionPressureInfo { unsigned RegisterPressureCycles; @@ -74,7 +75,7 @@ class PressureTracker { void updateResourcePressureDistribution(uint64_t CumulativeMask); - unsigned getResourceUser(unsigned ProcResID, unsigned UnitID) const { + User getResourceUser(unsigned ProcResID, unsigned UnitID) const { unsigned Index = ProcResID2ResourceUsersIndex[ProcResID]; return ResourceUsers[Index + UnitID]; } @@ -86,8 +87,8 @@ public: return ResourcePressureDistribution; } - void getUniqueUsers(uint64_t ResourceMask, - SmallVectorImpl &Users) const; + void getResourceUsers(uint64_t ResourceMask, + SmallVectorImpl &Users) const; unsigned getRegisterPressureCycles(unsigned IID) const { assert(IPI.find(IID) != IPI.end() && "Instruction is not tracked!"); @@ -107,12 +108,18 @@ public: return Info.ResourcePressureCycles; } + void onInstructionDispatched(unsigned IID); + void onInstructionExecuted(unsigned IID); + void handlePressureEvent(const HWPressureEvent &Event); - void handleInstructionEvent(const HWInstructionEvent &Event); + void handleInstructionIssuedEvent(const HWInstructionIssuedEvent &Event); }; class DependencyGraph { + enum DependencyType { DT_REGISTER, DT_MEMORY, DT_RESOURCE }; + struct DependencyEdge { + DependencyType Type; unsigned IID; uint64_t ResourceOrRegID; uint64_t Cycles; @@ -120,46 +127,44 @@ class DependencyGraph { struct DGNode { unsigned NumPredecessors; - SmallVector RegDeps; - SmallVector MemDeps; - SmallVector ResDeps; + SmallVector OutgoingEdges; }; SmallVector Nodes; - void addDepImpl(SmallVectorImpl &Vec, DependencyEdge &&DE); - DependencyGraph(const DependencyGraph &) = delete; DependencyGraph &operator=(const DependencyGraph &) = delete; -public: - DependencyGraph(unsigned NumNodes) : Nodes(NumNodes, DGNode()) {} + void addDependency(unsigned From, DependencyEdge &&DE); - void addRegDep(unsigned From, unsigned To, unsigned RegID, unsigned Cy) { - addDepImpl(Nodes[From].RegDeps, {To, RegID, Cy}); +#ifndef NDEBUG + void dumpDependencyEdge(raw_ostream &OS, unsigned FromIID, + const DependencyEdge &DE, MCInstPrinter &MCIP) const; +#endif + +public: + DependencyGraph(unsigned Size) : Nodes(Size) {} + + void addRegisterDep(unsigned From, unsigned To, unsigned RegID, unsigned Cy) { + addDependency(From, {DT_REGISTER, To, RegID, Cy}); } - void addMemDep(unsigned From, unsigned To, unsigned Cy) { - addDepImpl(Nodes[From].MemDeps, {To, /* unused */ 0, Cy}); + + void addMemoryDep(unsigned From, unsigned To, unsigned Cy) { + addDependency(From, {DT_MEMORY, To, /* unused */ 0, Cy}); } + void addResourceDep(unsigned From, unsigned To, uint64_t Mask, unsigned Cy) { - addDepImpl(Nodes[From].ResDeps, {To, Mask, Cy}); + addDependency(From, {DT_RESOURCE, To, Mask, Cy}); } #ifndef NDEBUG - void dumpRegDeps(raw_ostream &OS, MCInstPrinter &MCIP) const; - void dumpMemDeps(raw_ostream &OS) const; - void dumpResDeps(raw_ostream &OS) const; - - void dump(raw_ostream &OS, MCInstPrinter &MCIP) const { - dumpRegDeps(OS, MCIP); - dumpMemDeps(OS); - dumpResDeps(OS); - } + void dump(raw_ostream &OS, MCInstPrinter &MCIP) const; #endif }; /// A view that collects and prints a few performance numbers. class BottleneckAnalysis : public View { const MCSubtargetInfo &STI; + MCInstPrinter &MCIP; PressureTracker Tracker; DependencyGraph DG; @@ -189,8 +194,14 @@ class BottleneckAnalysis : public View { // Prints a bottleneck message to OS. void printBottleneckHints(raw_ostream &OS) const; + // Used to populate the dependency graph DG. + void addRegisterDep(unsigned From, unsigned To, unsigned RegID, unsigned Cy); + void addMemoryDep(unsigned From, unsigned To, unsigned Cy); + void addResourceDep(unsigned From, unsigned To, uint64_t Mask, unsigned Cy); + public: - BottleneckAnalysis(const MCSubtargetInfo &STI, ArrayRef Sequence); + BottleneckAnalysis(const MCSubtargetInfo &STI, MCInstPrinter &MCIP, + ArrayRef Sequence); void onCycleEnd() override; void onEvent(const HWStallEvent &Event) override { SeenStallCycles = true; } @@ -200,7 +211,7 @@ public: void printView(raw_ostream &OS) const override; #ifndef NDEBUG - void dump(raw_ostream &OS, MCInstPrinter &MCIP) const { DG.dump(OS, MCIP); } + void dump(raw_ostream &OS) const { DG.dump(OS, MCIP); } #endif }; diff --git a/tools/llvm-mca/llvm-mca.cpp b/tools/llvm-mca/llvm-mca.cpp index a875c70e17f..6e6d23ff6f1 100644 --- a/tools/llvm-mca/llvm-mca.cpp +++ b/tools/llvm-mca/llvm-mca.cpp @@ -487,7 +487,7 @@ int main(int argc, char **argv) { llvm::make_unique(SM, Insts, DispatchWidth)); if (EnableBottleneckAnalysis) - Printer.addView(llvm::make_unique(*STI, Insts)); + Printer.addView(llvm::make_unique(*STI, *IP, Insts)); if (PrintInstructionInfoView) Printer.addView(