From d905bba691a96fb3ae4057dfe96c7969a78fda88 Mon Sep 17 00:00:00 2001 From: "Duncan P. N. Exon Smith" Date: Fri, 25 Apr 2014 23:08:57 +0000 Subject: [PATCH] blockfreq: Approximate irreducible control flow Previously, irreducible backedges were ignored. With this commit, irreducible SCCs are discovered on the fly, and modelled as loops with multiple headers. This approximation specifies the headers of irreducible sub-SCCs as its entry blocks and all nodes that are targets of a backedge within it (excluding backedges within true sub-loops). Block frequency calculations act as if we insert a new block that intercepts all the edges to the headers. All backedges and entries to the irreducible SCC point to this imaginary block. This imaginary block has an edge (with even probability) to each header block. The result is now reasonable enough that I've added a number of testcases for irreducible control flow. I've outlined in `BlockFrequencyInfoImpl.h` ways to improve the approximation. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@207286 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../llvm/Analysis/BlockFrequencyInfoImpl.h | 413 +++++++++++++++--- lib/Analysis/BlockFrequencyInfoImpl.cpp | 230 +++++++++- .../BlockFrequencyInfo/irreducible.ll | 339 +++++++++++--- 3 files changed, 850 insertions(+), 132 deletions(-) diff --git a/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/include/llvm/Analysis/BlockFrequencyInfoImpl.h index e5e9b479523..5e9920660c8 100644 --- a/include/llvm/Analysis/BlockFrequencyInfoImpl.h +++ b/include/llvm/Analysis/BlockFrequencyInfoImpl.h @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// // // Shared implementation of BlockFrequency for IR and Machine Instructions. +// See the documentation below for BlockFrequencyInfoImpl for details. 
// //===----------------------------------------------------------------------===// @@ -16,6 +17,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/iterator_range.h" #include "llvm/IR/BasicBlock.h" #include "llvm/Support/BlockFrequency.h" @@ -896,6 +898,10 @@ class MachineFunction; class MachineLoop; class MachineLoopInfo; +namespace bfi_detail { +struct IrreducibleGraph; +} + /// \brief Base class for BlockFrequencyInfoImpl /// /// BlockFrequencyInfoImplBase has supporting data structures and some @@ -948,6 +954,7 @@ public: typedef SmallVector NodeList; LoopData *Parent; ///< The parent loop. bool IsPackaged; ///< Whether this has been packaged. + uint32_t NumHeaders; ///< Number of headers. ExitMap Exits; ///< Successor edges (and weights). NodeList Nodes; ///< Header and the members of the loop. BlockMass BackedgeMass; ///< Mass returned to loop header. @@ -955,11 +962,26 @@ public: Float Scale; LoopData(LoopData *Parent, const BlockNode &Header) - : Parent(Parent), IsPackaged(false), Nodes(1, Header) {} - bool isHeader(const BlockNode &Node) const { return Node == Nodes[0]; } + : Parent(Parent), IsPackaged(false), NumHeaders(1), Nodes(1, Header) {} + template + LoopData(LoopData *Parent, It1 FirstHeader, It1 LastHeader, It2 FirstOther, + It2 LastOther) + : Parent(Parent), IsPackaged(false), Nodes(FirstHeader, LastHeader) { + NumHeaders = Nodes.size(); + Nodes.insert(Nodes.end(), FirstOther, LastOther); + } + bool isHeader(const BlockNode &Node) const { + if (isIrreducible()) + return std::binary_search(Nodes.begin(), Nodes.begin() + NumHeaders, + Node); + return Node == Nodes[0]; + } BlockNode getHeader() const { return Nodes[0]; } + bool isIrreducible() const { return NumHeaders > 1; } - NodeList::const_iterator members_begin() const { return Nodes.begin() + 1; } + NodeList::const_iterator members_begin() const { + return Nodes.begin() + NumHeaders; + } NodeList::const_iterator 
members_end() const { return Nodes.end(); } iterator_range members() const { return make_range(members_begin(), members_end()); @@ -975,9 +997,17 @@ public: WorkingData(const BlockNode &Node) : Node(Node), Loop(nullptr) {} bool isLoopHeader() const { return Loop && Loop->isHeader(Node); } + bool isDoubleLoopHeader() const { + return isLoopHeader() && Loop->Parent && Loop->Parent->isIrreducible() && + Loop->Parent->isHeader(Node); + } LoopData *getContainingLoop() const { - return isLoopHeader() ? Loop->Parent : Loop; + if (!isLoopHeader()) + return Loop; + if (!isDoubleLoopHeader()) + return Loop->Parent; + return Loop->Parent->Parent; } /// \brief Resolve a node to its representative. @@ -1011,12 +1041,22 @@ public: /// Get appropriate mass for Node. If Node is a loop-header (whose loop /// has been packaged), returns the mass of its pseudo-node. If it's a /// node inside a packaged loop, it returns the loop's mass. - BlockMass &getMass() { return isAPackage() ? Loop->Mass : Mass; } + BlockMass &getMass() { + if (!isAPackage()) + return Mass; + if (!isADoublePackage()) + return Loop->Mass; + return Loop->Parent->Mass; + } /// \brief Has ContainingLoop been packaged up? bool isPackaged() const { return getResolvedNode() != Node; } /// \brief Has Loop been packaged up? bool isAPackage() const { return isLoopHeader() && Loop->IsPackaged; } + /// \brief Has Loop been packaged up twice? + bool isADoublePackage() const { + return isDoubleLoopHeader() && Loop->Parent->IsPackaged; + } }; /// \brief Unscaled probability weight. @@ -1093,7 +1133,9 @@ public: /// /// Adds all edges from LocalLoopHead to Dist. Calls addToDist() to add each /// successor edge. - void addLoopSuccessorsToDist(const LoopData *OuterLoop, LoopData &Loop, + /// + /// \return \c true unless there's an irreducible backedge. + bool addLoopSuccessorsToDist(const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist); /// \brief Add an edge to the distribution. 
@@ -1101,7 +1143,9 @@ public: /// Adds an edge to Succ to Dist. If \c LoopHead.isValid(), then whether the /// edge is local/exit/backedge is in the context of LoopHead. Otherwise, /// every edge should be a local edge (since all the loops are packaged up). - void addToDist(Distribution &Dist, const LoopData *OuterLoop, + /// + /// \return \c true unless aborted due to an irreducible backedge. + bool addToDist(Distribution &Dist, const LoopData *OuterLoop, const BlockNode &Pred, const BlockNode &Succ, uint64_t Weight); LoopData &getLoopPackage(const BlockNode &Head) { @@ -1110,6 +1154,25 @@ public: return *Working[Head.Index].Loop; } + /// \brief Analyze irreducible SCCs. + /// + /// Separate irreducible SCCs from \c G, which is an explicit graph of \c + /// OuterLoop (or the top-level function, if \c OuterLoop is \c nullptr). + /// Insert them into \a Loops before \c Insert. + /// + /// \return the \c LoopData nodes representing the irreducible SCCs. + iterator_range::iterator> + analyzeIrreducible(const bfi_detail::IrreducibleGraph &G, LoopData *OuterLoop, + std::list::iterator Insert); + + /// \brief Update a loop after packaging irreducible SCCs inside of it. + /// + /// Update \c OuterLoop. Before finding irreducible control flow, it was + /// partway through \a computeMassInLoop(), so \a LoopData::Exits and \a + /// LoopData::BackedgeMass need to be reset. Also, nodes that were packaged + /// up need to be removed from \a OuterLoop::Nodes. + void updateLoopWithIrreducible(LoopData &OuterLoop); + /// \brief Distribute mass according to a distribution. /// /// Distributes the mass in Source according to Dist. 
If LoopHead.isValid(), @@ -1138,6 +1201,7 @@ public: void clear(); virtual std::string getBlockName(const BlockNode &Node) const; + std::string getLoopName(const LoopData &Loop) const; virtual raw_ostream &print(raw_ostream &OS) const { return OS; } void dump() const { print(dbgs()); } @@ -1197,6 +1261,106 @@ template <> inline std::string getBlockName(const BasicBlock *BB) { assert(BB && "Unexpected nullptr"); return BB->getName().str(); } + +/// \brief Graph of irreducible control flow. +/// +/// This graph is used for determining the SCCs in a loop (or top-level +/// function) that has irreducible control flow. +/// +/// During the block frequency algorithm, the local graphs are defined in a +/// light-weight way, deferring to the \a BasicBlock or \a MachineBasicBlock +/// graphs for most edges, but getting others from \a LoopData::ExitMap. The +/// latter only has successor information. +/// +/// \a IrreducibleGraph makes this graph explicit. It's in a form that can use +/// \a GraphTraits (so that \a analyzeIrreducible() can use \a scc_iterator), +/// and it explicitly lists predecessors and successors. The initialization +/// that relies on \c MachineBasicBlock is defined in the header. +struct IrreducibleGraph { + typedef BlockFrequencyInfoImplBase BFIBase; + + BFIBase &BFI; + + typedef BFIBase::BlockNode BlockNode; + struct IrrNode { + BlockNode Node; + unsigned NumIn; + std::deque Edges; + IrrNode(const BlockNode &Node) : Node(Node), NumIn(0) {} + + typedef typename std::deque::const_iterator iterator; + iterator pred_begin() const { return Edges.begin(); } + iterator succ_begin() const { return Edges.begin() + NumIn; } + iterator pred_end() const { return succ_begin(); } + iterator succ_end() const { return Edges.end(); } + }; + BlockNode Start; + const IrrNode *StartIrr; + std::vector Nodes; + SmallDenseMap Lookup; + + /// \brief Construct an explicit graph containing irreducible control flow. 
+ /// + /// Construct an explicit graph of the control flow in \c OuterLoop (or the + /// top-level function, if \c OuterLoop is \c nullptr). Uses \c + /// addBlockEdges to add block successors that have not been packaged into + /// loops. + /// + /// \a BlockFrequencyInfoImpl::computeIrreducibleMass() is the only expected + /// user of this. + template + IrreducibleGraph(BFIBase &BFI, const BFIBase::LoopData *OuterLoop, + BlockEdgesAdder addBlockEdges) + : BFI(BFI), StartIrr(nullptr) { + initialize(OuterLoop, addBlockEdges); + } + + template + void initialize(const BFIBase::LoopData *OuterLoop, + BlockEdgesAdder addBlockEdges); + void addNodesInLoop(const BFIBase::LoopData &OuterLoop); + void addNodesInFunction(); + void addNode(const BlockNode &Node) { + Nodes.emplace_back(Node); + BFI.Working[Node.Index].getMass() = BlockMass::getEmpty(); + } + void indexNodes(); + template + void addEdges(const BlockNode &Node, const BFIBase::LoopData *OuterLoop, + BlockEdgesAdder addBlockEdges); + void addEdge(IrrNode &Irr, const BlockNode &Succ, + const BFIBase::LoopData *OuterLoop); +}; +template +void IrreducibleGraph::initialize(const BFIBase::LoopData *OuterLoop, + BlockEdgesAdder addBlockEdges) { + if (OuterLoop) { + addNodesInLoop(*OuterLoop); + for (auto N : OuterLoop->Nodes) + addEdges(N, OuterLoop, addBlockEdges); + } else { + addNodesInFunction(); + for (uint32_t Index = 0; Index < BFI.Working.size(); ++Index) + addEdges(Index, OuterLoop, addBlockEdges); + } + StartIrr = Lookup[Start.Index]; +} +template +void IrreducibleGraph::addEdges(const BlockNode &Node, + const BFIBase::LoopData *OuterLoop, + BlockEdgesAdder addBlockEdges) { + auto L = Lookup.find(Node.Index); + if (L == Lookup.end()) + return; + IrrNode &Irr = *L->second; + const auto &Working = BFI.Working[Node.Index]; + + if (Working.isAPackage()) + for (const auto &I : Working.Loop->Exits) + addEdge(Irr, I.first, OuterLoop); + else + addBlockEdges(*this, Irr, OuterLoop); +} } /// \brief Shared 
implementation for block frequency analysis. @@ -1205,6 +1369,22 @@ template <> inline std::string getBlockName(const BasicBlock *BB) { /// MachineBlockFrequencyInfo, and calculates the relative frequencies of /// blocks. /// +/// LoopInfo defines a loop as a "non-trivial" SCC dominated by a single block, +/// which is called the header. A given loop, L, can have sub-loops, which are +/// loops within the subgraph of L that exclude its header. (A "trivial" SCC +/// consists of a single block that does not have a self-edge.) +/// +/// In addition to loops, this algorithm has limited support for irreducible +/// SCCs, which are SCCs with multiple entry blocks. Irreducible SCCs are +/// discovered on the fly, and modelled as loops with multiple headers. +/// +/// The headers of irreducible sub-SCCs consist of its entry blocks and all +/// nodes that are targets of a backedge within it (excluding backedges within +/// true sub-loops). Block frequency calculations act as if a block is +/// inserted that intercepts all the edges to the headers. All backedges and +/// entries point to this block. Its successors are the headers, which split +/// the frequency evenly. +/// /// This algorithm leverages BlockMass and UnsignedFloat to maintain precision, /// separates mass distribution from loop scaling, and dithers to eliminate /// probability mass loss. @@ -1228,7 +1408,7 @@ template <> inline std::string getBlockName(const BasicBlock *BB) { /// All other stages make use of this ordering. Save a lookup from BlockT /// to BlockNode (the index into RPOT) in Nodes. /// -/// 1. Loop indexing (\a initializeLoops()). +/// 1. Loop initialization (\a initializeLoops()). /// /// Translate LoopInfo/MachineLoopInfo into a form suitable for the rest of /// the algorithm. 
In particular, store the immediate members of each loop @@ -1239,11 +1419,9 @@ template <> inline std::string getBlockName(const BasicBlock *BB) { /// For each loop (bottom-up), distribute mass through the DAG resulting /// from ignoring backedges and treating sub-loops as a single pseudo-node. /// Track the backedge mass distributed to the loop header, and use it to -/// calculate the loop scale (number of loop iterations). -/// -/// Visiting loops bottom-up is a post-order traversal of loop headers. -/// For each loop, immediate members that represent sub-loops will already -/// have been visited and packaged into a pseudo-node. +/// calculate the loop scale (number of loop iterations). Immediate +/// members that represent sub-loops will already have been visited and +/// packaged into a pseudo-node. /// /// Distributing mass in a loop is a reverse-post-order traversal through /// the loop. Start by assigning full mass to the Loop header. For each @@ -1260,6 +1438,11 @@ template <> inline std::string getBlockName(const BasicBlock *BB) { /// The weight, the successor, and its category are stored in \a /// Distribution. There can be multiple edges to each successor. /// +/// - If there's a backedge to a non-header, there's an irreducible SCC. +/// The usual flow is temporarily aborted. \a +/// computeIrreducibleMass() finds the irreducible SCCs within the +/// loop, packages them up, and restarts the flow. +/// /// - Normalize the distribution: scale weights down so that their sum /// is 32-bits, and coalesce multiple edges to the same node. /// @@ -1274,39 +1457,62 @@ template <> inline std::string getBlockName(const BasicBlock *BB) { /// loops in the function. This uses the same algorithm as distributing /// mass in a loop, except that there are no exit or backedge edges. /// -/// 4. Loop unpackaging and cleanup (\a finalizeMetrics()). +/// 4. Unpackage loops (\a unwrapLoops()). 
/// -/// Initialize the frequency to a floating point representation of its -/// mass. +/// Initialize each block's frequency to a floating point representation of +/// its mass. /// -/// Visit loops top-down (reverse post-order), scaling the loop header's -/// frequency by its psuedo-node's mass and loop scale. Keep track of the -/// minimum and maximum final frequencies. +/// Visit loops top-down, scaling the frequencies of its immediate members +/// by the loop's pseudo-node's frequency. +/// +/// 5. Convert frequencies to a 64-bit range (\a finalizeMetrics()). /// /// Using the min and max frequencies as a guide, translate floating point /// frequencies to an appropriate range in uint64_t. /// /// It has some known flaws. /// -/// - Irreducible control flow isn't modelled correctly. In particular, -/// LoopInfo and MachineLoopInfo ignore irreducible backedges. The main -/// result is that irreducible SCCs will under-scaled. No mass is lost, -/// but the computed branch weights for the loop pseudo-node will be -/// incorrect. +/// - Loop scale is limited to 4096 per loop (2^12) to avoid exhausting +/// BlockFrequency's 64-bit integer precision. +/// +/// - The model of irreducible control flow is a rough approximation. /// /// Modelling irreducible control flow exactly involves setting up and /// solving a group of infinite geometric series. Such precision is /// unlikely to be worthwhile, since most of our algorithms give up on /// irreducible control flow anyway. /// -/// Nevertheless, we might find that we need to get closer. If -/// LoopInfo/MachineLoopInfo flags loops with irreducible control flow -/// (and/or the function as a whole), we can find the SCCs, compute an -/// approximate exit frequency for the SCC as a whole, and scale up -/// accordingly. +/// Nevertheless, we might find that we need to get closer. Here's a sort +/// of TODO list for the model with diminishing returns, to be completed as +/// necessary. 
/// -/// - Loop scale is limited to 4096 per loop (2^12) to avoid exhausting -/// BlockFrequency's 64-bit integer precision. +/// - The headers for the \a LoopData representing an irreducible SCC +/// include non-entry blocks. When these extra blocks exist, they +/// indicate a self-contained irreducible sub-SCC. We could treat them +/// as sub-loops, rather than arbitrarily shoving the problematic +/// blocks into the headers of the main irreducible SCC. +/// +/// - Backedge frequencies are assumed to be evenly split between the +/// headers of a given irreducible SCC. Instead, we could track the +/// backedge mass separately for each header, and adjust their relative +/// frequencies. +/// +/// - Entry frequencies are assumed to be evenly split between the +/// headers of a given irreducible SCC, which is the only option if we +/// need to compute mass in the SCC before its parent loop. Instead, +/// we could partially compute mass in the parent loop, and stop when +/// we get to the SCC. Here, we have the correct ratio of entry +/// masses, which we can use to adjust their relative frequencies. +/// Compute mass in the SCC, and then continue propagation in the +/// parent. +/// +/// - We can propagate mass iteratively through the SCC, for some fixed +/// number of iterations. Each iteration starts by assigning the entry +/// blocks their backedge mass from the prior iteration. The final +/// mass for each block (and each exit, and the total backedge mass +/// used for computing loop scale) is the sum of all iterations. +/// (Running this until fixed point would "solve" the geometric +/// series by simulation.) 
template class BlockFrequencyInfoImpl : BlockFrequencyInfoImplBase { typedef typename bfi_detail::TypeMap::BlockT BlockT; typedef typename bfi_detail::TypeMap::FunctionT FunctionT; @@ -1361,7 +1567,9 @@ template class BlockFrequencyInfoImpl : BlockFrequencyInfoImplBase { /// /// In the context of distributing mass through \c OuterLoop, divide the mass /// currently assigned to \c Node between its successors. - void propagateMassToSuccessors(LoopData *OuterLoop, const BlockNode &Node); + /// + /// \return \c true unless there's an irreducible backedge. + bool propagateMassToSuccessors(LoopData *OuterLoop, const BlockNode &Node); /// \brief Compute mass in a particular loop. /// @@ -1370,20 +1578,51 @@ template class BlockFrequencyInfoImpl : BlockFrequencyInfoImplBase { /// that have not been packaged into sub-loops. /// /// \pre \a computeMassInLoop() has been called for each subloop of \c Loop. - void computeMassInLoop(LoopData &Loop); + /// \return \c true unless there's an irreducible backedge. + bool computeMassInLoop(LoopData &Loop); - /// \brief Compute mass in all loops. - /// - /// For each loop bottom-up, call \a computeMassInLoop(). - void computeMassInLoops(); - - /// \brief Compute mass in the top-level function. + /// \brief Try to compute mass in the top-level function. /// /// Assign mass to the entry block, and then for each block in reverse /// post-order, distribute mass to its successors. Skips nodes that have /// been packaged into loops. /// /// \pre \a computeMassInLoops() has been called. + /// \return \c true unless there's an irreducible backedge. + bool tryToComputeMassInFunction(); + + /// \brief Compute mass in (and package up) irreducible SCCs. + /// + /// Find the irreducible SCCs in \c OuterLoop, add them to \a Loops (in front + /// of \c Insert), and call \a computeMassInLoop() on each of them. + /// + /// If \c OuterLoop is \c nullptr, it refers to the top-level function. 
+ /// + /// \pre \a computeMassInLoop() has been called for each subloop of \c + /// OuterLoop. + /// \pre \c Insert points at the last loop successfully processed by \a + /// computeMassInLoop(). + /// \pre \c OuterLoop has irreducible SCCs. + void computeIrreducibleMass(LoopData *OuterLoop, + std::list::iterator Insert); + + /// \brief Compute mass in all loops. + /// + /// For each loop bottom-up, call \a computeMassInLoop(). + /// + /// \a computeMassInLoop() aborts (and returns \c false) on loops that + /// contain irreducible sub-SCCs. Use \a computeIrreducibleMass() and then + /// re-enter \a computeMassInLoop(). + /// + /// \post \a computeMassInLoop() has returned \c true for every loop. + void computeMassInLoops(); + + /// \brief Compute mass in the top-level function. + /// + /// Uses \a tryToComputeMassInFunction() and \a computeIrreducibleMass() to + /// compute mass in the top-level function. + /// + /// \post \a tryToComputeMassInFunction() has returned \c true. void computeMassInFunction(); std::string getBlockName(const BlockNode &Node) const override { @@ -1530,27 +1769,50 @@ template void BlockFrequencyInfoImpl::initializeLoops() { template void BlockFrequencyInfoImpl::computeMassInLoops() { // Visit loops with the deepest first, and the top-level loops last. - for (auto L = Loops.rbegin(), E = Loops.rend(); L != E; ++L) - computeMassInLoop(*L); + for (auto L = Loops.rbegin(), E = Loops.rend(); L != E; ++L) { + if (computeMassInLoop(*L)) + continue; + auto Next = std::next(L); + computeIrreducibleMass(&*L, L.base()); + L = std::prev(Next); + if (computeMassInLoop(*L)) + continue; + llvm_unreachable("unhandled irreducible control flow"); + } } template -void BlockFrequencyInfoImpl::computeMassInLoop(LoopData &Loop) { +bool BlockFrequencyInfoImpl::computeMassInLoop(LoopData &Loop) { // Compute mass in loop. 
- DEBUG(dbgs() << "compute-mass-in-loop: " << getBlockName(Loop.getHeader()) - << "\n"); + DEBUG(dbgs() << "compute-mass-in-loop: " << getLoopName(Loop) << "\n"); - Working[Loop.getHeader().Index].getMass() = BlockMass::getFull(); - propagateMassToSuccessors(&Loop, Loop.getHeader()); - - for (const BlockNode &M : Loop.members()) - propagateMassToSuccessors(&Loop, M); + if (Loop.isIrreducible()) { + BlockMass Remaining = BlockMass::getFull(); + for (uint32_t H = 0; H < Loop.NumHeaders; ++H) { + auto &Mass = Working[Loop.Nodes[H].Index].getMass(); + Mass = Remaining * BranchProbability(1, Loop.NumHeaders - H); + Remaining -= Mass; + } + for (const BlockNode &M : Loop.Nodes) + if (!propagateMassToSuccessors(&Loop, M)) + llvm_unreachable("unhandled irreducible control flow"); + } else { + Working[Loop.getHeader().Index].getMass() = BlockMass::getFull(); + if (!propagateMassToSuccessors(&Loop, Loop.getHeader())) + llvm_unreachable("irreducible control flow to loop header!?"); + for (const BlockNode &M : Loop.members()) + if (!propagateMassToSuccessors(&Loop, M)) + // Irreducible backedge. + return false; + } computeLoopScale(Loop); packageLoop(Loop); + return true; } -template void BlockFrequencyInfoImpl::computeMassInFunction() { +template +bool BlockFrequencyInfoImpl::tryToComputeMassInFunction() { // Compute mass in function. 
DEBUG(dbgs() << "compute-mass-in-function\n"); assert(!Working.empty() && "no blocks in function"); @@ -1563,12 +1825,48 @@ template void BlockFrequencyInfoImpl::computeMassInFunction() { if (Working[Node.Index].isPackaged()) continue; - propagateMassToSuccessors(nullptr, Node); + if (!propagateMassToSuccessors(nullptr, Node)) + return false; } + return true; +} + +template void BlockFrequencyInfoImpl::computeMassInFunction() { + if (tryToComputeMassInFunction()) + return; + computeIrreducibleMass(nullptr, Loops.begin()); + if (tryToComputeMassInFunction()) + return; + llvm_unreachable("unhandled irreducible control flow"); } template -void +void BlockFrequencyInfoImpl::computeIrreducibleMass( + LoopData *OuterLoop, std::list::iterator Insert) { + DEBUG(dbgs() << "analyze-irreducible-in-"; + if (OuterLoop) dbgs() << "loop: " << getLoopName(*OuterLoop) << "\n"; + else dbgs() << "function\n"); + + using bfi_detail::IrreducibleGraph; + auto addBlockEdges = [&](IrreducibleGraph &G, IrreducibleGraph::IrrNode &Irr, + const LoopData *OuterLoop) { + const BlockT *BB = RPOT[Irr.Node.Index]; + for (auto I = Successor::child_begin(BB), E = Successor::child_end(BB); + I != E; ++I) + G.addEdge(Irr, getNode(*I), OuterLoop); + }; + IrreducibleGraph G(*this, OuterLoop, addBlockEdges); + + for (auto &L : analyzeIrreducible(G, OuterLoop, Insert)) + computeMassInLoop(L); + + if (!OuterLoop) + return; + updateLoopWithIrreducible(*OuterLoop); +} + +template +bool BlockFrequencyInfoImpl::propagateMassToSuccessors(LoopData *OuterLoop, const BlockNode &Node) { DEBUG(dbgs() << " - node: " << getBlockName(Node) << "\n"); @@ -1576,20 +1874,25 @@ BlockFrequencyInfoImpl::propagateMassToSuccessors(LoopData *OuterLoop, Distribution Dist; if (auto *Loop = Working[Node.Index].getPackagedLoop()) { assert(Loop != OuterLoop && "Cannot propagate mass in a packaged loop"); - addLoopSuccessorsToDist(OuterLoop, *Loop, Dist); + if (!addLoopSuccessorsToDist(OuterLoop, *Loop, Dist)) + // Irreducible 
backedge. + return false; } else { const BlockT *BB = getBlock(Node); for (auto SI = Successor::child_begin(BB), SE = Successor::child_end(BB); SI != SE; ++SI) // Do not dereference SI, or getEdgeWeight() is linear in the number of // successors. - addToDist(Dist, OuterLoop, Node, getNode(*SI), - BPI->getEdgeWeight(BB, SI)); + if (!addToDist(Dist, OuterLoop, Node, getNode(*SI), + BPI->getEdgeWeight(BB, SI))) + // Irreducible backedge. + return false; } // Distribute mass to successors, saving exit and backedge data in the // loop header. distributeMass(Node, OuterLoop, Dist); + return true; } template diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp index 2fcd9b8377b..a12128318e2 100644 --- a/lib/Analysis/BlockFrequencyInfoImpl.cpp +++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp @@ -17,6 +17,7 @@ #include using namespace llvm; +using namespace llvm::bfi_detail; #define DEBUG_TYPE "block-freq" @@ -568,7 +569,7 @@ static void cleanup(BlockFrequencyInfoImplBase &BFI) { BFI.Freqs = std::move(SavedFreqs); } -void BlockFrequencyInfoImplBase::addToDist(Distribution &Dist, +bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist, const LoopData *OuterLoop, const BlockNode &Pred, const BlockNode &Succ, @@ -598,34 +599,48 @@ void BlockFrequencyInfoImplBase::addToDist(Distribution &Dist, if (isLoopHeader(Resolved)) { DEBUG(debugSuccessor("backedge")); Dist.addBackedge(OuterLoop->getHeader(), Weight); - return; + return true; } if (Working[Resolved.Index].getContainingLoop() != OuterLoop) { DEBUG(debugSuccessor(" exit ")); Dist.addExit(Resolved, Weight); - return; + return true; } if (Resolved < Pred) { - // Irreducible backedge. Skip. - DEBUG(debugSuccessor(" skip ")); - return; + if (!isLoopHeader(Pred)) { + // If OuterLoop is an irreducible loop, we can't actually handle this. + assert((!OuterLoop || !OuterLoop->isIrreducible()) && + "unhandled irreducible control flow"); + + // Irreducible backedge. Abort. 
+ DEBUG(debugSuccessor("abort!!!")); + return false; + } + + // If "Pred" is a loop header, then this isn't really a backedge; rather, + // OuterLoop must be irreducible. These false backedges can come only from + // secondary loop headers. + assert(OuterLoop && OuterLoop->isIrreducible() && !isLoopHeader(Resolved) && + "unhandled irreducible control flow"); } DEBUG(debugSuccessor(" local ")); Dist.addLocal(Resolved, Weight); + return true; } -void BlockFrequencyInfoImplBase::addLoopSuccessorsToDist( +bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist( const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist) { // Copy the exit map into Dist. for (const auto &I : Loop.Exits) - addToDist(Dist, OuterLoop, Loop.getHeader(), I.first, I.second.getMass()); + if (!addToDist(Dist, OuterLoop, Loop.getHeader(), I.first, + I.second.getMass())) + // Irreducible backedge. + return false; - // We don't need this map any more. Clear it to prevent quadratic memory - // usage in deeply nested loops with irreducible control flow. - Loop.Exits.clear(); + return true; } /// \brief Get the maximum allowed loop scale. @@ -637,8 +652,7 @@ static Float getMaxLoopScale() { return Float(1, 12); } /// \brief Compute the loop scale for a loop. void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) { // Compute loop scale. - DEBUG(dbgs() << "compute-loop-scale: " << getBlockName(Loop.getHeader()) - << "\n"); + DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n"); // LoopScale == 1 / ExitMass // ExitMass == HeadMass - BackedgeMass @@ -659,12 +673,15 @@ void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) { /// \brief Package up a loop. void BlockFrequencyInfoImplBase::packageLoop(LoopData &Loop) { - DEBUG(dbgs() << "packaging-loop: " << getBlockName(Loop.getHeader()) << "\n"); + DEBUG(dbgs() << "packaging-loop: " << getLoopName(Loop) << "\n"); + + // Clear the subloop exits to prevent quadratic memory usage. 
+ for (const BlockNode &M : Loop.Nodes) { + if (auto *Loop = Working[M.Index].getPackagedLoop()) + Loop->Exits.clear(); + DEBUG(dbgs() << " - node: " << getBlockName(M.Index) << "\n"); + } Loop.IsPackaged = true; - DEBUG(for (const BlockNode &M - : Loop.members()) { - dbgs() << " - node: " << getBlockName(M.Index) << "\n"; - }); } void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source, @@ -745,7 +762,7 @@ static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI, /// Visits all the members of a loop, adjusting their BlockData according to /// the loop's pseudo-node. static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) { - DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getBlockName(Loop.getHeader()) + DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop) << ": mass = " << Loop.Mass << ", scale = " << Loop.Scale << "\n"); Loop.Scale *= Loop.Mass.toFloat(); @@ -757,7 +774,7 @@ static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) { // final head scale will be used for updated the rest of the members. for (const BlockNode &N : Loop.Nodes) { const auto &Working = BFI.Working[N.Index]; - Float &F = Working.isAPackage() ? BFI.getLoopPackage(N).Scale + Float &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale : BFI.Freqs[N.Index].Floating; Float New = Loop.Scale * F; DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => " << New @@ -813,6 +830,10 @@ std::string BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const { return std::string(); } +std::string +BlockFrequencyInfoImplBase::getLoopName(const LoopData &Loop) const { + return getBlockName(Loop.getHeader()) + (Loop.isIrreducible() ? 
"**" : "*"); +} raw_ostream & BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS, @@ -828,3 +849,172 @@ BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS, return OS << Block / Entry; } + +void IrreducibleGraph::addNodesInLoop(const BFIBase::LoopData &OuterLoop) { + Start = OuterLoop.getHeader(); + Nodes.reserve(OuterLoop.Nodes.size()); + for (auto N : OuterLoop.Nodes) + addNode(N); + indexNodes(); +} +void IrreducibleGraph::addNodesInFunction() { + Start = 0; + for (uint32_t Index = 0; Index < BFI.Working.size(); ++Index) + if (!BFI.Working[Index].isPackaged()) + addNode(Index); + indexNodes(); +} +void IrreducibleGraph::indexNodes() { + for (auto &I : Nodes) + Lookup[I.Node.Index] = &I; +} +void IrreducibleGraph::addEdge(IrrNode &Irr, const BlockNode &Succ, + const BFIBase::LoopData *OuterLoop) { + if (OuterLoop && OuterLoop->isHeader(Succ)) + return; + auto L = Lookup.find(Succ.Index); + if (L == Lookup.end()) + return; + IrrNode &SuccIrr = *L->second; + Irr.Edges.push_back(&SuccIrr); + SuccIrr.Edges.push_front(&Irr); + ++SuccIrr.NumIn; +} + +namespace llvm { +template <> struct GraphTraits { + typedef bfi_detail::IrreducibleGraph GraphT; + + typedef const typename GraphT::IrrNode NodeType; + typedef typename GraphT::IrrNode::iterator ChildIteratorType; + + static const NodeType *getEntryNode(const GraphT &G) { + return G.StartIrr; + } + static ChildIteratorType child_begin(NodeType *N) { return N->succ_begin(); } + static ChildIteratorType child_end(NodeType *N) { return N->succ_end(); } +}; +} + +/// \brief Find extra irreducible headers. +/// +/// Find entry blocks and other blocks with backedges, which exist when \c G +/// contains irreducible sub-SCCs. +static void findIrreducibleHeaders( + const BlockFrequencyInfoImplBase &BFI, + const IrreducibleGraph &G, + const std::vector &SCC, + LoopData::NodeList &Headers, LoopData::NodeList &Others) { + // Map from nodes in the SCC to whether it's an entry block. 
+ SmallDenseMap InSCC; + + // InSCC also acts the set of nodes in the graph. Seed it. + for (const auto *I : SCC) + InSCC[I] = false; + + for (auto I = InSCC.begin(), E = InSCC.end(); I != E; ++I) { + auto &Irr = *I->first; + for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) { + if (InSCC.count(P)) + continue; + + // This is an entry block. + I->second = true; + Headers.push_back(Irr.Node); + DEBUG(dbgs() << " => entry = " << BFI.getBlockName(Irr.Node) << "\n"); + break; + } + } + assert(Headers.size() >= 2 && "Should be irreducible"); + if (Headers.size() == InSCC.size()) { + // Every block is a header. + std::sort(Headers.begin(), Headers.end()); + return; + } + + // Look for extra headers from irreducible sub-SCCs. + for (const auto &I : InSCC) { + // Entry blocks are already headers. + if (I.second) + continue; + + auto &Irr = *I.first; + for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) { + // Skip forward edges. + if (P->Node < Irr.Node) + continue; + + // Skip predecessors from entry blocks. These can have inverted + // ordering. + if (InSCC.lookup(P)) + continue; + + // Store the extra header. + Headers.push_back(Irr.Node); + DEBUG(dbgs() << " => extra = " << BFI.getBlockName(Irr.Node) << "\n"); + break; + } + if (Headers.back() == Irr.Node) + // Added this as a header. + continue; + + // This is not a header. + Others.push_back(Irr.Node); + DEBUG(dbgs() << " => other = " << BFI.getBlockName(Irr.Node) << "\n"); + } + std::sort(Headers.begin(), Headers.end()); + std::sort(Others.begin(), Others.end()); +} + +static void createIrreducibleLoop( + BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G, + LoopData *OuterLoop, std::list::iterator Insert, + const std::vector &SCC) { + // Translate the SCC into RPO. 
+ DEBUG(dbgs() << " - found-scc\n"); + + LoopData::NodeList Headers; + LoopData::NodeList Others; + findIrreducibleHeaders(BFI, G, SCC, Headers, Others); + + auto Loop = BFI.Loops.emplace(Insert, OuterLoop, Headers.begin(), + Headers.end(), Others.begin(), Others.end()); + + // Update loop hierarchy. + for (const auto &N : Loop->Nodes) + if (BFI.Working[N.Index].isLoopHeader()) + BFI.Working[N.Index].Loop->Parent = &*Loop; + else + BFI.Working[N.Index].Loop = &*Loop; +} + +iterator_range::iterator> +BlockFrequencyInfoImplBase::analyzeIrreducible( + const IrreducibleGraph &G, LoopData *OuterLoop, + std::list::iterator Insert) { + assert((OuterLoop == nullptr) == (Insert == Loops.begin())); + auto Prev = OuterLoop ? std::prev(Insert) : Loops.end(); + + for (auto I = scc_begin(G); !I.isAtEnd(); ++I) { + if (I->size() < 2) + continue; + + // Translate the SCC into RPO. + createIrreducibleLoop(*this, G, OuterLoop, Insert, *I); + } + + if (OuterLoop) + return make_range(std::next(Prev), Insert); + return make_range(Loops.begin(), Insert); +} + +void +BlockFrequencyInfoImplBase::updateLoopWithIrreducible(LoopData &OuterLoop) { + OuterLoop.Exits.clear(); + OuterLoop.BackedgeMass = BlockMass::getEmpty(); + auto O = OuterLoop.Nodes.begin() + 1; + for (auto I = O, E = OuterLoop.Nodes.end(); I != E; ++I) + if (!Working[I->Index].isPackaged()) + *O++ = *I; + OuterLoop.Nodes.erase(O, OuterLoop.Nodes.end()); +} diff --git a/test/Analysis/BlockFrequencyInfo/irreducible.ll b/test/Analysis/BlockFrequencyInfo/irreducible.ll index dd4dd9ed792..af4ad15d9c1 100644 --- a/test/Analysis/BlockFrequencyInfo/irreducible.ll +++ b/test/Analysis/BlockFrequencyInfo/irreducible.ll @@ -34,16 +34,28 @@ return: !0 = metadata !{metadata !"branch_weights", i32 1, i32 7} !1 = metadata !{metadata !"branch_weights", i32 3, i32 4} -; The current BlockFrequencyInfo algorithm doesn't handle multiple entrances -; into a loop very well. 
The frequencies assigned to blocks in the loop are -; predictable (and not absurd), but also not correct and therefore not worth -; testing. +; Irreducible control flow +; ======================== ; -; There are two testcases below. +; LoopInfo defines a loop as a non-trivial SCC dominated by a single block, +; called the header. A given loop, L, can have sub-loops, which are loops +; within the subgraph of L that excludes the header. ; -; For each testcase, I use a CHECK-NEXT/NOT combo like an XFAIL with the -; granularity of a single check. If/when this behaviour is fixed, we'll know -; about it, and the test should be updated. +; In addition to loops, -block-freq has limited support for irreducible SCCs, +; which are SCCs with multiple entry blocks. Irreducible SCCs are discovered +; on the fly, and modelled as loops with multiple headers. +; +; The headers of irreducible sub-SCCs consist of its entry blocks and all nodes +; that are targets of a backedge within it (excluding backedges within true +; sub-loops). +; +; -block-freq is currently designed to act like a block is inserted that +; intercepts all the edges to the headers. All backedges and entries point to +; this block. Its successors are the headers, which split the frequency +; evenly. +; +; There are a number of testcases below. Only the first two have detailed +; explanations. ; ; Testcase #1 ; =========== @@ -77,36 +89,31 @@ return: ; loop as a whole is 1/4, so the loop scale should be 4. Summing c1 and c2 ; gives 28/7, or 4.0, which is nice confirmation of the math above. ; -; However, assuming c1 precedes c2 in reverse post-order, the current algorithm -; returns 3/4 and 13/16, respectively. LoopInfo ignores edges between loops -; (and doesn't see any loops here at all), and -block-freq ignores the -; irreducible edge from c2 to c1. -; +; -block-freq currently treats the two nodes as equals. 
+define void @multientry(i1 %x) { ; CHECK-LABEL: Printing analysis {{.*}} for function 'multientry': ; CHECK-NEXT: block-frequency-info: multientry -define void @multientry(i1 %x) { -; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]] entry: +; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]] br i1 %x, label %c1, label %c2, !prof !2 -; This is like a single-line XFAIL (see above). -; CHECK-NEXT: c1: -; CHECK-NOT: float = 2.142857{{[0-9]*}}, c1: +; CHECK-NEXT: c1: float = 2.0, +; The "correct" answer is: float = 2.142857{{[0-9]*}}, br i1 %x, label %c2, label %exit, !prof !2 -; This is like a single-line XFAIL (see above). -; CHECK-NEXT: c2: -; CHECK-NOT: float = 1.857142{{[0-9]*}}, c2: +; CHECK-NEXT: c2: float = 2.0, +; The "correct" answer is: float = 1.857142{{[0-9]*}}, br i1 %x, label %c1, label %exit, !prof !2 -; We still shouldn't lose any frequency. -; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]] exit: +; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]] ret void } +!2 = metadata !{metadata !"branch_weights", i32 3, i32 1} + ; Testcase #2 ; =========== ; @@ -124,73 +131,291 @@ exit: ; step, c1 and c2 each get 1/3 of what's left in c1 and c2 combined. This ; infinite series sums to 1. ; -; However, assuming c1 precedes c2 in reverse post-order, the current algorithm -; returns 1/2 and 3/4, respectively. LoopInfo ignores edges between loops (and -; treats c1 and c2 as self-loops only), and -block-freq ignores the irreducible -; edge from c2 to c1. -; -; Below I use a CHECK-NEXT/NOT combo like an XFAIL with the granularity of a -; single check. If/when this behaviour is fixed, we'll know about it, and the -; test should be updated. -; +; Since the current algorithm *always* assumes entry blocks are equal, +; -block-freq gets the right answers here. 
+define void @crossloops(i2 %x) { ; CHECK-LABEL: Printing analysis {{.*}} for function 'crossloops': ; CHECK-NEXT: block-frequency-info: crossloops -define void @crossloops(i2 %x) { -; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]] entry: +; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]] switch i2 %x, label %exit [ i2 1, label %c1 i2 2, label %c2 ], !prof !3 -; This is like a single-line XFAIL (see above). -; CHECK-NEXT: c1: -; CHECK-NOT: float = 1.0, c1: +; CHECK-NEXT: c1: float = 1.0, switch i2 %x, label %exit [ i2 1, label %c1 i2 2, label %c2 ], !prof !3 -; This is like a single-line XFAIL (see above). -; CHECK-NEXT: c2: -; CHECK-NOT: float = 1.0, c2: +; CHECK-NEXT: c2: float = 1.0, switch i2 %x, label %exit [ i2 1, label %c1 i2 2, label %c2 ], !prof !3 -; We still shouldn't lose any frequency. -; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]] exit: +; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]] ret void } -!2 = metadata !{metadata !"branch_weights", i32 3, i32 1} !3 = metadata !{metadata !"branch_weights", i32 2, i32 2, i32 2} -; A reducible loop with irreducible control flow inside should still have -; correct exit frequency. -; +; A true loop with irreducible control flow inside. 
+define void @loop_around_irreducible(i1 %x) { ; CHECK-LABEL: Printing analysis {{.*}} for function 'loop_around_irreducible': ; CHECK-NEXT: block-frequency-info: loop_around_irreducible -define void @loop_around_irreducible(i1 %x) { -; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]] entry: +; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]] br label %loop -; CHECK-NEXT: loop: float = [[HEAD:[0-9.]+]], int = [[HEADINT:[0-9]+]] loop: - br i1 %x, label %left, label %right +; CHECK-NEXT: loop: float = 4.0, int = [[HEAD:[0-9]+]] + br i1 %x, label %left, label %right, !prof !4 -; CHECK-NEXT: left: left: - br i1 %x, label %right, label %loop.end +; CHECK-NEXT: left: float = 8.0, + br i1 %x, label %right, label %loop.end, !prof !5 -; CHECK-NEXT: right: right: - br i1 %x, label %left, label %loop.end +; CHECK-NEXT: right: float = 8.0, + br i1 %x, label %left, label %loop.end, !prof !5 -; CHECK-NEXT: loop.end: float = [[HEAD]], int = [[HEADINT]] loop.end: - br i1 %x, label %loop, label %exit +; CHECK-NEXT: loop.end: float = 4.0, int = [[HEAD]] + br i1 %x, label %loop, label %exit, !prof !5 -; CHECK-NEXT: float = 1.0, int = [[ENTRY]] exit: +; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]] ret void } +!4 = metadata !{metadata !"branch_weights", i32 1, i32 1} +!5 = metadata !{metadata !"branch_weights", i32 3, i32 1} + +; Two unrelated irreducible SCCs. 
+define void @two_sccs(i1 %x) { +; CHECK-LABEL: Printing analysis {{.*}} for function 'two_sccs': +; CHECK-NEXT: block-frequency-info: two_sccs +entry: +; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]] + br i1 %x, label %a, label %b, !prof !6 + +a: +; CHECK-NEXT: a: float = 0.75, + br i1 %x, label %a.left, label %a.right, !prof !7 + +a.left: +; CHECK-NEXT: a.left: float = 1.5, + br i1 %x, label %a.right, label %exit, !prof !6 + +a.right: +; CHECK-NEXT: a.right: float = 1.5, + br i1 %x, label %a.left, label %exit, !prof !6 + +b: +; CHECK-NEXT: b: float = 0.25, + br i1 %x, label %b.left, label %b.right, !prof !7 + +b.left: +; CHECK-NEXT: b.left: float = 0.625, + br i1 %x, label %b.right, label %exit, !prof !8 + +b.right: +; CHECK-NEXT: b.right: float = 0.625, + br i1 %x, label %b.left, label %exit, !prof !8 + +exit: +; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]] + ret void +} +!6 = metadata !{metadata !"branch_weights", i32 3, i32 1} +!7 = metadata !{metadata !"branch_weights", i32 1, i32 1} +!8 = metadata !{metadata !"branch_weights", i32 4, i32 1} + +; A true loop inside irreducible control flow. 
+define void @loop_inside_irreducible(i1 %x) { +; CHECK-LABEL: Printing analysis {{.*}} for function 'loop_inside_irreducible': +; CHECK-NEXT: block-frequency-info: loop_inside_irreducible +entry: +; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]] + br i1 %x, label %left, label %right, !prof !9 + +left: +; CHECK-NEXT: left: float = 2.0, + br i1 %x, label %right, label %exit, !prof !10 + +right: +; CHECK-NEXT: right: float = 2.0, int = [[RIGHT:[0-9]+]] + br label %loop + +loop: +; CHECK-NEXT: loop: float = 6.0, + br i1 %x, label %loop, label %right.end, !prof !11 + +right.end: +; CHECK-NEXT: right.end: float = 2.0, int = [[RIGHT]] + br i1 %x, label %left, label %exit, !prof !10 + +exit: +; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]] + ret void +} +!9 = metadata !{metadata !"branch_weights", i32 1, i32 1} +!10 = metadata !{metadata !"branch_weights", i32 3, i32 1} +!11 = metadata !{metadata !"branch_weights", i32 2, i32 1} + +; Irreducible control flow in a branch that's in a true loop. 
+define void @loop_around_branch_with_irreducible(i1 %x) { +; CHECK-LABEL: Printing analysis {{.*}} for function 'loop_around_branch_with_irreducible': +; CHECK-NEXT: block-frequency-info: loop_around_branch_with_irreducible +entry: +; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]] + br label %loop + +loop: +; CHECK-NEXT: loop: float = 2.0, int = [[LOOP:[0-9]+]] + br i1 %x, label %normal, label %irreducible.entry, !prof !12 + +normal: +; CHECK-NEXT: normal: float = 1.5, + br label %loop.end + +irreducible.entry: +; CHECK-NEXT: irreducible.entry: float = 0.5, int = [[IRREDUCIBLE:[0-9]+]] + br i1 %x, label %left, label %right, !prof !13 + +left: +; CHECK-NEXT: left: float = 1.0, + br i1 %x, label %right, label %irreducible.exit, !prof !12 + +right: +; CHECK-NEXT: right: float = 1.0, + br i1 %x, label %left, label %irreducible.exit, !prof !12 + +irreducible.exit: +; CHECK-NEXT: irreducible.exit: float = 0.5, int = [[IRREDUCIBLE]] + br label %loop.end + +loop.end: +; CHECK-NEXT: loop.end: float = 2.0, int = [[LOOP]] + br i1 %x, label %loop, label %exit, !prof !13 + +exit: +; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]] + ret void +} +!12 = metadata !{metadata !"branch_weights", i32 3, i32 1} +!13 = metadata !{metadata !"branch_weights", i32 1, i32 1} + +; Irreducible control flow between two true loops. 
+define void @loop_around_branch_with_irreducible_around_loop(i1 %x) { +; CHECK-LABEL: Printing analysis {{.*}} for function 'loop_around_branch_with_irreducible_around_loop': +; CHECK-NEXT: block-frequency-info: loop_around_branch_with_irreducible_around_loop +entry: +; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]] + br label %loop + +loop: +; CHECK-NEXT: loop: float = 3.0, int = [[LOOP:[0-9]+]] + br i1 %x, label %normal, label %irreducible, !prof !14 + +normal: +; CHECK-NEXT: normal: float = 2.0, + br label %loop.end + +irreducible: +; CHECK-NEXT: irreducible: float = 1.0, + br i1 %x, label %left, label %right, !prof !15 + +left: +; CHECK-NEXT: left: float = 2.0, + br i1 %x, label %right, label %loop.end, !prof !16 + +right: +; CHECK-NEXT: right: float = 2.0, int = [[RIGHT:[0-9]+]] + br label %right.loop + +right.loop: +; CHECK-NEXT: right.loop: float = 10.0, + br i1 %x, label %right.loop, label %right.end, !prof !17 + +right.end: +; CHECK-NEXT: right.end: float = 2.0, int = [[RIGHT]] + br i1 %x, label %left, label %loop.end, !prof !16 + +loop.end: +; CHECK-NEXT: loop.end: float = 3.0, int = [[LOOP]] + br i1 %x, label %loop, label %exit, !prof !14 + +exit: +; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]] + ret void +} +!14 = metadata !{metadata !"branch_weights", i32 2, i32 1} +!15 = metadata !{metadata !"branch_weights", i32 1, i32 1} +!16 = metadata !{metadata !"branch_weights", i32 3, i32 1} +!17 = metadata !{metadata !"branch_weights", i32 4, i32 1} + +; An irreducible SCC with a non-header. 
+define void @nonheader(i1 %x) { +; CHECK-LABEL: Printing analysis {{.*}} for function 'nonheader': +; CHECK-NEXT: block-frequency-info: nonheader +entry: +; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]] + br i1 %x, label %left, label %right, !prof !18 + +left: +; CHECK-NEXT: left: float = 1.0, + br i1 %x, label %bottom, label %exit, !prof !19 + +right: +; CHECK-NEXT: right: float = 1.0, + br i1 %x, label %bottom, label %exit, !prof !20 + +bottom: +; CHECK-NEXT: bottom: float = 1.0, + br i1 %x, label %left, label %right, !prof !18 + +exit: +; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]] + ret void +} +!18 = metadata !{metadata !"branch_weights", i32 1, i32 1} +!19 = metadata !{metadata !"branch_weights", i32 1, i32 3} +!20 = metadata !{metadata !"branch_weights", i32 3, i32 1} + +; An irreducible SCC with an irreducible sub-SCC. In the current version of +; -block-freq, this means an extra header. +; +; This testcase uses non-trivial branch weights. The CHECK statements here +; will start to fail if we change -block-freq to be more accurate. Currently, +; we expect left, right and top to be treated as equal headers. 
+define void @nonentry_header(i1 %x, i2 %y) { +; CHECK-LABEL: Printing analysis {{.*}} for function 'nonentry_header': +; CHECK-NEXT: block-frequency-info: nonentry_header +entry: +; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]] + br i1 %x, label %left, label %right, !prof !21 + +left: +; CHECK-NEXT: left: float = 3.0, + br i1 %x, label %top, label %bottom, !prof !22 + +right: +; CHECK-NEXT: right: float = 3.0, + br i1 %x, label %top, label %bottom, !prof !22 + +top: +; CHECK-NEXT: top: float = 3.0, + switch i2 %y, label %exit [ i2 0, label %left + i2 1, label %right + i2 2, label %bottom ], !prof !23 + +bottom: +; CHECK-NEXT: bottom: float = 4.5, + br label %top + +exit: +; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]] + ret void +} +!21 = metadata !{metadata !"branch_weights", i32 2, i32 1} +!22 = metadata !{metadata !"branch_weights", i32 1, i32 1} +!23 = metadata !{metadata !"branch_weights", i32 8, i32 1, i32 3, i32 12}