llvm/lib/Analysis/BranchProbabilityInfo.cpp

680 lines
23 KiB
C++
Raw Normal View History

//===-- BranchProbabilityInfo.cpp - Branch Probability Analysis -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Loops should be simplified before this analysis.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "branch-prob"
INITIALIZE_PASS_BEGIN(BranchProbabilityInfoWrapperPass, "branch-prob",
"Branch Probability Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(BranchProbabilityInfoWrapperPass, "branch-prob",
"Branch Probability Analysis", false, true)
char BranchProbabilityInfoWrapperPass::ID = 0;
// Weights are for internal use only. They are used by heuristics to help to
// estimate edges' probability. Example:
//
// Using "Loop Branch Heuristics" we predict weights of edges for the
// block BB2.
// ...
// |
// V
// BB1<-+
// | |
// | | (Weight = 124)
// V |
// BB2--+
// |
// | (Weight = 4)
// V
// BB3
//
// Probability of the edge BB2->BB1 = 124 / (124 + 4) = 0.96875
// Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125
static const uint32_t LBH_TAKEN_WEIGHT = 124;
static const uint32_t LBH_NONTAKEN_WEIGHT = 4;
Remove return heuristics from the static branch probabilities, and introduce no-return or unreachable heuristics. The return heuristics from the Ball and Larus paper don't work well in practice as they pessimize early return paths. The only good hitrate return heuristics are those for: - NULL return - Constant return - negative integer return Only the last of these three can possibly require significant code for the returning block, and even the last is fairly rare and usually also a constant. As a consequence, even for the cold return paths, there is little code on that return path, and so little code density to be gained by sinking it. The places where sinking these blocks is valuable (inner loops) will already be weighted appropriately as the edge is a loop-exit branch. All of this aside, early returns are nearly as common as all three of these return categories, and should actually be predicted as taken! Rather than muddy the waters of the static predictions, just remain silent on returns and let the CFG itself dictate any layout or other issues. However, the return heuristic was flagging one very important case: unreachable. Unfortunately it still gave a 1/4 chance of the branch-to-unreachable occuring. It also didn't do a rigorous job of finding those blocks which post-dominate an unreachable block. This patch builds a more powerful analysis that should flag all branches to blocks known to then reach unreachable. It also has better worst-case runtime complexity by not looping through successors for each block. The previous code would perform an N^2 walk in the event of a single entry block branching to N successors with a switch where each successor falls through to the next and they finally fall through to a return. Test case added for noreturn heuristics. Also doxygen comments improved along the way. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142793 91177308-0d34-0410-b5e6-96231b3b80d8
2011-10-24 12:01:08 +00:00
/// \brief Unreachable-terminating branch taken weight.
///
/// This is the weight for a branch being taken to a block that terminates
/// (eventually) in unreachable. These are predicted as unlikely as possible.
static const uint32_t UR_TAKEN_WEIGHT = 1;
/// \brief Unreachable-terminating branch not-taken weight.
///
/// This is the weight for a branch not being taken toward a block that
/// terminates (eventually) in unreachable. Such a branch is essentially never
/// taken. Set the weight to an absurdly high value so that nested loops don't
/// easily subsume it.
static const uint32_t UR_NONTAKEN_WEIGHT = 1024*1024 - 1;
/// \brief Weight for a branch taken going into a cold block.
///
/// This is the weight for a branch taken toward a block marked
/// cold. A block is marked cold if it's postdominated by a
/// block containing a call to a cold function. Cold functions
/// are those marked with attribute 'cold'.
static const uint32_t CC_TAKEN_WEIGHT = 4;
/// \brief Weight for a branch not-taken into a cold block.
///
/// This is the weight for a branch not taken toward a block marked
/// cold.
static const uint32_t CC_NONTAKEN_WEIGHT = 64;
static const uint32_t PH_TAKEN_WEIGHT = 20;
static const uint32_t PH_NONTAKEN_WEIGHT = 12;
static const uint32_t ZH_TAKEN_WEIGHT = 20;
static const uint32_t ZH_NONTAKEN_WEIGHT = 12;
static const uint32_t FPH_TAKEN_WEIGHT = 20;
static const uint32_t FPH_NONTAKEN_WEIGHT = 12;
/// \brief Invoke-terminating normal branch taken weight
///
/// This is the weight for branching to the normal destination of an invoke
/// instruction. We expect this to happen most of the time. Set the weight to an
/// absurdly high value so that nested loops subsume it.
static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1;
/// \brief Invoke-terminating normal branch not-taken weight.
///
/// This is the weight for branching to the unwind destination of an invoke
/// instruction. This is essentially never taken.
static const uint32_t IH_NONTAKEN_WEIGHT = 1;
Remove return heuristics from the static branch probabilities, and introduce no-return or unreachable heuristics. The return heuristics from the Ball and Larus paper don't work well in practice as they pessimize early return paths. The only good hitrate return heuristics are those for: - NULL return - Constant return - negative integer return Only the last of these three can possibly require significant code for the returning block, and even the last is fairly rare and usually also a constant. As a consequence, even for the cold return paths, there is little code on that return path, and so little code density to be gained by sinking it. The places where sinking these blocks is valuable (inner loops) will already be weighted appropriately as the edge is a loop-exit branch. All of this aside, early returns are nearly as common as all three of these return categories, and should actually be predicted as taken! Rather than muddy the waters of the static predictions, just remain silent on returns and let the CFG itself dictate any layout or other issues. However, the return heuristic was flagging one very important case: unreachable. Unfortunately it still gave a 1/4 chance of the branch-to-unreachable occuring. It also didn't do a rigorous job of finding those blocks which post-dominate an unreachable block. This patch builds a more powerful analysis that should flag all branches to blocks known to then reach unreachable. It also has better worst-case runtime complexity by not looping through successors for each block. The previous code would perform an N^2 walk in the event of a single entry block branching to N successors with a switch where each successor falls through to the next and they finally fall through to a return. Test case added for noreturn heuristics. Also doxygen comments improved along the way. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142793 91177308-0d34-0410-b5e6-96231b3b80d8
2011-10-24 12:01:08 +00:00
/// \brief Calculate edge weights for successors lead to unreachable.
///
/// Predict that a successor which leads necessarily to an
/// unreachable-terminated block as extremely unlikely.
bool BranchProbabilityInfo::calcUnreachableHeuristics(BasicBlock *BB) {
TerminatorInst *TI = BB->getTerminator();
if (TI->getNumSuccessors() == 0) {
if (isa<UnreachableInst>(TI))
PostDominatedByUnreachable.insert(BB);
return false;
}
SmallVector<unsigned, 4> UnreachableEdges;
SmallVector<unsigned, 4> ReachableEdges;
Remove return heuristics from the static branch probabilities, and introduce no-return or unreachable heuristics. The return heuristics from the Ball and Larus paper don't work well in practice as they pessimize early return paths. The only good hitrate return heuristics are those for: - NULL return - Constant return - negative integer return Only the last of these three can possibly require significant code for the returning block, and even the last is fairly rare and usually also a constant. As a consequence, even for the cold return paths, there is little code on that return path, and so little code density to be gained by sinking it. The places where sinking these blocks is valuable (inner loops) will already be weighted appropriately as the edge is a loop-exit branch. All of this aside, early returns are nearly as common as all three of these return categories, and should actually be predicted as taken! Rather than muddy the waters of the static predictions, just remain silent on returns and let the CFG itself dictate any layout or other issues. However, the return heuristic was flagging one very important case: unreachable. Unfortunately it still gave a 1/4 chance of the branch-to-unreachable occuring. It also didn't do a rigorous job of finding those blocks which post-dominate an unreachable block. This patch builds a more powerful analysis that should flag all branches to blocks known to then reach unreachable. It also has better worst-case runtime complexity by not looping through successors for each block. The previous code would perform an N^2 walk in the event of a single entry block branching to N successors with a switch where each successor falls through to the next and they finally fall through to a return. Test case added for noreturn heuristics. Also doxygen comments improved along the way. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142793 91177308-0d34-0410-b5e6-96231b3b80d8
2011-10-24 12:01:08 +00:00
for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
if (PostDominatedByUnreachable.count(*I))
UnreachableEdges.push_back(I.getSuccessorIndex());
Remove return heuristics from the static branch probabilities, and introduce no-return or unreachable heuristics. The return heuristics from the Ball and Larus paper don't work well in practice as they pessimize early return paths. The only good hitrate return heuristics are those for: - NULL return - Constant return - negative integer return Only the last of these three can possibly require significant code for the returning block, and even the last is fairly rare and usually also a constant. As a consequence, even for the cold return paths, there is little code on that return path, and so little code density to be gained by sinking it. The places where sinking these blocks is valuable (inner loops) will already be weighted appropriately as the edge is a loop-exit branch. All of this aside, early returns are nearly as common as all three of these return categories, and should actually be predicted as taken! Rather than muddy the waters of the static predictions, just remain silent on returns and let the CFG itself dictate any layout or other issues. However, the return heuristic was flagging one very important case: unreachable. Unfortunately it still gave a 1/4 chance of the branch-to-unreachable occuring. It also didn't do a rigorous job of finding those blocks which post-dominate an unreachable block. This patch builds a more powerful analysis that should flag all branches to blocks known to then reach unreachable. It also has better worst-case runtime complexity by not looping through successors for each block. The previous code would perform an N^2 walk in the event of a single entry block branching to N successors with a switch where each successor falls through to the next and they finally fall through to a return. Test case added for noreturn heuristics. Also doxygen comments improved along the way. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142793 91177308-0d34-0410-b5e6-96231b3b80d8
2011-10-24 12:01:08 +00:00
else
ReachableEdges.push_back(I.getSuccessorIndex());
}
Remove return heuristics from the static branch probabilities, and introduce no-return or unreachable heuristics. The return heuristics from the Ball and Larus paper don't work well in practice as they pessimize early return paths. The only good hitrate return heuristics are those for: - NULL return - Constant return - negative integer return Only the last of these three can possibly require significant code for the returning block, and even the last is fairly rare and usually also a constant. As a consequence, even for the cold return paths, there is little code on that return path, and so little code density to be gained by sinking it. The places where sinking these blocks is valuable (inner loops) will already be weighted appropriately as the edge is a loop-exit branch. All of this aside, early returns are nearly as common as all three of these return categories, and should actually be predicted as taken! Rather than muddy the waters of the static predictions, just remain silent on returns and let the CFG itself dictate any layout or other issues. However, the return heuristic was flagging one very important case: unreachable. Unfortunately it still gave a 1/4 chance of the branch-to-unreachable occuring. It also didn't do a rigorous job of finding those blocks which post-dominate an unreachable block. This patch builds a more powerful analysis that should flag all branches to blocks known to then reach unreachable. It also has better worst-case runtime complexity by not looping through successors for each block. The previous code would perform an N^2 walk in the event of a single entry block branching to N successors with a switch where each successor falls through to the next and they finally fall through to a return. Test case added for noreturn heuristics. Also doxygen comments improved along the way. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142793 91177308-0d34-0410-b5e6-96231b3b80d8
2011-10-24 12:01:08 +00:00
// If all successors are in the set of blocks post-dominated by unreachable,
// this block is too.
if (UnreachableEdges.size() == TI->getNumSuccessors())
PostDominatedByUnreachable.insert(BB);
Remove return heuristics from the static branch probabilities, and introduce no-return or unreachable heuristics. The return heuristics from the Ball and Larus paper don't work well in practice as they pessimize early return paths. The only good hitrate return heuristics are those for: - NULL return - Constant return - negative integer return Only the last of these three can possibly require significant code for the returning block, and even the last is fairly rare and usually also a constant. As a consequence, even for the cold return paths, there is little code on that return path, and so little code density to be gained by sinking it. The places where sinking these blocks is valuable (inner loops) will already be weighted appropriately as the edge is a loop-exit branch. All of this aside, early returns are nearly as common as all three of these return categories, and should actually be predicted as taken! Rather than muddy the waters of the static predictions, just remain silent on returns and let the CFG itself dictate any layout or other issues. However, the return heuristic was flagging one very important case: unreachable. Unfortunately it still gave a 1/4 chance of the branch-to-unreachable occuring. It also didn't do a rigorous job of finding those blocks which post-dominate an unreachable block. This patch builds a more powerful analysis that should flag all branches to blocks known to then reach unreachable. It also has better worst-case runtime complexity by not looping through successors for each block. The previous code would perform an N^2 walk in the event of a single entry block branching to N successors with a switch where each successor falls through to the next and they finally fall through to a return. Test case added for noreturn heuristics. Also doxygen comments improved along the way. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142793 91177308-0d34-0410-b5e6-96231b3b80d8
2011-10-24 12:01:08 +00:00
// Skip probabilities if this block has a single successor or if all were
// reachable.
if (TI->getNumSuccessors() == 1 || UnreachableEdges.empty())
return false;
// If the terminator is an InvokeInst, check only the normal destination block
// as the unwind edge of InvokeInst is also very unlikely taken.
if (auto *II = dyn_cast<InvokeInst>(TI))
if (PostDominatedByUnreachable.count(II->getNormalDest())) {
PostDominatedByUnreachable.insert(BB);
// Return false here so that edge weights for InvokeInst could be decided
// in calcInvokeHeuristics().
return false;
}
if (ReachableEdges.empty()) {
BranchProbability Prob(1, UnreachableEdges.size());
for (unsigned SuccIdx : UnreachableEdges)
setEdgeProbability(BB, SuccIdx, Prob);
Remove return heuristics from the static branch probabilities, and introduce no-return or unreachable heuristics. The return heuristics from the Ball and Larus paper don't work well in practice as they pessimize early return paths. The only good hitrate return heuristics are those for: - NULL return - Constant return - negative integer return Only the last of these three can possibly require significant code for the returning block, and even the last is fairly rare and usually also a constant. As a consequence, even for the cold return paths, there is little code on that return path, and so little code density to be gained by sinking it. The places where sinking these blocks is valuable (inner loops) will already be weighted appropriately as the edge is a loop-exit branch. All of this aside, early returns are nearly as common as all three of these return categories, and should actually be predicted as taken! Rather than muddy the waters of the static predictions, just remain silent on returns and let the CFG itself dictate any layout or other issues. However, the return heuristic was flagging one very important case: unreachable. Unfortunately it still gave a 1/4 chance of the branch-to-unreachable occuring. It also didn't do a rigorous job of finding those blocks which post-dominate an unreachable block. This patch builds a more powerful analysis that should flag all branches to blocks known to then reach unreachable. It also has better worst-case runtime complexity by not looping through successors for each block. The previous code would perform an N^2 walk in the event of a single entry block branching to N successors with a switch where each successor falls through to the next and they finally fall through to a return. Test case added for noreturn heuristics. Also doxygen comments improved along the way. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142793 91177308-0d34-0410-b5e6-96231b3b80d8
2011-10-24 12:01:08 +00:00
return true;
}
BranchProbability UnreachableProb(UR_TAKEN_WEIGHT,
(UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) *
UnreachableEdges.size());
BranchProbability ReachableProb(UR_NONTAKEN_WEIGHT,
(UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) *
ReachableEdges.size());
for (unsigned SuccIdx : UnreachableEdges)
setEdgeProbability(BB, SuccIdx, UnreachableProb);
for (unsigned SuccIdx : ReachableEdges)
setEdgeProbability(BB, SuccIdx, ReachableProb);
Remove return heuristics from the static branch probabilities, and introduce no-return or unreachable heuristics. The return heuristics from the Ball and Larus paper don't work well in practice as they pessimize early return paths. The only good hitrate return heuristics are those for: - NULL return - Constant return - negative integer return Only the last of these three can possibly require significant code for the returning block, and even the last is fairly rare and usually also a constant. As a consequence, even for the cold return paths, there is little code on that return path, and so little code density to be gained by sinking it. The places where sinking these blocks is valuable (inner loops) will already be weighted appropriately as the edge is a loop-exit branch. All of this aside, early returns are nearly as common as all three of these return categories, and should actually be predicted as taken! Rather than muddy the waters of the static predictions, just remain silent on returns and let the CFG itself dictate any layout or other issues. However, the return heuristic was flagging one very important case: unreachable. Unfortunately it still gave a 1/4 chance of the branch-to-unreachable occuring. It also didn't do a rigorous job of finding those blocks which post-dominate an unreachable block. This patch builds a more powerful analysis that should flag all branches to blocks known to then reach unreachable. It also has better worst-case runtime complexity by not looping through successors for each block. The previous code would perform an N^2 walk in the event of a single entry block branching to N successors with a switch where each successor falls through to the next and they finally fall through to a return. Test case added for noreturn heuristics. Also doxygen comments improved along the way. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142793 91177308-0d34-0410-b5e6-96231b3b80d8
2011-10-24 12:01:08 +00:00
return true;
}
// Propagate existing explicit probabilities from either profile data or
// 'expect' intrinsic processing.
bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) {
TerminatorInst *TI = BB->getTerminator();
if (TI->getNumSuccessors() == 1)
return false;
if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
return false;
MDNode *WeightsNode = TI->getMetadata(LLVMContext::MD_prof);
if (!WeightsNode)
return false;
// Check that the number of successors is manageable.
assert(TI->getNumSuccessors() < UINT32_MAX && "Too many successors");
// Ensure there are weights for all of the successors. Note that the first
// operand to the metadata node is a name, not a weight.
if (WeightsNode->getNumOperands() != TI->getNumSuccessors() + 1)
return false;
// Build up the final weights that will be used in a temporary buffer.
// Compute the sum of all weights to later decide whether they need to
// be scaled to fit in 32 bits.
uint64_t WeightSum = 0;
SmallVector<uint32_t, 2> Weights;
Weights.reserve(TI->getNumSuccessors());
for (unsigned i = 1, e = WeightsNode->getNumOperands(); i != e; ++i) {
IR: Split Metadata from Value Split `Metadata` away from the `Value` class hierarchy, as part of PR21532. Assembly and bitcode changes are in the wings, but this is the bulk of the change for the IR C++ API. I have a follow-up patch prepared for `clang`. If this breaks other sub-projects, I apologize in advance :(. Help me compile it on Darwin I'll try to fix it. FWIW, the errors should be easy to fix, so it may be simpler to just fix it yourself. This breaks the build for all metadata-related code that's out-of-tree. Rest assured the transition is mechanical and the compiler should catch almost all of the problems. Here's a quick guide for updating your code: - `Metadata` is the root of a class hierarchy with three main classes: `MDNode`, `MDString`, and `ValueAsMetadata`. It is distinct from the `Value` class hierarchy. It is typeless -- i.e., instances do *not* have a `Type`. - `MDNode`'s operands are all `Metadata *` (instead of `Value *`). - `TrackingVH<MDNode>` and `WeakVH` referring to metadata can be replaced with `TrackingMDNodeRef` and `TrackingMDRef`, respectively. If you're referring solely to resolved `MDNode`s -- post graph construction -- just use `MDNode*`. - `MDNode` (and the rest of `Metadata`) have only limited support for `replaceAllUsesWith()`. As long as an `MDNode` is pointing at a forward declaration -- the result of `MDNode::getTemporary()` -- it maintains a side map of its uses and can RAUW itself. Once the forward declarations are fully resolved RAUW support is dropped on the ground. This means that uniquing collisions on changing operands cause nodes to become "distinct". (This already happened fairly commonly, whenever an operand went to null.) If you're constructing complex (non self-reference) `MDNode` cycles, you need to call `MDNode::resolveCycles()` on each node (or on a top-level node that somehow references all of the nodes). Also, don't do that. Metadata cycles (and the RAUW machinery needed to construct them) are expensive. - An `MDNode` can only refer to a `Constant` through a bridge called `ConstantAsMetadata` (one of the subclasses of `ValueAsMetadata`). As a side effect, accessing an operand of an `MDNode` that is known to be, e.g., `ConstantInt`, takes three steps: first, cast from `Metadata` to `ConstantAsMetadata`; second, extract the `Constant`; third, cast down to `ConstantInt`. The eventual goal is to introduce `MDInt`/`MDFloat`/etc. and have metadata schema owners transition away from using `Constant`s when the type isn't important (and they don't care about referring to `GlobalValue`s). In the meantime, I've added transitional API to the `mdconst` namespace that matches semantics with the old code, in order to avoid adding the error-prone three-step equivalent to every call site. If your old code was: MDNode *N = foo(); bar(isa <ConstantInt>(N->getOperand(0))); baz(cast <ConstantInt>(N->getOperand(1))); bak(cast_or_null <ConstantInt>(N->getOperand(2))); bat(dyn_cast <ConstantInt>(N->getOperand(3))); bay(dyn_cast_or_null<ConstantInt>(N->getOperand(4))); you can trivially match its semantics with: MDNode *N = foo(); bar(mdconst::hasa <ConstantInt>(N->getOperand(0))); baz(mdconst::extract <ConstantInt>(N->getOperand(1))); bak(mdconst::extract_or_null <ConstantInt>(N->getOperand(2))); bat(mdconst::dyn_extract <ConstantInt>(N->getOperand(3))); bay(mdconst::dyn_extract_or_null<ConstantInt>(N->getOperand(4))); and when you transition your metadata schema to `MDInt`: MDNode *N = foo(); bar(isa <MDInt>(N->getOperand(0))); baz(cast <MDInt>(N->getOperand(1))); bak(cast_or_null <MDInt>(N->getOperand(2))); bat(dyn_cast <MDInt>(N->getOperand(3))); bay(dyn_cast_or_null<MDInt>(N->getOperand(4))); - A `CallInst` -- specifically, intrinsic instructions -- can refer to metadata through a bridge called `MetadataAsValue`. This is a subclass of `Value` where `getType()->isMetadataTy()`. `MetadataAsValue` is the *only* class that can legally refer to a `LocalAsMetadata`, which is a bridged form of non-`Constant` values like `Argument` and `Instruction`. It can also refer to any other `Metadata` subclass. (I'll break all your testcases in a follow-up commit, when I propagate this change to assembly.) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@223802 91177308-0d34-0410-b5e6-96231b3b80d8
2014-12-09 18:38:53 +00:00
ConstantInt *Weight =
mdconst::dyn_extract<ConstantInt>(WeightsNode->getOperand(i));
if (!Weight)
return false;
assert(Weight->getValue().getActiveBits() <= 32 &&
"Too many bits for uint32_t");
Weights.push_back(Weight->getZExtValue());
WeightSum += Weights.back();
}
assert(Weights.size() == TI->getNumSuccessors() && "Checked above");
// If the sum of weights does not fit in 32 bits, scale every weight down
// accordingly.
uint64_t ScalingFactor =
(WeightSum > UINT32_MAX) ? WeightSum / UINT32_MAX + 1 : 1;
WeightSum = 0;
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
Weights[i] /= ScalingFactor;
WeightSum += Weights[i];
}
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
setEdgeProbability(BB, i, {Weights[i], static_cast<uint32_t>(WeightSum)});
assert(WeightSum <= UINT32_MAX &&
"Expected weights to scale down to 32 bits");
return true;
}
/// \brief Calculate edge weights for edges leading to cold blocks.
///
/// A cold block is one post-dominated by a block with a call to a
/// cold function. Those edges are unlikely to be taken, so we give
/// them relatively low weight.
///
/// Return true if we could compute the weights for cold edges.
/// Return false, otherwise.
bool BranchProbabilityInfo::calcColdCallHeuristics(BasicBlock *BB) {
TerminatorInst *TI = BB->getTerminator();
if (TI->getNumSuccessors() == 0)
return false;
// Determine which successors are post-dominated by a cold block.
SmallVector<unsigned, 4> ColdEdges;
SmallVector<unsigned, 4> NormalEdges;
for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
if (PostDominatedByColdCall.count(*I))
ColdEdges.push_back(I.getSuccessorIndex());
else
NormalEdges.push_back(I.getSuccessorIndex());
// If all successors are in the set of blocks post-dominated by cold calls,
// this block is in the set post-dominated by cold calls.
if (ColdEdges.size() == TI->getNumSuccessors())
PostDominatedByColdCall.insert(BB);
else {
// Otherwise, if the block itself contains a cold function, add it to the
// set of blocks postdominated by a cold call.
assert(!PostDominatedByColdCall.count(BB));
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
if (CallInst *CI = dyn_cast<CallInst>(I))
if (CI->hasFnAttr(Attribute::Cold)) {
PostDominatedByColdCall.insert(BB);
break;
}
}
// Skip probabilities if this block has a single successor.
if (TI->getNumSuccessors() == 1 || ColdEdges.empty())
return false;
if (NormalEdges.empty()) {
BranchProbability Prob(1, ColdEdges.size());
for (unsigned SuccIdx : ColdEdges)
setEdgeProbability(BB, SuccIdx, Prob);
return true;
}
BranchProbability ColdProb(CC_TAKEN_WEIGHT,
(CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) *
ColdEdges.size());
BranchProbability NormalProb(CC_NONTAKEN_WEIGHT,
(CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) *
NormalEdges.size());
for (unsigned SuccIdx : ColdEdges)
setEdgeProbability(BB, SuccIdx, ColdProb);
for (unsigned SuccIdx : NormalEdges)
setEdgeProbability(BB, SuccIdx, NormalProb);
return true;
}
// Calculate Edge Weights using "Pointer Heuristics". Predict a comparsion
// between two pointer or pointer and NULL will fail.
bool BranchProbabilityInfo::calcPointerHeuristics(BasicBlock *BB) {
BranchInst * BI = dyn_cast<BranchInst>(BB->getTerminator());
if (!BI || !BI->isConditional())
return false;
Value *Cond = BI->getCondition();
ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
if (!CI || !CI->isEquality())
return false;
Value *LHS = CI->getOperand(0);
if (!LHS->getType()->isPointerTy())
return false;
assert(CI->getOperand(1)->getType()->isPointerTy());
// p != 0 -> isProb = true
// p == 0 -> isProb = false
// p != q -> isProb = true
// p == q -> isProb = false;
unsigned TakenIdx = 0, NonTakenIdx = 1;
bool isProb = CI->getPredicate() == ICmpInst::ICMP_NE;
if (!isProb)
std::swap(TakenIdx, NonTakenIdx);
BranchProbability TakenProb(PH_TAKEN_WEIGHT,
PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT);
setEdgeProbability(BB, TakenIdx, TakenProb);
setEdgeProbability(BB, NonTakenIdx, TakenProb.getCompl());
return true;
}
// Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges
// as taken, exiting edges as not-taken.
bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB,
const LoopInfo &LI) {
Loop *L = LI.getLoopFor(BB);
if (!L)
return false;
SmallVector<unsigned, 8> BackEdges;
SmallVector<unsigned, 8> ExitingEdges;
SmallVector<unsigned, 8> InEdges; // Edges from header to the loop.
for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
if (!L->contains(*I))
ExitingEdges.push_back(I.getSuccessorIndex());
else if (L->getHeader() == *I)
BackEdges.push_back(I.getSuccessorIndex());
else
InEdges.push_back(I.getSuccessorIndex());
}
if (BackEdges.empty() && ExitingEdges.empty())
return false;
// Collect the sum of probabilities of back-edges/in-edges/exiting-edges, and
// normalize them so that they sum up to one.
SmallVector<BranchProbability, 4> Probs(3, BranchProbability::getZero());
unsigned Denom = (BackEdges.empty() ? 0 : LBH_TAKEN_WEIGHT) +
(InEdges.empty() ? 0 : LBH_TAKEN_WEIGHT) +
(ExitingEdges.empty() ? 0 : LBH_NONTAKEN_WEIGHT);
if (!BackEdges.empty())
Probs[0] = BranchProbability(LBH_TAKEN_WEIGHT, Denom);
if (!InEdges.empty())
Probs[1] = BranchProbability(LBH_TAKEN_WEIGHT, Denom);
if (!ExitingEdges.empty())
Probs[2] = BranchProbability(LBH_NONTAKEN_WEIGHT, Denom);
if (uint32_t numBackEdges = BackEdges.size()) {
auto Prob = Probs[0] / numBackEdges;
for (unsigned SuccIdx : BackEdges)
setEdgeProbability(BB, SuccIdx, Prob);
}
if (uint32_t numInEdges = InEdges.size()) {
auto Prob = Probs[1] / numInEdges;
for (unsigned SuccIdx : InEdges)
setEdgeProbability(BB, SuccIdx, Prob);
}
if (uint32_t numExitingEdges = ExitingEdges.size()) {
auto Prob = Probs[2] / numExitingEdges;
for (unsigned SuccIdx : ExitingEdges)
setEdgeProbability(BB, SuccIdx, Prob);
}
return true;
}
bool BranchProbabilityInfo::calcZeroHeuristics(BasicBlock *BB) {
BranchInst * BI = dyn_cast<BranchInst>(BB->getTerminator());
if (!BI || !BI->isConditional())
return false;
Value *Cond = BI->getCondition();
ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
if (!CI)
return false;
Value *RHS = CI->getOperand(1);
ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
if (!CV)
return false;
// If the LHS is the result of AND'ing a value with a single bit bitmask,
// we don't have information about probabilities.
if (Instruction *LHS = dyn_cast<Instruction>(CI->getOperand(0)))
if (LHS->getOpcode() == Instruction::And)
if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(LHS->getOperand(1)))
if (AndRHS->getUniqueInteger().isPowerOf2())
return false;
bool isProb;
if (CV->isZero()) {
switch (CI->getPredicate()) {
case CmpInst::ICMP_EQ:
// X == 0 -> Unlikely
isProb = false;
break;
case CmpInst::ICMP_NE:
// X != 0 -> Likely
isProb = true;
break;
case CmpInst::ICMP_SLT:
// X < 0 -> Unlikely
isProb = false;
break;
case CmpInst::ICMP_SGT:
// X > 0 -> Likely
isProb = true;
break;
default:
return false;
}
} else if (CV->isOne() && CI->getPredicate() == CmpInst::ICMP_SLT) {
// InstCombine canonicalizes X <= 0 into X < 1.
// X <= 0 -> Unlikely
isProb = false;
} else if (CV->isAllOnesValue()) {
switch (CI->getPredicate()) {
case CmpInst::ICMP_EQ:
// X == -1 -> Unlikely
isProb = false;
break;
case CmpInst::ICMP_NE:
// X != -1 -> Likely
isProb = true;
break;
case CmpInst::ICMP_SGT:
// InstCombine canonicalizes X >= 0 into X > -1.
// X >= 0 -> Likely
isProb = true;
break;
default:
return false;
}
} else {
return false;
}
unsigned TakenIdx = 0, NonTakenIdx = 1;
if (!isProb)
std::swap(TakenIdx, NonTakenIdx);
BranchProbability TakenProb(ZH_TAKEN_WEIGHT,
ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT);
setEdgeProbability(BB, TakenIdx, TakenProb);
setEdgeProbability(BB, NonTakenIdx, TakenProb.getCompl());
return true;
}
bool BranchProbabilityInfo::calcFloatingPointHeuristics(BasicBlock *BB) {
BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
if (!BI || !BI->isConditional())
return false;
Value *Cond = BI->getCondition();
FCmpInst *FCmp = dyn_cast<FCmpInst>(Cond);
if (!FCmp)
return false;
bool isProb;
if (FCmp->isEquality()) {
// f1 == f2 -> Unlikely
// f1 != f2 -> Likely
isProb = !FCmp->isTrueWhenEqual();
} else if (FCmp->getPredicate() == FCmpInst::FCMP_ORD) {
// !isnan -> Likely
isProb = true;
} else if (FCmp->getPredicate() == FCmpInst::FCMP_UNO) {
// isnan -> Unlikely
isProb = false;
} else {
return false;
}
unsigned TakenIdx = 0, NonTakenIdx = 1;
if (!isProb)
std::swap(TakenIdx, NonTakenIdx);
BranchProbability TakenProb(FPH_TAKEN_WEIGHT,
FPH_TAKEN_WEIGHT + FPH_NONTAKEN_WEIGHT);
setEdgeProbability(BB, TakenIdx, TakenProb);
setEdgeProbability(BB, NonTakenIdx, TakenProb.getCompl());
return true;
}
bool BranchProbabilityInfo::calcInvokeHeuristics(BasicBlock *BB) {
InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator());
if (!II)
return false;
BranchProbability TakenProb(IH_TAKEN_WEIGHT,
IH_TAKEN_WEIGHT + IH_NONTAKEN_WEIGHT);
setEdgeProbability(BB, 0 /*Index for Normal*/, TakenProb);
setEdgeProbability(BB, 1 /*Index for Unwind*/, TakenProb.getCompl());
return true;
}
void BranchProbabilityInfo::releaseMemory() {
Probs.clear();
}
void BranchProbabilityInfo::print(raw_ostream &OS) const {
OS << "---- Branch Probabilities ----\n";
// We print the probabilities from the last function the analysis ran over,
// or the function it is currently running over.
assert(LastF && "Cannot print prior to running over a function");
Analysis: Remove implicit ilist iterator conversions Remove implicit ilist iterator conversions from LLVMAnalysis. I came across something really scary in `llvm::isKnownNotFullPoison()` which relied on `Instruction::getNextNode()` being completely broken (not surprising, but scary nevertheless). This function is documented (and coded to) return `nullptr` when it gets to the sentinel, but with an `ilist_half_node` as a sentinel, the sentinel check looks into some other memory and we don't recognize we've hit the end. Rooting out these scary cases is the reason I'm removing the implicit conversions before doing anything else with `ilist`; I'm not at all surprised that clients rely on badness. I found another scary case -- this time, not relying on badness, just bad (but I guess getting lucky so far) -- in `ObjectSizeOffsetEvaluator::compute_()`. Here, we save out the insertion point, do some things, and then restore it. Previously, we let the iterator auto-convert to `Instruction*`, and then set it back using the `Instruction*` version: Instruction *PrevInsertPoint = Builder.GetInsertPoint(); /* Logic that may change insert point */ if (PrevInsertPoint) Builder.SetInsertPoint(PrevInsertPoint); The check for `PrevInsertPoint` doesn't protect correctly against bad accesses. If the insertion point has been set to the end of a basic block (i.e., `SetInsertPoint(SomeBB)`), then `GetInsertPoint()` returns an iterator pointing at the list sentinel. The version of `SetInsertPoint()` that's getting called will then call `PrevInsertPoint->getParent()`, which explodes horribly. The only reason this hasn't blown up is that it's fairly unlikely the builder is adding to the end of the block; usually, we're adding instructions somewhere before the terminator. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@249925 91177308-0d34-0410-b5e6-96231b3b80d8
2015-10-10 00:53:03 +00:00
for (const auto &BI : *LastF) {
for (succ_const_iterator SI = succ_begin(&BI), SE = succ_end(&BI); SI != SE;
++SI) {
printEdgeProbability(OS << " ", &BI, *SI);
}
}
}
bool BranchProbabilityInfo::
isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const {
// Hot probability is at least 4/5 = 80%
// FIXME: Compare against a static "hot" BranchProbability.
return getEdgeProbability(Src, Dst) > BranchProbability(4, 5);
}
BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const {
auto MaxProb = BranchProbability::getZero();
BasicBlock *MaxSucc = nullptr;
for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
BasicBlock *Succ = *I;
auto Prob = getEdgeProbability(BB, Succ);
if (Prob > MaxProb) {
MaxProb = Prob;
MaxSucc = Succ;
}
}
// Hot probability is at least 4/5 = 80%
if (MaxProb > BranchProbability(4, 5))
return MaxSucc;
return nullptr;
}
/// Get the raw edge probability for the edge. If can't find it, return a
/// default probability 1/N where N is the number of successors. Here an edge is
/// specified using PredBlock and an
/// index to the successors.
BranchProbability
BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src,
unsigned IndexInSuccessors) const {
auto I = Probs.find(std::make_pair(Src, IndexInSuccessors));
if (I != Probs.end())
return I->second;
return {1,
static_cast<uint32_t>(std::distance(succ_begin(Src), succ_end(Src)))};
}
BranchProbability
BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src,
succ_const_iterator Dst) const {
return getEdgeProbability(Src, Dst.getSuccessorIndex());
}
/// Get the raw edge probability calculated for the block pair. This returns the
/// sum of all raw edge probabilities from Src to Dst.
BranchProbability
BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src,
const BasicBlock *Dst) const {
auto Prob = BranchProbability::getZero();
bool FoundProb = false;
for (succ_const_iterator I = succ_begin(Src), E = succ_end(Src); I != E; ++I)
if (*I == Dst) {
auto MapI = Probs.find(std::make_pair(Src, I.getSuccessorIndex()));
if (MapI != Probs.end()) {
FoundProb = true;
Prob += MapI->second;
}
}
uint32_t succ_num = std::distance(succ_begin(Src), succ_end(Src));
return FoundProb ? Prob : BranchProbability(1, succ_num);
}
/// Set the edge probability for a given edge specified by PredBlock and an
/// index to the successors.
void BranchProbabilityInfo::setEdgeProbability(const BasicBlock *Src,
unsigned IndexInSuccessors,
BranchProbability Prob) {
Probs[std::make_pair(Src, IndexInSuccessors)] = Prob;
DEBUG(dbgs() << "set edge " << Src->getName() << " -> " << IndexInSuccessors
<< " successor probability to " << Prob << "\n");
Replace all weight-based interfaces in MBB with probability-based interfaces, and update all uses of old interfaces. (This is the second attempt to submit this patch. The first caused two assertion failures and was reverted. See https://llvm.org/bugs/show_bug.cgi?id=25687) The patch in http://reviews.llvm.org/D13745 is broken into four parts: 1. New interfaces without functional changes (http://reviews.llvm.org/D13908). 2. Use new interfaces in SelectionDAG, while in other passes treat probabilities as weights (http://reviews.llvm.org/D14361). 3. Use new interfaces in all other passes. 4. Remove old interfaces. This patch is 3+4 above. In this patch, MBB won't provide weight-based interfaces any more, which are totally replaced by probability-based ones. The interface addSuccessor() is redesigned so that the default probability is unknown. We allow unknown probabilities but don't allow using it together with known probabilities in successor list. That is to say, we either have a list of successors with all known probabilities, or all unknown probabilities. In the latter case, we assume each successor has 1/N probability where N is the number of successors. An assertion checks if the user is attempting to add a successor with the disallowed mixed use as stated above. This can help us catch many misuses. All uses of weight-based interfaces are now updated to use probability-based ones. Differential revision: http://reviews.llvm.org/D14973 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254377 91177308-0d34-0410-b5e6-96231b3b80d8
2015-12-01 05:29:22 +00:00
}
raw_ostream &
BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS,
const BasicBlock *Src,
const BasicBlock *Dst) const {
const BranchProbability Prob = getEdgeProbability(Src, Dst);
OS << "edge " << Src->getName() << " -> " << Dst->getName()
<< " probability is " << Prob
<< (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n");
return OS;
}
void BranchProbabilityInfo::calculate(Function &F, const LoopInfo& LI) {
DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName()
<< " ----\n\n");
LastF = &F; // Store the last function we ran on for printing.
assert(PostDominatedByUnreachable.empty());
assert(PostDominatedByColdCall.empty());
// Walk the basic blocks in post-order so that we can build up state about
// the successors of a block iteratively.
for (auto BB : post_order(&F.getEntryBlock())) {
DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n");
if (calcUnreachableHeuristics(BB))
continue;
if (calcMetadataWeights(BB))
continue;
if (calcColdCallHeuristics(BB))
continue;
if (calcLoopBranchHeuristics(BB, LI))
continue;
if (calcPointerHeuristics(BB))
continue;
if (calcZeroHeuristics(BB))
continue;
if (calcFloatingPointHeuristics(BB))
continue;
calcInvokeHeuristics(BB);
}
PostDominatedByUnreachable.clear();
PostDominatedByColdCall.clear();
}
void BranchProbabilityInfoWrapperPass::getAnalysisUsage(
AnalysisUsage &AU) const {
AU.addRequired<LoopInfoWrapperPass>();
AU.setPreservesAll();
}
bool BranchProbabilityInfoWrapperPass::runOnFunction(Function &F) {
const LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
BPI.calculate(F, LI);
return false;
}
void BranchProbabilityInfoWrapperPass::releaseMemory() { BPI.releaseMemory(); }
void BranchProbabilityInfoWrapperPass::print(raw_ostream &OS,
const Module *) const {
BPI.print(OS);
}