Refactor threshold computation for inline cost analysis

Differential Revision: http://reviews.llvm.org/D15401

llvm-svn: 257832
This commit is contained in:
Easwaran Raman 2016-01-14 23:16:29 +00:00
parent 2de1f4cb59
commit 9b73e2c66d
6 changed files with 133 additions and 132 deletions

View File

@ -101,14 +101,15 @@ public:
/// \brief Get an InlineCost object representing the cost of inlining this
/// callsite.
///
/// Note that threshold is passed into this function. Only costs below the
/// threshold are computed with any accuracy. The threshold can be used to
/// bound the computation necessary to determine whether the cost is
/// Note that a default threshold is passed into this function. This threshold
/// could be modified based on callsite's properties and only costs below this
/// new threshold are computed with any accuracy. The new threshold can be
/// used to bound the computation necessary to determine whether the cost is
/// sufficiently low to warrant inlining.
///
/// Also note that calling this function *dynamically* computes the cost of
/// inlining the callsite. It is an expensive, heavyweight call.
InlineCost getInlineCost(CallSite CS, int Threshold,
InlineCost getInlineCost(CallSite CS, int DefaultThreshold,
TargetTransformInfo &CalleeTTI,
AssumptionCacheTracker *ACT);
@ -117,10 +118,15 @@ InlineCost getInlineCost(CallSite CS, int Threshold,
/// pointer. This behaves exactly as the version with no explicit callee
/// parameter in all other respects.
//
InlineCost getInlineCost(CallSite CS, Function *Callee, int Threshold,
InlineCost getInlineCost(CallSite CS, Function *Callee, int DefaultThreshold,
TargetTransformInfo &CalleeTTI,
AssumptionCacheTracker *ACT);
int computeThresholdFromOptLevels(unsigned OptLevel, unsigned SizeOptLevel);
/// \brief Return the default value of -inline-threshold.
int getDefaultInlineThreshold();
/// \brief Minimal filter to detect invalid constructs for inlining.
bool isInlineViable(Function &Callee);
}

View File

@ -31,7 +31,7 @@ template <class PtrType, unsigned SmallSize> class SmallPtrSet;
///
struct Inliner : public CallGraphSCCPass {
explicit Inliner(char &ID);
explicit Inliner(char &ID, int Threshold, bool InsertLifetime);
explicit Inliner(char &ID, bool InsertLifetime);
/// getAnalysisUsage - For this class, we declare that we require and preserve
/// the call graph. If the derived class implements this method, it should
@ -47,18 +47,6 @@ struct Inliner : public CallGraphSCCPass {
// processing to avoid breaking the SCC traversal.
bool doFinalization(CallGraph &CG) override;
/// This method returns the value specified by the -inline-threshold value,
/// specified on the command line. This is typically not directly needed.
///
unsigned getInlineThreshold() const { return InlineThreshold; }
/// Calculate the inline threshold for given Caller. This threshold is lower
/// if the caller is marked with OptimizeForSize and -inline-threshold is not
/// given on the command line. It is higher if the callee is marked with the
/// inlinehint attribute.
///
unsigned getInlineThreshold(CallSite CS) const;
/// getInlineCost - This method must be implemented by the subclass to
/// determine the cost of inlining the specified call site. If the cost
/// returned is greater than the current inline threshold, the call site is
@ -75,9 +63,6 @@ struct Inliner : public CallGraphSCCPass {
bool removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly = false);
private:
// InlineThreshold - Cache the value here for easy access.
unsigned InlineThreshold;
// InsertLifetime - Insert @llvm.lifetime intrinsics.
bool InsertLifetime;

View File

@ -39,6 +39,32 @@ using namespace llvm;
STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
// Inline-threshold tuning knobs shared by the cost analysis in this file.
//
// The three constants below are the fallback thresholds selected from the
// optimization / size-optimization levels by computeThresholdFromOptLevels().

// Threshold to use when optsize is specified (and there is no
// -inline-threshold). updateThreshold() only applies it when it is lower than
// the threshold currently in effect.
const int OptSizeThreshold = 75;
// Threshold to use when -Oz is specified (and there is no -inline-threshold).
// Returned by computeThresholdFromOptLevels() for a size level of 2.
const int OptMinSizeThreshold = 25;
// Threshold to use when -O[34] is specified (and there is no
// -inline-threshold). Returned by computeThresholdFromOptLevels() whenever the
// optimization level is greater than 2, regardless of the size level.
const int OptAggressiveThreshold = 275;
// -inline-threshold: the baseline threshold (225 unless overridden). Its
// occurrence count is consulted by updateThreshold() to tell an explicit user
// setting apart from the built-in default.
static cl::opt<int> DefaultInlineThreshold(
"inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
cl::desc("Control the amount of inlining to perform (default = 225)"));
// -inlinehint-threshold: threshold used by updateThreshold() for callees that
// carry the inlinehint attribute or that profile counts classify as hot; it is
// only applied when it raises the current threshold.
static cl::opt<int> HintThreshold(
"inlinehint-threshold", cl::Hidden, cl::init(325),
cl::desc("Threshold for inlining functions with inline hint"));
// We introduce this threshold to help performance of instrumentation based
// PGO before we actually hook up inliner with analysis passes such as BPI and
// BFI.
// -inlinecold-threshold: threshold used by updateThreshold() for callees that
// carry the cold attribute or that profile counts classify as cold; it is only
// applied when it lowers the current threshold.
static cl::opt<int> ColdThreshold(
"inlinecold-threshold", cl::Hidden, cl::init(225),
cl::desc("Threshold for inlining functions with cold attribute"));
namespace {
class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
@ -122,6 +148,12 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// inlined through this particular callsite.
bool isKnownNonNullInCallee(Value *V);
/// Update Threshold based on callsite properties such as callee
/// attributes and callee hotness for PGO builds. The Callee is explicitly
/// passed to support analyzing indirect calls whose target is inferred by
/// analysis.
void updateThreshold(CallSite CS, Function &Callee);
// Custom analysis routines.
bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
@ -541,6 +573,56 @@ bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
return false;
}
/// Refine the analyzer's Threshold member for the call site \p CS whose
/// (possibly inferred) target is \p Callee.
///
/// Three adjustments are applied in order:
///   1. lower to OptSizeThreshold for optsize callers,
///   2. raise to HintThreshold for hinted / hot callees,
///   3. lower to ColdThreshold for cold callees.
/// Each step only takes effect when it moves the threshold in the stated
/// direction.
void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
  Function *Caller = CS.getCaller();

  // Step 1: an optsize caller shrinks the threshold, unless the user pinned
  // -inline-threshold on the command line.
  // FIXME: Use Function::optForSize()
  const bool CallerIsOptSize =
      Caller->hasFnAttribute(Attribute::OptimizeForSize);
  if (!(DefaultInlineThreshold.getNumOccurrences() > 0) && CallerIsOptSize) {
    if (OptSizeThreshold < Threshold)
      Threshold = OptSizeThreshold;
  }

  // Gather profile data, if present, for the hot/cold classification below.
  // FIXME: The heuristic used below for determining hotness and coldness are
  // based on preliminary SPEC tuning and may not be optimal. Replace this with
  // a well-tuned heuristic based on *callsite* hotness and not callee hotness.
  uint64_t CalleeEntryCount = 0;
  uint64_t ModuleMaxCount = 0;
  const bool HaveProfile =
      Callee.getEntryCount() && Callee.getParent()->getMaximumFunctionCount();
  if (HaveProfile) {
    CalleeEntryCount = Callee.getEntryCount().getValue();
    ModuleMaxCount = Callee.getParent()->getMaximumFunctionCount().getValue();
  }

  // Step 2: a callee marked inlinehint, or one whose entry count is at least
  // 30% of the module maximum, may raise the threshold -- but never for a
  // caller that must be minimized in size.
  bool TreatAsHinted = Callee.hasFnAttribute(Attribute::InlineHint);
  if (!TreatAsHinted && HaveProfile)
    TreatAsHinted =
        CalleeEntryCount >=
        static_cast<uint64_t>(0.3 * static_cast<double>(ModuleMaxCount));
  if (TreatAsHinted && HintThreshold > Threshold) {
    if (!Caller->optForMinSize())
      Threshold = HintThreshold;
  }

  // Step 3: a callee marked cold, or one whose entry count is at most 1% of
  // the module maximum, may lower the threshold.
  bool TreatAsCold = Callee.hasFnAttribute(Attribute::Cold);
  if (!TreatAsCold && HaveProfile)
    TreatAsCold =
        CalleeEntryCount <=
        static_cast<uint64_t>(0.01 * static_cast<double>(ModuleMaxCount));

  // Command line argument for DefaultInlineThreshold will override the default
  // ColdThreshold: with -inline-threshold but no -inlinecold-threshold, the
  // default cold threshold is ignored even if it is smaller.
  const bool ColdThresholdUsable =
      DefaultInlineThreshold.getNumOccurrences() == 0 ||
      ColdThreshold.getNumOccurrences() > 0;
  if (ColdThresholdUsable && TreatAsCold && ColdThreshold < Threshold)
    Threshold = ColdThreshold;
}
bool CallAnalyzer::visitCmpInst(CmpInst &I) {
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
// First try to handle simplified comparisons.
@ -1079,6 +1161,10 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// nice to base the bonus values on something more scientific.
assert(NumInstructions == 0);
assert(NumVectorInstructions == 0);
// Update the threshold based on callsite properties
updateThreshold(CS, F);
FiftyPercentVectorBonus = 3 * Threshold / 2;
TenPercentVectorBonus = 3 * Threshold / 4;
const DataLayout &DL = F.getParent()->getDataLayout();
@ -1335,15 +1421,31 @@ static bool functionsHaveCompatibleAttributes(Function *Caller,
AttributeFuncs::areInlineCompatible(*Caller, *Callee);
}
InlineCost llvm::getInlineCost(CallSite CS, int Threshold,
InlineCost llvm::getInlineCost(CallSite CS, int DefaultThreshold,
TargetTransformInfo &CalleeTTI,
AssumptionCacheTracker *ACT) {
return getInlineCost(CS, CS.getCalledFunction(), Threshold, CalleeTTI, ACT);
return getInlineCost(CS, CS.getCalledFunction(), DefaultThreshold, CalleeTTI,
ACT);
}
InlineCost llvm::getInlineCost(CallSite CS, Function *Callee, int Threshold,
/// Pick the baseline inline threshold for the given optimization level and
/// size-optimization level.
///
/// An optimization level above 2 always selects the aggressive threshold;
/// otherwise the size level decides, falling back to the value of
/// -inline-threshold.
int llvm::computeThresholdFromOptLevels(unsigned OptLevel,
                                        unsigned SizeOptLevel) {
  // Aggressive optimization takes precedence over any size level.
  if (OptLevel >= 3)
    return OptAggressiveThreshold;

  switch (SizeOptLevel) {
  case 1: // -Os
    return OptSizeThreshold;
  case 2: // -Oz
    return OptMinSizeThreshold;
  default:
    return DefaultInlineThreshold;
  }
}
/// Report the current value of the -inline-threshold option.
int llvm::getDefaultInlineThreshold() {
  const int CurrentValue = DefaultInlineThreshold;
  return CurrentValue;
}
InlineCost llvm::getInlineCost(CallSite CS, Function *Callee,
int DefaultThreshold,
TargetTransformInfo &CalleeTTI,
AssumptionCacheTracker *ACT) {
// Cannot inline indirect calls.
if (!Callee)
return llvm::InlineCost::getNever();
@ -1375,7 +1477,7 @@ InlineCost llvm::getInlineCost(CallSite CS, Function *Callee, int Threshold,
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
<< "...\n");
CallAnalyzer CA(CalleeTTI, ACT, *Callee, Threshold, CS);
CallAnalyzer CA(CalleeTTI, ACT, *Callee, DefaultThreshold, CS);
bool ShouldInline = CA.analyzeCall(CS);
DEBUG(CA.dump());

View File

@ -37,13 +37,11 @@ namespace {
class AlwaysInliner : public Inliner {
public:
// Use extremely low threshold.
AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true) {
AlwaysInliner() : Inliner(ID, /*InsertLifetime*/ true) {
initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
}
AlwaysInliner(bool InsertLifetime)
: Inliner(ID, -2000000000, InsertLifetime) {
AlwaysInliner(bool InsertLifetime) : Inliner(ID, InsertLifetime) {
initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
}

View File

@ -38,14 +38,19 @@ namespace {
/// inliner pass and the always inliner pass. The two passes use different cost
/// analyses to determine when to inline.
class SimpleInliner : public Inliner {
// This field is populated based on one of the following:
// optimization or size optimization levels,
// --inline-threshold flag,
// user specified value.
int DefaultThreshold;
public:
SimpleInliner() : Inliner(ID) {
SimpleInliner()
: Inliner(ID), DefaultThreshold(llvm::getDefaultInlineThreshold()) {
initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
}
SimpleInliner(int Threshold)
: Inliner(ID, Threshold, /*InsertLifetime*/ true) {
SimpleInliner(int Threshold) : Inliner(ID), DefaultThreshold(Threshold) {
initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
}
@ -54,7 +59,7 @@ public:
InlineCost getInlineCost(CallSite CS) override {
Function *Callee = CS.getCalledFunction();
TargetTransformInfo &TTI = TTIWP->getTTI(*Callee);
return llvm::getInlineCost(CS, getInlineThreshold(CS), TTI, ACT);
return llvm::getInlineCost(CS, DefaultThreshold, TTI, ACT);
}
bool runOnSCC(CallGraphSCC &SCC) override;
@ -64,17 +69,6 @@ private:
TargetTransformInfoWrapperPass *TTIWP;
};
/// Pick the inline threshold for the given optimization level and
/// size-optimization level.
///
/// An optimization level above 2 always yields 275; otherwise a size level of
/// 1 (-Os) yields 75, a size level of 2 (-Oz) yields 25, and anything else
/// yields 225.
static int computeThresholdFromOptLevels(unsigned OptLevel,
                                         unsigned SizeOptLevel) {
  // Aggressive optimization takes precedence over any size level.
  if (OptLevel >= 3)
    return 275;

  switch (SizeOptLevel) {
  case 1: // -Os
    return 75;
  case 2: // -Oz
    return 25;
  default:
    return 225;
  }
}
} // end anonymous namespace
char SimpleInliner::ID = 0;
@ -96,7 +90,7 @@ Pass *llvm::createFunctionInliningPass(int Threshold) {
Pass *llvm::createFunctionInliningPass(unsigned OptLevel,
unsigned SizeOptLevel) {
return new SimpleInliner(
computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
llvm::computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
}
bool SimpleInliner::runOnSCC(CallGraphSCC &SCC) {

View File

@ -47,33 +47,10 @@ STATISTIC(NumMergedAllocas, "Number of allocas merged together");
// if those would be more profitable and blocked inline steps.
STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed");
static cl::opt<int>
InlineLimit("inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
cl::desc("Control the amount of inlining to perform (default = 225)"));
/// Construct an inliner pass identified by \p ID with insertion of
/// @llvm.lifetime intrinsics enabled.
Inliner::Inliner(char &ID) : CallGraphSCCPass(ID), InsertLifetime(true) {}
static cl::opt<int>
HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325),
cl::desc("Threshold for inlining functions with inline hint"));
// We introduce this threshold to help performance of instrumentation based
// PGO before we actually hook up inliner with analysis passes such as BPI and
// BFI.
static cl::opt<int>
ColdThreshold("inlinecold-threshold", cl::Hidden, cl::init(225),
cl::desc("Threshold for inlining functions with cold attribute"));
// Threshold to use when optsize is specified (and there is no
// -inline-threshold).
const int OptSizeThreshold = 75;
/// Construct an inliner pass identified by \p ID. The cached inline threshold
/// is taken from the -inline-threshold option (InlineLimit), and insertion of
/// @llvm.lifetime intrinsics is enabled.
Inliner::Inliner(char &ID)
: CallGraphSCCPass(ID), InlineThreshold(InlineLimit), InsertLifetime(true) {
}
/// Construct an inliner pass identified by \p ID.
///
/// \param Threshold      threshold requested by the creator of the pass; it is
///                       used only when -inline-threshold was not given.
/// \param InsertLifetime whether to insert @llvm.lifetime intrinsics.
Inliner::Inliner(char &ID, int Threshold, bool InsertLifetime)
    : CallGraphSCCPass(ID), InlineThreshold(Threshold),
      InsertLifetime(InsertLifetime) {
  // An explicit -inline-threshold on the command line wins over the
  // threshold requested by the creator of the pass.
  if (InlineLimit.getNumOccurrences() > 0)
    InlineThreshold = InlineLimit;
}
/// Construct an inliner pass identified by \p ID.
///
/// \param InsertLifetime whether to insert @llvm.lifetime intrinsics.
Inliner::Inliner(char &ID, bool InsertLifetime)
: CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {}
/// For this class, we declare that we require and preserve the call graph.
/// If the derived class implements this method, it should
@ -243,67 +220,6 @@ static bool InlineCallIfPossible(Pass &P, CallSite CS, InlineFunctionInfo &IFI,
return true;
}
/// Compute the inline threshold to use for the call site \p CS.
///
/// Starts from the cached InlineThreshold and then, in order:
///   1. lowers it to OptSizeThreshold for optsize callers,
///   2. raises it to HintThreshold for hinted / hot callees,
///   3. lowers it to ColdThreshold for cold callees.
/// Steps 2 and 3 are skipped when the callee is unknown or only a
/// declaration.
unsigned Inliner::getInlineThreshold(CallSite CS) const {
  // -inline-threshold or else selected by overall opt level.
  int Result = InlineThreshold;

  // Step 1: an optsize caller shrinks the threshold, unless the user gave
  // -inline-threshold on the command line.
  Function *Caller = CS.getCaller();
  bool CallerIsOptSize = false;
  if (Caller && !Caller->isDeclaration()) {
    // FIXME: Use Function::optForSize().
    CallerIsOptSize = Caller->hasFnAttribute(Attribute::OptimizeForSize);
  }
  if (!(InlineLimit.getNumOccurrences() > 0) && CallerIsOptSize) {
    if (OptSizeThreshold < Result)
      Result = OptSizeThreshold;
  }

  // Nothing callee-specific can be said about indirect calls or declarations.
  Function *Callee = CS.getCalledFunction();
  if (!Callee)
    return Result;
  if (Callee->isDeclaration())
    return Result;

  // Gather profile data, if present, for the hot/cold classification below.
  // FIXME: The heuristic used below for determining hotness and coldness are
  // based on preliminary SPEC tuning and may not be optimal. Replace this with
  // a well-tuned heuristic based on *callsite* hotness and not callee hotness.
  uint64_t CalleeEntryCount = 0;
  uint64_t ModuleMaxCount = 0;
  const bool HaveProfile = Callee->getEntryCount() &&
                           Callee->getParent()->getMaximumFunctionCount();
  if (HaveProfile) {
    CalleeEntryCount = Callee->getEntryCount().getValue();
    ModuleMaxCount = Callee->getParent()->getMaximumFunctionCount().getValue();
  }

  // Step 2: a callee marked inlinehint, or one whose entry count is at least
  // 30% of the module maximum, may raise the threshold -- but never for a
  // minsize caller.
  bool TreatAsHinted = Callee->hasFnAttribute(Attribute::InlineHint);
  if (!TreatAsHinted && HaveProfile)
    TreatAsHinted =
        CalleeEntryCount >=
        static_cast<uint64_t>(0.3 * static_cast<double>(ModuleMaxCount));
  if (TreatAsHinted && HintThreshold > Result) {
    // NOTE(review): Caller is null-checked in step 1 but dereferenced
    // unguarded here -- confirm CS.getCaller() can never be null.
    if (!Caller->hasFnAttribute(Attribute::MinSize))
      Result = HintThreshold;
  }

  // Step 3: a callee marked cold, or one whose entry count is at most 1% of
  // the module maximum, may lower the threshold.
  bool TreatAsCold = Callee->hasFnAttribute(Attribute::Cold);
  if (!TreatAsCold && HaveProfile)
    TreatAsCold =
        CalleeEntryCount <=
        static_cast<uint64_t>(0.01 * static_cast<double>(ModuleMaxCount));

  // Command line argument for InlineLimit will override the default
  // ColdThreshold: with -inline-threshold but no -inlinecold-threshold, the
  // default cold threshold is ignored even if it is smaller.
  const bool ColdThresholdUsable = InlineLimit.getNumOccurrences() == 0 ||
                                   ColdThreshold.getNumOccurrences() > 0;
  if (ColdThresholdUsable && TreatAsCold && ColdThreshold < Result)
    Result = ColdThreshold;

  return Result;
}
static void emitAnalysis(CallSite CS, const Twine &Msg) {
Function *Caller = CS.getCaller();
LLVMContext &Ctx = Caller->getContext();