mirror of
https://github.com/RPCSX/llvm.git
synced 2024-12-04 01:43:06 +00:00
Refactor threshold computation for inline cost analysis
Differential Revision: http://reviews.llvm.org/D15401 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@257832 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
92f6aec271
commit
ec8106478b
@ -101,14 +101,15 @@ public:
|
||||
/// \brief Get an InlineCost object representing the cost of inlining this
|
||||
/// callsite.
|
||||
///
|
||||
/// Note that threshold is passed into this function. Only costs below the
|
||||
/// threshold are computed with any accuracy. The threshold can be used to
|
||||
/// bound the computation necessary to determine whether the cost is
|
||||
/// Note that a default threshold is passed into this function. This threshold
|
||||
/// could be modified based on callsite's properties and only costs below this
|
||||
/// new threshold are computed with any accuracy. The new threshold can be
|
||||
/// used to bound the computation necessary to determine whether the cost is
|
||||
/// sufficiently low to warrant inlining.
|
||||
///
|
||||
/// Also note that calling this function *dynamically* computes the cost of
|
||||
/// inlining the callsite. It is an expensive, heavyweight call.
|
||||
InlineCost getInlineCost(CallSite CS, int Threshold,
|
||||
InlineCost getInlineCost(CallSite CS, int DefaultThreshold,
|
||||
TargetTransformInfo &CalleeTTI,
|
||||
AssumptionCacheTracker *ACT);
|
||||
|
||||
@ -117,10 +118,15 @@ InlineCost getInlineCost(CallSite CS, int Threshold,
|
||||
/// pointer. This behaves exactly as the version with no explicit callee
|
||||
/// parameter in all other respects.
|
||||
///
|
||||
InlineCost getInlineCost(CallSite CS, Function *Callee, int Threshold,
|
||||
InlineCost getInlineCost(CallSite CS, Function *Callee, int DefaultThreshold,
|
||||
TargetTransformInfo &CalleeTTI,
|
||||
AssumptionCacheTracker *ACT);
|
||||
|
||||
int computeThresholdFromOptLevels(unsigned OptLevel, unsigned SizeOptLevel);
|
||||
|
||||
/// \brief Return the default value of -inline-threshold.
|
||||
int getDefaultInlineThreshold();
|
||||
|
||||
/// \brief Minimal filter to detect invalid constructs for inlining.
|
||||
bool isInlineViable(Function &Callee);
|
||||
}
|
||||
|
@ -31,7 +31,7 @@ template <class PtrType, unsigned SmallSize> class SmallPtrSet;
|
||||
///
|
||||
struct Inliner : public CallGraphSCCPass {
|
||||
explicit Inliner(char &ID);
|
||||
explicit Inliner(char &ID, int Threshold, bool InsertLifetime);
|
||||
explicit Inliner(char &ID, bool InsertLifetime);
|
||||
|
||||
/// getAnalysisUsage - For this class, we declare that we require and preserve
|
||||
/// the call graph. If the derived class implements this method, it should
|
||||
@ -47,18 +47,6 @@ struct Inliner : public CallGraphSCCPass {
|
||||
// processing to avoid breaking the SCC traversal.
|
||||
bool doFinalization(CallGraph &CG) override;
|
||||
|
||||
/// This method returns the value of the -inline-threshold option,
|
||||
/// specified on the command line. This is typically not directly needed.
|
||||
///
|
||||
unsigned getInlineThreshold() const { return InlineThreshold; }
|
||||
|
||||
/// Calculate the inline threshold for given Caller. This threshold is lower
|
||||
/// if the caller is marked with OptimizeForSize and -inline-threshold is not
|
||||
/// given on the command line. It is higher if the callee is marked with the
|
||||
/// inlinehint attribute.
|
||||
///
|
||||
unsigned getInlineThreshold(CallSite CS) const;
|
||||
|
||||
/// getInlineCost - This method must be implemented by the subclass to
|
||||
/// determine the cost of inlining the specified call site. If the cost
|
||||
/// returned is greater than the current inline threshold, the call site is
|
||||
@ -75,9 +63,6 @@ struct Inliner : public CallGraphSCCPass {
|
||||
bool removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly = false);
|
||||
|
||||
private:
|
||||
// InlineThreshold - Cache the value here for easy access.
|
||||
unsigned InlineThreshold;
|
||||
|
||||
// InsertLifetime - Insert @llvm.lifetime intrinsics.
|
||||
bool InsertLifetime;
|
||||
|
||||
|
@ -39,6 +39,32 @@ using namespace llvm;
|
||||
|
||||
STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
|
||||
|
||||
// Threshold to use when optsize is specified (and there is no
|
||||
// -inline-threshold).
|
||||
const int OptSizeThreshold = 75;
|
||||
|
||||
// Threshold to use when -Oz is specified (and there is no -inline-threshold).
|
||||
const int OptMinSizeThreshold = 25;
|
||||
|
||||
// Threshold to use when -O[34] is specified (and there is no
|
||||
// -inline-threshold).
|
||||
const int OptAggressiveThreshold = 275;
|
||||
|
||||
static cl::opt<int> DefaultInlineThreshold(
|
||||
"inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
|
||||
cl::desc("Control the amount of inlining to perform (default = 225)"));
|
||||
|
||||
static cl::opt<int> HintThreshold(
|
||||
"inlinehint-threshold", cl::Hidden, cl::init(325),
|
||||
cl::desc("Threshold for inlining functions with inline hint"));
|
||||
|
||||
// We introduce this threshold to help performance of instrumentation based
|
||||
// PGO before we actually hook up inliner with analysis passes such as BPI and
|
||||
// BFI.
|
||||
static cl::opt<int> ColdThreshold(
|
||||
"inlinecold-threshold", cl::Hidden, cl::init(225),
|
||||
cl::desc("Threshold for inlining functions with cold attribute"));
|
||||
|
||||
namespace {
|
||||
|
||||
class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
|
||||
@ -122,6 +148,12 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
|
||||
/// inlined through this particular callsite.
|
||||
bool isKnownNonNullInCallee(Value *V);
|
||||
|
||||
/// Update Threshold based on callsite properties such as callee
|
||||
/// attributes and callee hotness for PGO builds. The Callee is explicitly
|
||||
/// passed to support analyzing indirect calls whose target is inferred by
|
||||
/// analysis.
|
||||
void updateThreshold(CallSite CS, Function &Callee);
|
||||
|
||||
// Custom analysis routines.
|
||||
bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
|
||||
|
||||
@ -541,6 +573,56 @@ bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
|
||||
return false;
|
||||
}
|
||||
|
||||
void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
|
||||
// If -inline-threshold is not given, listen to the optsize attribute when it
|
||||
// would decrease the threshold.
|
||||
Function *Caller = CS.getCaller();
|
||||
|
||||
// FIXME: Use Function::optForSize()
|
||||
bool OptSize = Caller->hasFnAttribute(Attribute::OptimizeForSize);
|
||||
|
||||
if (!(DefaultInlineThreshold.getNumOccurrences() > 0) && OptSize &&
|
||||
OptSizeThreshold < Threshold)
|
||||
Threshold = OptSizeThreshold;
|
||||
|
||||
// If profile information is available, use that to adjust threshold of hot
|
||||
// and cold functions.
|
||||
// FIXME: The heuristics used below for determining hotness and coldness are
|
||||
// based on preliminary SPEC tuning and may not be optimal. Replace this with
|
||||
// a well-tuned heuristic based on *callsite* hotness and not callee hotness.
|
||||
uint64_t FunctionCount = 0, MaxFunctionCount = 0;
|
||||
bool HasPGOCounts = false;
|
||||
if (Callee.getEntryCount() && Callee.getParent()->getMaximumFunctionCount()) {
|
||||
HasPGOCounts = true;
|
||||
FunctionCount = Callee.getEntryCount().getValue();
|
||||
MaxFunctionCount = Callee.getParent()->getMaximumFunctionCount().getValue();
|
||||
}
|
||||
|
||||
// Listen to the inlinehint attribute or profile based hotness information
|
||||
// when it would increase the threshold and the caller does not need to
|
||||
// minimize its size.
|
||||
bool InlineHint =
|
||||
Callee.hasFnAttribute(Attribute::InlineHint) ||
|
||||
(HasPGOCounts &&
|
||||
FunctionCount >= (uint64_t)(0.3 * (double)MaxFunctionCount));
|
||||
if (InlineHint && HintThreshold > Threshold && !Caller->optForMinSize())
|
||||
Threshold = HintThreshold;
|
||||
|
||||
// Listen to the cold attribute or profile based coldness information
|
||||
// when it would decrease the threshold.
|
||||
bool ColdCallee =
|
||||
Callee.hasFnAttribute(Attribute::Cold) ||
|
||||
(HasPGOCounts &&
|
||||
FunctionCount <= (uint64_t)(0.01 * (double)MaxFunctionCount));
|
||||
// Command line argument for DefaultInlineThreshold will override the default
|
||||
// ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold,
|
||||
// do not use the default cold threshold even if it is smaller.
|
||||
if ((DefaultInlineThreshold.getNumOccurrences() == 0 ||
|
||||
ColdThreshold.getNumOccurrences() > 0) &&
|
||||
ColdCallee && ColdThreshold < Threshold)
|
||||
Threshold = ColdThreshold;
|
||||
}
|
||||
|
||||
bool CallAnalyzer::visitCmpInst(CmpInst &I) {
|
||||
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
|
||||
// First try to handle simplified comparisons.
|
||||
@ -1079,6 +1161,10 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
|
||||
// nice to base the bonus values on something more scientific.
|
||||
assert(NumInstructions == 0);
|
||||
assert(NumVectorInstructions == 0);
|
||||
|
||||
// Update the threshold based on callsite properties
|
||||
updateThreshold(CS, F);
|
||||
|
||||
FiftyPercentVectorBonus = 3 * Threshold / 2;
|
||||
TenPercentVectorBonus = 3 * Threshold / 4;
|
||||
const DataLayout &DL = F.getParent()->getDataLayout();
|
||||
@ -1335,15 +1421,31 @@ static bool functionsHaveCompatibleAttributes(Function *Caller,
|
||||
AttributeFuncs::areInlineCompatible(*Caller, *Callee);
|
||||
}
|
||||
|
||||
InlineCost llvm::getInlineCost(CallSite CS, int Threshold,
|
||||
InlineCost llvm::getInlineCost(CallSite CS, int DefaultThreshold,
|
||||
TargetTransformInfo &CalleeTTI,
|
||||
AssumptionCacheTracker *ACT) {
|
||||
return getInlineCost(CS, CS.getCalledFunction(), Threshold, CalleeTTI, ACT);
|
||||
return getInlineCost(CS, CS.getCalledFunction(), DefaultThreshold, CalleeTTI,
|
||||
ACT);
|
||||
}
|
||||
|
||||
InlineCost llvm::getInlineCost(CallSite CS, Function *Callee, int Threshold,
|
||||
int llvm::computeThresholdFromOptLevels(unsigned OptLevel,
|
||||
unsigned SizeOptLevel) {
|
||||
if (OptLevel > 2)
|
||||
return OptAggressiveThreshold;
|
||||
if (SizeOptLevel == 1) // -Os
|
||||
return OptSizeThreshold;
|
||||
if (SizeOptLevel == 2) // -Oz
|
||||
return OptMinSizeThreshold;
|
||||
return DefaultInlineThreshold;
|
||||
}
|
||||
|
||||
int llvm::getDefaultInlineThreshold() { return DefaultInlineThreshold; }
|
||||
|
||||
InlineCost llvm::getInlineCost(CallSite CS, Function *Callee,
|
||||
int DefaultThreshold,
|
||||
TargetTransformInfo &CalleeTTI,
|
||||
AssumptionCacheTracker *ACT) {
|
||||
|
||||
// Cannot inline indirect calls.
|
||||
if (!Callee)
|
||||
return llvm::InlineCost::getNever();
|
||||
@ -1375,7 +1477,7 @@ InlineCost llvm::getInlineCost(CallSite CS, Function *Callee, int Threshold,
|
||||
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
|
||||
<< "...\n");
|
||||
|
||||
CallAnalyzer CA(CalleeTTI, ACT, *Callee, Threshold, CS);
|
||||
CallAnalyzer CA(CalleeTTI, ACT, *Callee, DefaultThreshold, CS);
|
||||
bool ShouldInline = CA.analyzeCall(CS);
|
||||
|
||||
DEBUG(CA.dump());
|
||||
|
@ -37,13 +37,11 @@ namespace {
|
||||
class AlwaysInliner : public Inliner {
|
||||
|
||||
public:
|
||||
// Use extremely low threshold.
|
||||
AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true) {
|
||||
AlwaysInliner() : Inliner(ID, /*InsertLifetime*/ true) {
|
||||
initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
AlwaysInliner(bool InsertLifetime)
|
||||
: Inliner(ID, -2000000000, InsertLifetime) {
|
||||
AlwaysInliner(bool InsertLifetime) : Inliner(ID, InsertLifetime) {
|
||||
initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
|
@ -38,14 +38,19 @@ namespace {
|
||||
/// inliner pass and the always inliner pass. The two passes use different cost
|
||||
/// analyses to determine when to inline.
|
||||
class SimpleInliner : public Inliner {
|
||||
// This field is populated based on one of the following:
|
||||
// optimization or size optimization levels,
|
||||
// -inline-threshold flag,
|
||||
// user specified value.
|
||||
int DefaultThreshold;
|
||||
|
||||
public:
|
||||
SimpleInliner() : Inliner(ID) {
|
||||
SimpleInliner()
|
||||
: Inliner(ID), DefaultThreshold(llvm::getDefaultInlineThreshold()) {
|
||||
initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
SimpleInliner(int Threshold)
|
||||
: Inliner(ID, Threshold, /*InsertLifetime*/ true) {
|
||||
SimpleInliner(int Threshold) : Inliner(ID), DefaultThreshold(Threshold) {
|
||||
initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
@ -54,7 +59,7 @@ public:
|
||||
InlineCost getInlineCost(CallSite CS) override {
|
||||
Function *Callee = CS.getCalledFunction();
|
||||
TargetTransformInfo &TTI = TTIWP->getTTI(*Callee);
|
||||
return llvm::getInlineCost(CS, getInlineThreshold(CS), TTI, ACT);
|
||||
return llvm::getInlineCost(CS, DefaultThreshold, TTI, ACT);
|
||||
}
|
||||
|
||||
bool runOnSCC(CallGraphSCC &SCC) override;
|
||||
@ -64,17 +69,6 @@ private:
|
||||
TargetTransformInfoWrapperPass *TTIWP;
|
||||
};
|
||||
|
||||
static int computeThresholdFromOptLevels(unsigned OptLevel,
|
||||
unsigned SizeOptLevel) {
|
||||
if (OptLevel > 2)
|
||||
return 275;
|
||||
if (SizeOptLevel == 1) // -Os
|
||||
return 75;
|
||||
if (SizeOptLevel == 2) // -Oz
|
||||
return 25;
|
||||
return 225;
|
||||
}
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
char SimpleInliner::ID = 0;
|
||||
@ -96,7 +90,7 @@ Pass *llvm::createFunctionInliningPass(int Threshold) {
|
||||
Pass *llvm::createFunctionInliningPass(unsigned OptLevel,
|
||||
unsigned SizeOptLevel) {
|
||||
return new SimpleInliner(
|
||||
computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
|
||||
llvm::computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
|
||||
}
|
||||
|
||||
bool SimpleInliner::runOnSCC(CallGraphSCC &SCC) {
|
||||
|
@ -47,33 +47,10 @@ STATISTIC(NumMergedAllocas, "Number of allocas merged together");
|
||||
// if those would be more profitable and blocked inline steps.
|
||||
STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed");
|
||||
|
||||
static cl::opt<int>
|
||||
InlineLimit("inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
|
||||
cl::desc("Control the amount of inlining to perform (default = 225)"));
|
||||
Inliner::Inliner(char &ID) : CallGraphSCCPass(ID), InsertLifetime(true) {}
|
||||
|
||||
static cl::opt<int>
|
||||
HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325),
|
||||
cl::desc("Threshold for inlining functions with inline hint"));
|
||||
|
||||
// We introduce this threshold to help performance of instrumentation based
|
||||
// PGO before we actually hook up inliner with analysis passes such as BPI and
|
||||
// BFI.
|
||||
static cl::opt<int>
|
||||
ColdThreshold("inlinecold-threshold", cl::Hidden, cl::init(225),
|
||||
cl::desc("Threshold for inlining functions with cold attribute"));
|
||||
|
||||
// Threshold to use when optsize is specified (and there is no -inline-threshold).
|
||||
const int OptSizeThreshold = 75;
|
||||
|
||||
Inliner::Inliner(char &ID)
|
||||
: CallGraphSCCPass(ID), InlineThreshold(InlineLimit), InsertLifetime(true) {
|
||||
}
|
||||
|
||||
Inliner::Inliner(char &ID, int Threshold, bool InsertLifetime)
|
||||
: CallGraphSCCPass(ID),
|
||||
InlineThreshold(InlineLimit.getNumOccurrences() > 0 ? InlineLimit
|
||||
: Threshold),
|
||||
InsertLifetime(InsertLifetime) {}
|
||||
Inliner::Inliner(char &ID, bool InsertLifetime)
|
||||
: CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {}
|
||||
|
||||
/// For this class, we declare that we require and preserve the call graph.
|
||||
/// If the derived class implements this method, it should
|
||||
@ -243,67 +220,6 @@ static bool InlineCallIfPossible(Pass &P, CallSite CS, InlineFunctionInfo &IFI,
|
||||
return true;
|
||||
}
|
||||
|
||||
unsigned Inliner::getInlineThreshold(CallSite CS) const {
|
||||
int Threshold = InlineThreshold; // -inline-threshold or else selected by
|
||||
// overall opt level
|
||||
|
||||
// If -inline-threshold is not given, listen to the optsize attribute when it
|
||||
// would decrease the threshold.
|
||||
Function *Caller = CS.getCaller();
|
||||
bool OptSize = Caller && !Caller->isDeclaration() &&
|
||||
// FIXME: Use Function::optForSize().
|
||||
Caller->hasFnAttribute(Attribute::OptimizeForSize);
|
||||
if (!(InlineLimit.getNumOccurrences() > 0) && OptSize &&
|
||||
OptSizeThreshold < Threshold)
|
||||
Threshold = OptSizeThreshold;
|
||||
|
||||
Function *Callee = CS.getCalledFunction();
|
||||
if (!Callee || Callee->isDeclaration())
|
||||
return Threshold;
|
||||
|
||||
// If profile information is available, use that to adjust threshold of hot
|
||||
// and cold functions.
|
||||
// FIXME: The heuristics used below for determining hotness and coldness are
|
||||
// based on preliminary SPEC tuning and may not be optimal. Replace this with
|
||||
// a well-tuned heuristic based on *callsite* hotness and not callee hotness.
|
||||
uint64_t FunctionCount = 0, MaxFunctionCount = 0;
|
||||
bool HasPGOCounts = false;
|
||||
if (Callee->getEntryCount() &&
|
||||
Callee->getParent()->getMaximumFunctionCount()) {
|
||||
HasPGOCounts = true;
|
||||
FunctionCount = Callee->getEntryCount().getValue();
|
||||
MaxFunctionCount =
|
||||
Callee->getParent()->getMaximumFunctionCount().getValue();
|
||||
}
|
||||
|
||||
// Listen to the inlinehint attribute or profile based hotness information
|
||||
// when it would increase the threshold and the caller does not need to
|
||||
// minimize its size.
|
||||
bool InlineHint =
|
||||
Callee->hasFnAttribute(Attribute::InlineHint) ||
|
||||
(HasPGOCounts &&
|
||||
FunctionCount >= (uint64_t)(0.3 * (double)MaxFunctionCount));
|
||||
if (InlineHint && HintThreshold > Threshold &&
|
||||
!Caller->hasFnAttribute(Attribute::MinSize))
|
||||
Threshold = HintThreshold;
|
||||
|
||||
// Listen to the cold attribute or profile based coldness information
|
||||
// when it would decrease the threshold.
|
||||
bool ColdCallee =
|
||||
Callee->hasFnAttribute(Attribute::Cold) ||
|
||||
(HasPGOCounts &&
|
||||
FunctionCount <= (uint64_t)(0.01 * (double)MaxFunctionCount));
|
||||
// Command line argument for InlineLimit will override the default
|
||||
// ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold,
|
||||
// do not use the default cold threshold even if it is smaller.
|
||||
if ((InlineLimit.getNumOccurrences() == 0 ||
|
||||
ColdThreshold.getNumOccurrences() > 0) && ColdCallee &&
|
||||
ColdThreshold < Threshold)
|
||||
Threshold = ColdThreshold;
|
||||
|
||||
return Threshold;
|
||||
}
|
||||
|
||||
static void emitAnalysis(CallSite CS, const Twine &Msg) {
|
||||
Function *Caller = CS.getCaller();
|
||||
LLVMContext &Ctx = Caller->getContext();
|
||||
|
Loading…
Reference in New Issue
Block a user