[Inliner,OptDiag] Add hotness attribute to opt diagnostics

Summary:
The inliner not being a function pass requires the work-around of
generating the OptimizationRemarkEmitter and in turn BFI on demand.
This will go away after the new PM is ready.

BFI is only computed inside ORE if the user has requested hotness
information for optimization diagnostics (-pass-remarks-with-hotness at
the 'opt' level).  Thus there is no additional overhead without the
flag.

Reviewers: hfinkel, davidxl, eraman

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D22694

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@278185 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Adam Nemet 2016-08-10 00:44:44 +00:00
parent 1e3f45d363
commit 37a4ac8678
5 changed files with 121 additions and 36 deletions

View File

@ -16,11 +16,11 @@
#define LLVM_IR_OPTIMIZATIONDIAGNOSTICINFO_H #define LLVM_IR_OPTIMIZATIONDIAGNOSTICINFO_H
#include "llvm/ADT/Optional.h" #include "llvm/ADT/Optional.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/IR/PassManager.h" #include "llvm/IR/PassManager.h"
#include "llvm/Pass.h" #include "llvm/Pass.h"
namespace llvm { namespace llvm {
class BlockFrequencyInfo;
class DebugLoc; class DebugLoc;
class Function; class Function;
class LLVMContext; class LLVMContext;
@ -34,6 +34,19 @@ public:
OptimizationRemarkEmitter(Function *F, BlockFrequencyInfo *BFI) OptimizationRemarkEmitter(Function *F, BlockFrequencyInfo *BFI)
: F(F), BFI(BFI) {} : F(F), BFI(BFI) {}
/// \brief This variant can be used to generate ORE on demand (without the
/// analysis pass).
///
/// Note that this ctor has a very different cost depending on whether
/// F->getContext().getDiagnosticHotnessRequested() is on or not. If it's off
/// the operation is free.
///
/// Whereas if DiagnosticHotnessRequested is on, it is a fairly expensive
/// operation since BFI and all its required analyses are computed. The BFI
/// computed this way is owned by this object and is released together with
/// it. This is for example useful for CGSCC passes that can't use function
/// analysis passes in the old PM.
OptimizationRemarkEmitter(Function *F);
OptimizationRemarkEmitter(OptimizationRemarkEmitter &&Arg) OptimizationRemarkEmitter(OptimizationRemarkEmitter &&Arg)
: F(Arg.F), BFI(Arg.BFI) {} : F(Arg.F), BFI(Arg.BFI) {}
@ -149,6 +162,9 @@ private:
BlockFrequencyInfo *BFI; BlockFrequencyInfo *BFI;
/// If we generate BFI on demand, we need to free it when ORE is freed.
std::unique_ptr<BlockFrequencyInfo> OwnedBFI;
Optional<uint64_t> computeHotness(const Value *V); Optional<uint64_t> computeHotness(const Value *V);
OptimizationRemarkEmitter(const OptimizationRemarkEmitter &) = delete; OptimizationRemarkEmitter(const OptimizationRemarkEmitter &) = delete;

View File

@ -27,6 +27,7 @@ class AssumptionCacheTracker;
class CallSite; class CallSite;
class DataLayout; class DataLayout;
class InlineCost; class InlineCost;
class OptimizationRemarkEmitter;
class ProfileSummaryInfo; class ProfileSummaryInfo;
template <class PtrType, unsigned SmallSize> class SmallPtrSet; template <class PtrType, unsigned SmallSize> class SmallPtrSet;

View File

@ -13,13 +13,37 @@
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h" #include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/LLVMContext.h" #include "llvm/IR/LLVMContext.h"
using namespace llvm; using namespace llvm;
/// Build an emitter for \p F on demand, without going through the analysis
/// pass. BFI is left null unless the context asks for hotness in diagnostics;
/// when it does, BFI and the analyses it depends on are computed right here
/// and the resulting BFI is owned by this emitter.
OptimizationRemarkEmitter::OptimizationRemarkEmitter(Function *F)
    : F(F), BFI(nullptr) {
  const bool HotnessRequested =
      F->getContext().getDiagnosticHotnessRequested();

  if (HotnessRequested) {
    // Each analysis below feeds the next one, so the order is significant:
    // dominator tree -> loop info -> branch probabilities -> block frequencies.
    DominatorTree DomTree;
    DomTree.recalculate(*F);

    LoopInfo Loops;
    Loops.analyze(DomTree);

    BranchProbabilityInfo BranchProbs;
    BranchProbs.calculate(*F, Loops);

    // Keep the freshly computed BFI alive for the lifetime of this emitter and
    // expose it through the non-owning BFI pointer used by computeHotness().
    OwnedBFI = llvm::make_unique<BlockFrequencyInfo>(*F, BranchProbs, Loops);
    BFI = OwnedBFI.get();
  }
}
Optional<uint64_t> OptimizationRemarkEmitter::computeHotness(const Value *V) { Optional<uint64_t> OptimizationRemarkEmitter::computeHotness(const Value *V) {
if (!BFI) if (!BFI)
return None; return None;

View File

@ -20,6 +20,7 @@
#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CallSite.h" #include "llvm/IR/CallSite.h"
@ -237,11 +238,9 @@ static bool InlineCallIfPossible(
return true; return true;
} }
static void emitAnalysis(CallSite CS, const Twine &Msg) { static void emitAnalysis(CallSite CS, OptimizationRemarkEmitter &ORE,
Function *Caller = CS.getCaller(); const Twine &Msg) {
LLVMContext &Ctx = Caller->getContext(); ORE.emitOptimizationRemarkAnalysis(DEBUG_TYPE, CS.getInstruction(), Msg);
DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
emitOptimizationRemarkAnalysis(Ctx, DEBUG_TYPE, *Caller, DLoc, Msg);
} }
/// Return true if inlining of CS can block the caller from being /// Return true if inlining of CS can block the caller from being
@ -323,13 +322,14 @@ shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
/// Return true if the inliner should attempt to inline at the given CallSite. /// Return true if the inliner should attempt to inline at the given CallSite.
static bool shouldInline(CallSite CS, static bool shouldInline(CallSite CS,
function_ref<InlineCost(CallSite CS)> GetInlineCost) { function_ref<InlineCost(CallSite CS)> GetInlineCost,
OptimizationRemarkEmitter &ORE) {
InlineCost IC = GetInlineCost(CS); InlineCost IC = GetInlineCost(CS);
if (IC.isAlways()) { if (IC.isAlways()) {
DEBUG(dbgs() << " Inlining: cost=always" DEBUG(dbgs() << " Inlining: cost=always"
<< ", Call: " << *CS.getInstruction() << "\n"); << ", Call: " << *CS.getInstruction() << "\n");
emitAnalysis(CS, Twine(CS.getCalledFunction()->getName()) + emitAnalysis(CS, ORE, Twine(CS.getCalledFunction()->getName()) +
" should always be inlined (cost=always)"); " should always be inlined (cost=always)");
return true; return true;
} }
@ -337,7 +337,7 @@ static bool shouldInline(CallSite CS,
if (IC.isNever()) { if (IC.isNever()) {
DEBUG(dbgs() << " NOT Inlining: cost=never" DEBUG(dbgs() << " NOT Inlining: cost=never"
<< ", Call: " << *CS.getInstruction() << "\n"); << ", Call: " << *CS.getInstruction() << "\n");
emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() + emitAnalysis(CS, ORE, Twine(CS.getCalledFunction()->getName() +
" should never be inlined (cost=never)")); " should never be inlined (cost=never)"));
return false; return false;
} }
@ -347,7 +347,7 @@ static bool shouldInline(CallSite CS,
DEBUG(dbgs() << " NOT Inlining: cost=" << IC.getCost() DEBUG(dbgs() << " NOT Inlining: cost=" << IC.getCost()
<< ", thres=" << (IC.getCostDelta() + IC.getCost()) << ", thres=" << (IC.getCostDelta() + IC.getCost())
<< ", Call: " << *CS.getInstruction() << "\n"); << ", Call: " << *CS.getInstruction() << "\n");
emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() + emitAnalysis(CS, ORE, Twine(CS.getCalledFunction()->getName() +
" too costly to inline (cost=") + " too costly to inline (cost=") +
Twine(IC.getCost()) + ", threshold=" + Twine(IC.getCost()) + ", threshold=" +
Twine(IC.getCostDelta() + IC.getCost()) + ")"); Twine(IC.getCostDelta() + IC.getCost()) + ")");
@ -359,7 +359,8 @@ static bool shouldInline(CallSite CS,
DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction()
<< " Cost = " << IC.getCost() << " Cost = " << IC.getCost()
<< ", outer Cost = " << TotalSecondaryCost << '\n'); << ", outer Cost = " << TotalSecondaryCost << '\n');
emitAnalysis(CS, Twine("Not inlining. Cost of inlining " + emitAnalysis(CS, ORE,
Twine("Not inlining. Cost of inlining " +
CS.getCalledFunction()->getName() + CS.getCalledFunction()->getName() +
" increases the cost of inlining " + " increases the cost of inlining " +
CS.getCaller()->getName() + " in other contexts")); CS.getCaller()->getName() + " in other contexts"));
@ -369,10 +370,11 @@ static bool shouldInline(CallSite CS,
DEBUG(dbgs() << " Inlining: cost=" << IC.getCost() DEBUG(dbgs() << " Inlining: cost=" << IC.getCost()
<< ", thres=" << (IC.getCostDelta() + IC.getCost()) << ", thres=" << (IC.getCostDelta() + IC.getCost())
<< ", Call: " << *CS.getInstruction() << '\n'); << ", Call: " << *CS.getInstruction() << '\n');
emitAnalysis( emitAnalysis(CS, ORE, CS.getCalledFunction()->getName() +
CS, CS.getCalledFunction()->getName() + Twine(" can be inlined into ") + Twine(" can be inlined into ") +
CS.getCaller()->getName() + " with cost=" + Twine(IC.getCost()) + CS.getCaller()->getName() + " with cost=" +
" (threshold=" + Twine(IC.getCostDelta() + IC.getCost()) + ")"); Twine(IC.getCost()) + " (threshold=" +
Twine(IC.getCostDelta() + IC.getCost()) + ")");
return true; return true;
} }
@ -513,15 +515,18 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
continue; continue;
LLVMContext &CallerCtx = Caller->getContext();
// Get DebugLoc to report. CS will be invalid after Inliner. // Get DebugLoc to report. CS will be invalid after Inliner.
DebugLoc DLoc = CS.getInstruction()->getDebugLoc(); DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
BasicBlock *Block = CS.getParent();
// FIXME for new PM: because of the old PM we currently generate ORE and
// in turn BFI on demand. With the new PM, the ORE dependency should
// just become a regular analysis dependency.
OptimizationRemarkEmitter ORE(Caller);
// If the policy determines that we should inline this function, // If the policy determines that we should inline this function,
// try to do so. // try to do so.
if (!shouldInline(CS, GetInlineCost)) { if (!shouldInline(CS, GetInlineCost, ORE)) {
emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc, ORE.emitOptimizationRemarkMissed(DEBUG_TYPE, DLoc, Block,
Twine(Callee->getName() + Twine(Callee->getName() +
" will not be inlined into " + " will not be inlined into " +
Caller->getName())); Caller->getName()));
@ -532,7 +537,7 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas, if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
InlineHistoryID, InsertLifetime, AARGetter, InlineHistoryID, InsertLifetime, AARGetter,
ImportedFunctionsStats)) { ImportedFunctionsStats)) {
emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc, ORE.emitOptimizationRemarkMissed(DEBUG_TYPE, DLoc, Block,
Twine(Callee->getName() + Twine(Callee->getName() +
" will not be inlined into " + " will not be inlined into " +
Caller->getName())); Caller->getName()));
@ -541,8 +546,8 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
++NumInlined; ++NumInlined;
// Report the inline decision. // Report the inline decision.
emitOptimizationRemark( ORE.emitOptimizationRemark(
CallerCtx, DEBUG_TYPE, *Caller, DLoc, DEBUG_TYPE, DLoc, Block,
Twine(Callee->getName() + " inlined into " + Caller->getName())); Twine(Callee->getName() + " inlined into " + Caller->getName()));
// If inlining this function gave us any new call sites, throw them // If inlining this function gave us any new call sites, throw them

View File

@ -0,0 +1,39 @@
; RUN: opt < %s -inline -pass-remarks=inline -pass-remarks-missed=inline \
; RUN: -pass-remarks-analysis=inline -pass-remarks-with-hotness -S 2>&1 \
; RUN: | FileCheck %s

; Verifies that the inliner's passed, missed and analysis remarks carry a
; hotness annotation when hotness is requested on the opt command line.
; All four expected remarks report hotness 30, which matches the
; function_entry_count of the caller @bar (!3) rather than the entry counts
; of the callees @foo (!1 = 10) and @foz (!2 = 20).
; CHECK: foo should always be inlined (cost=always) (hotness: 30)
; CHECK: foo inlined into bar (hotness: 30)
; CHECK: foz should never be inlined (cost=never) (hotness: 30)
; CHECK: foz will not be inlined into bar (hotness: 30)
; Callee carrying the alwaysinline attribute (#0); entry count 10 (!1).
; Function Attrs: alwaysinline nounwind uwtable
define i32 @foo() #0 !prof !1 {
entry:
ret i32 4
}
; Callee carrying the noinline attribute (#1); entry count 20 (!2).
; Function Attrs: noinline nounwind uwtable
define i32 @foz() #1 !prof !2 {
entry:
ret i32 2
}
; Caller of both @foo and @foz; entry count 30 (!3). Both call sites live in
; its single entry block.
; Function Attrs: nounwind uwtable
define i32 @bar() !prof !3 {
entry:
%call = call i32 @foo()
%call2 = call i32 @foz()
%mul = mul i32 %call, %call2
ret i32 %mul
}
attributes #0 = { alwaysinline }
attributes #1 = { noinline }
!llvm.ident = !{!0}
!0 = !{!"clang version 3.5.0 "}
; Profile metadata: per-function entry counts referenced via !prof above.
!1 = !{!"function_entry_count", i64 10}
!2 = !{!"function_entry_count", i64 20}
!3 = !{!"function_entry_count", i64 30}