From 118cef0ca37ecc9b6034b49341f0587290ab2fa2 Mon Sep 17 00:00:00 2001 From: Xinliang David Li Date: Fri, 2 Jun 2017 22:08:04 +0000 Subject: [PATCH] [PartialInlining] Minor cost anaysis tuning Also added a test option and 2 cost analysis related tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@304599 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/PartialInlining.cpp | 65 +++++++++++++++++++--- test/Transforms/CodeExtractor/cost.ll | 64 +++++++++++++++++++++ test/Transforms/CodeExtractor/cost_meta.ll | 41 ++++++++++++++ 3 files changed, 161 insertions(+), 9 deletions(-) create mode 100644 test/Transforms/CodeExtractor/cost.ll create mode 100644 test/Transforms/CodeExtractor/cost_meta.ll diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index bc0967448cd..eee250c4d54 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -68,6 +68,10 @@ static cl::opt cl::desc("Relative frequency of outline region to " "the entry block")); +static cl::opt ExtraOutliningPenalty( + "partial-inlining-extra-penalty", cl::init(0), cl::Hidden, + cl::desc("A debug option to add additional penalty to the computed one.")); + namespace { struct FunctionOutliningInfo { @@ -83,7 +87,7 @@ struct FunctionOutliningInfo { SmallVector Entries; // The return block that is not included in the outlined region. BasicBlock *ReturnBlock; - // The dominating block of the region ot be outlined. + // The dominating block of the region to be outlined. BasicBlock *NonReturnBlock; // The set of blocks in Entries that that are predecessors to ReturnBlock SmallVector ReturnBlockPreds; @@ -407,11 +411,23 @@ BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq( if (hasProfileData(F, OI)) return OutlineRegionRelFreq; - // When profile data is not available, we need to be very - // conservative in estimating the overall savings. 
We need to make sure - // the outline region relative frequency is not below the threshold - // specified by the option. - OutlineRegionRelFreq = std::max(OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100)); + // When profile data is not available, we need to be conservative in + // estimating the overall savings. Static branch prediction can usually + // guess the branch direction right (taken/non-taken), but the guessed + // branch probability is usually not biased enough. In case when the + // outlined region is predicted to be likely, its probability needs + // to be made higher (more biased) to not under-estimate the cost of + // function outlining. On the other hand, if the outlined region + // is predicted to be less likely, the predicted probability is usually + // higher than the actual. For instance, the actual probability of the + // less likely target is only 5%, but the guessed probability can be + // 40%. In the latter case, there is no need for further adjustment. + // FIXME: add an option for this. 
+ if (OutlineRegionRelFreq < BranchProbability(45, 100)) + return OutlineRegionRelFreq; + + OutlineRegionRelFreq = std::max( + OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100)); return OutlineRegionRelFreq; } @@ -496,6 +512,26 @@ int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) { if (isa(I)) continue; + switch (I->getOpcode()) { + case Instruction::BitCast: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::Alloca: + continue; + case Instruction::GetElementPtr: + if (cast(I)->hasAllZeroIndices()) + continue; + default: + break; + } + + IntrinsicInst *IntrInst = dyn_cast(I); + if (IntrInst) { + if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start || + IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) + continue; + } + if (CallInst *CI = dyn_cast(I)) { InlineCost += getCallsiteCost(CallSite(CI), DL); continue; @@ -519,7 +555,13 @@ std::tuple PartialInlinerImpl::computeOutliningCosts( Function *F, const FunctionOutliningInfo *OI, Function *OutlinedFunction, BasicBlock *OutliningCallBB) { // First compute the cost of the outlined region 'OI' in the original - // function 'F': + // function 'F'. + // FIXME: The code extractor (outliner) can now do code sinking/hoisting + // to reduce outlining cost. The hoisted/sunk code currently do not + // incur any runtime cost so it is still OK to compare the outlined + // function cost with the outlined region in the original function. + // If this ever changes, we will need to introduce new extractor api + // to pass the information. int OutlinedRegionCost = 0; for (BasicBlock &BB : *F) { if (&BB != OI->ReturnBlock && @@ -539,11 +581,16 @@ std::tuple PartialInlinerImpl::computeOutliningCosts( for (BasicBlock &BB : *OutlinedFunction) { OutlinedFunctionCost += computeBBInlineCost(&BB); } + // The code extractor introduces a new root and exit stub blocks with + // additional unconditional branches. Those branches will be eliminated + // later with bb layout. 
The cost should be adjusted accordingly: + OutlinedFunctionCost -= 2 * InlineConstants::InstrCost; assert(OutlinedFunctionCost >= OutlinedRegionCost && "Outlined function cost should be no less than the outlined region"); - int OutliningRuntimeOverhead = - OutliningFuncCallCost + (OutlinedFunctionCost - OutlinedRegionCost); + int OutliningRuntimeOverhead = OutliningFuncCallCost + + (OutlinedFunctionCost - OutlinedRegionCost) + + ExtraOutliningPenalty; return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead, OutlinedRegionCost); diff --git a/test/Transforms/CodeExtractor/cost.ll b/test/Transforms/CodeExtractor/cost.ll new file mode 100644 index 00000000000..4ac5acee019 --- /dev/null +++ b/test/Transforms/CodeExtractor/cost.ll @@ -0,0 +1,64 @@ +; RUN: opt -S < %s -partial-inliner -partial-inlining-extra-penalty=10 | FileCheck %s +; RUN: opt -S < %s -passes=partial-inliner -partial-inlining-extra-penalty=10 | FileCheck %s +define i32 @outline_region_notlikely(i32* %arg) local_unnamed_addr { +bb: +; ptr != null is predicted to be true + %tmp = icmp ne i32* %arg, null + br i1 %tmp, label %bb8, label %bb1 + +; bb1 is not likely +bb1: ; preds = %bb + %tmp2 = tail call i32 @foo(i32* nonnull %arg) + %tmp3 = tail call i32 @foo(i32* nonnull %arg) + %tmp4 = tail call i32 @foo(i32* nonnull %arg) + %tmp5 = tail call i32 @foo(i32* nonnull %arg) + %tmp6 = tail call i32 @foo(i32* nonnull %arg) + %tmp7 = tail call i32 @foo(i32* nonnull %arg) + br label %bb8 + +bb8: ; preds = %bb1, %bb + %tmp9 = phi i32 [ 0, %bb1 ], [ 1, %bb ] + ret i32 %tmp9 +} + +define i32 @outline_region_likely(i32* %arg) local_unnamed_addr { +bb: +; ptr == null is predicted to be false + %tmp = icmp eq i32* %arg, null + br i1 %tmp, label %bb8, label %bb1 + +; bb1 is likely +bb1: ; preds = %bb + %tmp2 = tail call i32 @foo(i32* nonnull %arg) + %tmp3 = tail call i32 @foo(i32* nonnull %arg) + %tmp4 = tail call i32 @foo(i32* nonnull %arg) + %tmp5 = tail call i32 @foo(i32* nonnull %arg) + %tmp6 = tail 
call i32 @foo(i32* nonnull %arg) + %tmp7 = tail call i32 @foo(i32* nonnull %arg) + br label %bb8 + +bb8: ; preds = %bb1, %bb + %tmp9 = phi i32 [ 0, %bb1 ], [ 1, %bb ] + ret i32 %tmp9 +} + +declare i32 @foo(i32* %arg) + +define i32 @dummy_caller(i32* %arg) local_unnamed_addr { +; CHECK-LABEL: @dummy_caller + %tmp = call i32 @outline_region_notlikely(i32* %arg) +; CHECK: call void @outline_region_notlikely.2_bb1 + %tmp2 = tail call i32 @outline_region_likely(i32* %arg) +; CHECK: %tmp2 = tail call i32 @outline_region_likely(i32* %arg) + ret i32 %tmp + +} + +; CHECK-LABEL: define internal void @outline_region_notlikely.2_bb1(i32* %arg) { +; CHECK-NEXT: newFuncRoot: + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 5.0.0 (trunk 304489)"} diff --git a/test/Transforms/CodeExtractor/cost_meta.ll b/test/Transforms/CodeExtractor/cost_meta.ll new file mode 100644 index 00000000000..2e4467a8d0c --- /dev/null +++ b/test/Transforms/CodeExtractor/cost_meta.ll @@ -0,0 +1,41 @@ +; RUN: opt -S < %s -partial-inliner -partial-inlining-extra-penalty=2000 | FileCheck %s +; RUN: opt -S < %s -passes=partial-inliner -partial-inlining-extra-penalty=2000 | FileCheck %s +define i32 @outline_region_notlikely(i32* %arg) local_unnamed_addr { +bb: +; ptr != null is predicted to be true + %tmp = icmp ne i32* %arg, null + br i1 %tmp, label %bb8, label %bb1, !prof !2 + +; bb1 is not likely +bb1: ; preds = %bb + %tmp2 = tail call i32 @foo(i32* nonnull %arg) + %tmp3 = tail call i32 @foo(i32* nonnull %arg) + %tmp4 = tail call i32 @foo(i32* nonnull %arg) + %tmp5 = tail call i32 @foo(i32* nonnull %arg) + %tmp6 = tail call i32 @foo(i32* nonnull %arg) + %tmp7 = tail call i32 @foo(i32* nonnull %arg) + br label %bb8 + +bb8: ; preds = %bb1, %bb + %tmp9 = phi i32 [ 0, %bb1 ], [ 1, %bb ] + ret i32 %tmp9 +} + +define i32 @dummy_caller(i32* %arg) local_unnamed_addr { +; CHECK-LABEL: @dummy_caller + %tmp = call i32 @outline_region_notlikely(i32* %arg) 
+ ret i32 %tmp + } + + +; CHECK-LABEL: define internal void @outline_region_notlikely.1_bb1(i32* %arg) { +; CHECK-NEXT: newFuncRoot: + +declare i32 @foo(i32 * %arg) + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 5.0.0 (trunk 304489)"} +!2 = !{!"branch_weights", i32 2000, i32 1}