[HotColdSplitting] Allow outlining single-block cold regions

It can be profitable to outline single-block cold regions because they
may be large.

Allow outlining single-block regions if they have at least some threshold
number of non-debug, non-terminator instructions. I chose 3 as the threshold
after experimenting with several internal frameworks.

In practice, reducing the threshold further did not give much
improvement, whereas increasing it resulted in substantial regressions.

Differential Revision: https://reviews.llvm.org/D53824

llvm-svn: 345524
This commit is contained in:
Vedant Kumar 2018-10-29 19:15:39 +00:00
parent 2f628a1030
commit dd4be53b20
4 changed files with 90 additions and 54 deletions

View File

@ -31,6 +31,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
@ -65,6 +66,10 @@ using namespace llvm;
// Hidden boolean command-line flag `-hot-cold-static-analysis`; defaults to
// true.
static cl::opt<bool> EnableStaticAnalyis("hot-cold-static-analysis",
cl::init(true), cl::Hidden);
// Hidden command-line knob `-min-outlining-inst-count`; defaults to 3. It is
// the instruction-count threshold applied to cold regions that consist of a
// single basic block: such a region is only kept as an outlining candidate
// when its block reaches this count.
static cl::opt<unsigned> MinOutliningInstCount(
"min-outlining-inst-count", cl::init(3), cl::Hidden,
cl::desc("Minimum number of instructions needed for a single-block region "
"to be an outlining candidate"));
namespace {
@ -130,6 +135,19 @@ static bool mayExtractBlock(const BasicBlock &BB) {
return !BB.hasAddressTaken();
}
/// Check whether \p BB has at least \p Min non-debug, non-terminator
/// instructions.
///
/// Debug-info intrinsics and the block terminator are skipped when counting.
/// The scan stops as soon as the threshold is reached, so the whole block is
/// only walked when it falls short of \p Min.
///
/// \param BB  The basic block to inspect.
/// \param Min The number of countable instructions required.
/// \returns true if \p BB contains at least \p Min countable instructions.
///          A \p Min of 0 is satisfied by every block.
static bool hasMinimumInstCount(const BasicBlock &BB, unsigned Min) {
  // "At least zero" is vacuously true, even for a block that holds nothing
  // but a terminator; without this guard such a block would be rejected.
  if (Min == 0)
    return true;

  // The terminator does not change while we scan; look it up once.
  const Instruction *Term = BB.getTerminator();

  unsigned Count = 0;
  for (const Instruction &I : BB) {
    if (isa<DbgInfoIntrinsic>(&I) || &I == Term)
      continue;
    if (++Count >= Min)
      return true;
  }
  return false;
}
/// Identify the maximal region of cold blocks which includes \p SinkBB.
///
/// Include all blocks post-dominated by \p SinkBB, \p SinkBB itself, and all
@ -223,9 +241,8 @@ findMaximalColdRegion(BasicBlock &SinkBB, DominatorTree &DT, PostDomTree &PDT) {
++SuccIt;
}
// TODO: Consider outlining regions with just 1 block, but more than some
// threshold of instructions.
if (ColdRegion.size() == 1)
if (ColdRegion.size() == 1 &&
!hasMinimumInstCount(*ColdRegion[0], MinOutliningInstCount))
return {};
return ColdRegion;

View File

@ -6,7 +6,7 @@
; The cold region is too small to split.
; CHECK-LABEL: @foo
; CHECK-NOT: codeRepl
; CHECK-NOT: foo.cold.1
define void @foo() {
entry:
br i1 undef, label %if.then, label %if.end
@ -15,21 +15,28 @@ if.then: ; preds = %entry
unreachable
if.end: ; preds = %entry
br label %if.then12
ret void
}
if.then12: ; preds = %if.end
br label %cleanup40
; The cold region is still too small to split.
; CHECK-LABEL: @bar
; CHECK-NOT: bar.cold.1
define void @bar() {
entry:
br i1 undef, label %if.then, label %if.end
cleanup40: ; preds = %if.then12
br label %return
if.then: ; preds = %entry
call void @sink()
call void @sink()
ret void
return: ; preds = %cleanup40
if.end: ; preds = %entry
ret void
}
; Make sure we don't try to outline the entire function.
; CHECK-LABEL: @fun
; CHECK-NOT: codeRepl
; CHECK-NOT: fun.cold.1
define void @fun() {
entry:
br i1 undef, label %if.then, label %if.end
@ -43,14 +50,53 @@ if.end: ; preds = %entry
; Don't outline infinite loops.
; CHECK-LABEL: @infinite_loop
; CHECK-NOT: codeRepl
; CHECK-NOT: infinite_loop.cold.1
define void @infinite_loop() {
entry:
br label %loop
loop:
call void @sink()
call void @sink()
call void @sink()
br label %loop
}
; Don't count debug intrinsics towards the outlining threshold.
; CHECK-LABEL: @dont_count_debug_intrinsics
; CHECK-NOT: dont_count_debug_intrinsics.cold.1
define void @dont_count_debug_intrinsics(i32 %arg1) !dbg !6 {
entry:
%var = add i32 0, 0, !dbg !11
br i1 undef, label %if.then, label %if.end
if.then: ; preds = %entry
ret void
if.end: ; preds = %entry
call void @llvm.dbg.value(metadata i32 %arg1, metadata !9, metadata !DIExpression()), !dbg !11
call void @llvm.dbg.value(metadata i32 %arg1, metadata !9, metadata !DIExpression()), !dbg !11
call void @sink()
ret void
}
declare void @llvm.dbg.value(metadata, metadata, metadata)
declare void @sink() cold
!llvm.dbg.cu = !{!0}
!llvm.debugify = !{!3, !4}
!llvm.module.flags = !{!5}
!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
!1 = !DIFile(filename: "<stdin>", directory: "/")
!2 = !{}
!3 = !{i32 7}
!4 = !{i32 1}
!5 = !{i32 2, !"Debug Info Version", i32 3}
!6 = distinct !DISubprogram(name: "dont_count_debug_intrinsics", linkageName: "dont_count_debug_intrinsics", scope: null, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, retainedNodes: !8)
!7 = !DISubroutineType(types: !2)
!8 = !{!9}
!9 = !DILocalVariable(name: "1", scope: !6, file: !1, line: 1, type: !10)
!10 = !DIBasicType(name: "ty32", size: 32, encoding: DW_ATE_unsigned)
!11 = !DILocation(line: 1, column: 1, scope: !6)

View File

@ -1,8 +1,7 @@
; RUN: opt -hotcoldsplit -S < %s | FileCheck %s
; CHECK-LABEL: @fun
; CHECK: codeRepl:
; CHECK-NEXT: call void @fun.cold.1
; CHECK: call void @fun.cold.1
define void @fun() {
entry:
@ -12,21 +11,13 @@ if.then:
ret void
if.else:
br label %if.then4
if.then4:
br i1 undef, label %if.then5, label %if.end
if.then5:
br label %cleanup
if.end:
br label %cleanup
cleanup:
%cleanup.dest.slot.0 = phi i32 [ 1, %if.then5 ], [ 0, %if.end ]
unreachable
call void @sink()
call void @sink()
call void @sink()
ret void
}
declare void @sink() cold
; CHECK: define {{.*}} @fun.cold.1{{.*}}#[[outlined_func_attr:[0-9]+]]
; CHECK: attributes #[[outlined_func_attr]] = { {{.*}}minsize

View File

@ -6,33 +6,23 @@
define void @foo(i32 %arg1) !dbg !6 {
entry:
%var = add i32 0, 0, !dbg !11
br i1 undef, label %if.then, label %if.end, !dbg !12
br i1 undef, label %if.then, label %if.end
if.then: ; preds = %entry
ret void, !dbg !13
ret void
if.end: ; preds = %entry
call void @llvm.dbg.value(metadata i32 %arg1, metadata !9, metadata !DIExpression()), !dbg !11
br label %if.then12, !dbg !14
if.then12: ; preds = %if.end
br label %cleanup40, !dbg !15
cleanup40: ; preds = %if.then12
br i1 undef, label %if.then5, label %if.end1, !dbg !16
if.then5:
br label %return, !dbg !17
if.end1:
br label %return, !dbg !18
return: ; preds = %cleanup40
unreachable, !dbg !19
call void @sink()
call void @sink()
call void @sink()
ret void
}
declare void @llvm.dbg.value(metadata, metadata, metadata)
declare void @sink() cold
!llvm.dbg.cu = !{!0}
!llvm.debugify = !{!3, !4}
!llvm.module.flags = !{!5}
@ -49,11 +39,3 @@ declare void @llvm.dbg.value(metadata, metadata, metadata)
!9 = !DILocalVariable(name: "1", scope: !6, file: !1, line: 1, type: !10)
!10 = !DIBasicType(name: "ty32", size: 32, encoding: DW_ATE_unsigned)
!11 = !DILocation(line: 1, column: 1, scope: !6)
!12 = !DILocation(line: 2, column: 1, scope: !6)
!13 = !DILocation(line: 3, column: 1, scope: !6)
!14 = !DILocation(line: 4, column: 1, scope: !6)
!15 = !DILocation(line: 5, column: 1, scope: !6)
!16 = !DILocation(line: 6, column: 1, scope: !6)
!17 = !DILocation(line: 7, column: 1, scope: !6)
!18 = !DILocation(line: 8, column: 1, scope: !6)
!19 = !DILocation(line: 9, column: 1, scope: !6)