diff --git a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
index 4f371a494e99..ce8a5060a3a1 100644
--- a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
+++ b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
@@ -31,6 +31,7 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PassManager.h"
@@ -65,6 +66,10 @@ using namespace llvm;
 static cl::opt<bool> EnableStaticAnalyis("hot-cold-static-analysis",
                               cl::init(true), cl::Hidden);
 
+static cl::opt<unsigned> MinOutliningInstCount(
+    "min-outlining-inst-count", cl::init(3), cl::Hidden,
+    cl::desc("Minimum number of instructions needed for a single-block region "
+             "to be an outlining candidate"));
 
 namespace {
 
@@ -130,6 +135,19 @@ static bool mayExtractBlock(const BasicBlock &BB) {
   return !BB.hasAddressTaken();
 }
 
+/// Check whether \p BB has at least \p Min non-debug, non-terminator
+/// instructions.
+static bool hasMinimumInstCount(const BasicBlock &BB, unsigned Min) {
+  unsigned Count = 0;
+  for (const Instruction &I : BB) {
+    if (isa<DbgInfoIntrinsic>(&I) || &I == BB.getTerminator())
+      continue;
+    if (++Count >= Min)
+      return true;
+  }
+  return false;
+}
+
 /// Identify the maximal region of cold blocks which includes \p SinkBB.
 ///
 /// Include all blocks post-dominated by \p SinkBB, \p SinkBB itself, and all
@@ -223,9 +241,8 @@ findMaximalColdRegion(BasicBlock &SinkBB, DominatorTree &DT, PostDomTree &PDT) {
     ++SuccIt;
   }
 
-  // TODO: Consider outlining regions with just 1 block, but more than some
-  // threshold of instructions.
-  if (ColdRegion.size() == 1)
+  if (ColdRegion.size() == 1 &&
+      !hasMinimumInstCount(*ColdRegion[0], MinOutliningInstCount))
     return {};
 
   return ColdRegion;
diff --git a/llvm/test/Transforms/HotColdSplit/do-not-split.ll b/llvm/test/Transforms/HotColdSplit/do-not-split.ll
index 1f6265819193..213681383ea1 100644
--- a/llvm/test/Transforms/HotColdSplit/do-not-split.ll
+++ b/llvm/test/Transforms/HotColdSplit/do-not-split.ll
@@ -6,7 +6,7 @@
 
 ; The cold region is too small to split.
 ; CHECK-LABEL: @foo
-; CHECK-NOT: codeRepl
+; CHECK-NOT: foo.cold.1
 define void @foo() {
 entry:
   br i1 undef, label %if.then, label %if.end
@@ -15,21 +15,28 @@ if.then:                                          ; preds = %entry
   unreachable
 
 if.end:                                           ; preds = %entry
-  br label %if.then12
+  ret void
+}
 
-if.then12:                                        ; preds = %if.end
-  br label %cleanup40
+; The cold region is still too small to split.
+; CHECK-LABEL: @bar
+; CHECK-NOT: bar.cold.1
+define void @bar() {
+entry:
+  br i1 undef, label %if.then, label %if.end
 
-cleanup40:                                        ; preds = %if.then12
-  br label %return
+if.then:                                          ; preds = %entry
+  call void @sink()
+  call void @sink()
+  ret void
 
-return:                                           ; preds = %cleanup40
+if.end:                                           ; preds = %entry
   ret void
 }
 
 ; Make sure we don't try to outline the entire function.
 ; CHECK-LABEL: @fun
-; CHECK-NOT: codeRepl
+; CHECK-NOT: fun.cold.1
 define void @fun() {
 entry:
   br i1 undef, label %if.then, label %if.end
@@ -43,14 +50,53 @@ if.end:                                           ; preds = %entry
 
 ; Don't outline infinite loops.
 ; CHECK-LABEL: @infinite_loop
-; CHECK-NOT: codeRepl
+; CHECK-NOT: infinite_loop.cold.1
 define void @infinite_loop() {
 entry:
   br label %loop
 
 loop:
+  call void @sink()
+  call void @sink()
   call void @sink()
   br label %loop
 }
 
+; Don't count debug intrinsics towards the outlining threshold.
+; CHECK-LABEL: @dont_count_debug_intrinsics
+; CHECK-NOT: dont_count_debug_intrinsics.cold.1
+define void @dont_count_debug_intrinsics(i32 %arg1) !dbg !6 {
+entry:
+  %var = add i32 0, 0, !dbg !11
+  br i1 undef, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  ret void
+
+if.end:                                           ; preds = %entry
+  call void @llvm.dbg.value(metadata i32 %arg1, metadata !9, metadata !DIExpression()), !dbg !11
+  call void @llvm.dbg.value(metadata i32 %arg1, metadata !9, metadata !DIExpression()), !dbg !11
+  call void @sink()
+  ret void
+}
+
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+
 declare void @sink() cold
+
+!llvm.dbg.cu = !{!0}
+!llvm.debugify = !{!3, !4}
+!llvm.module.flags = !{!5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "", directory: "/")
+!2 = !{}
+!3 = !{i32 7}
+!4 = !{i32 1}
+!5 = !{i32 2, !"Debug Info Version", i32 3}
+!6 = distinct !DISubprogram(name: "dont_count_debug_intrinsics", linkageName: "dont_count_debug_intrinsics", scope: null, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, retainedNodes: !8)
+!7 = !DISubroutineType(types: !2)
+!8 = !{!9}
+!9 = !DILocalVariable(name: "1", scope: !6, file: !1, line: 1, type: !10)
+!10 = !DIBasicType(name: "ty32", size: 32, encoding: DW_ATE_unsigned)
+!11 = !DILocation(line: 1, column: 1, scope: !6)
diff --git a/llvm/test/Transforms/HotColdSplit/minsize.ll b/llvm/test/Transforms/HotColdSplit/minsize.ll
index 4865fb6d0246..eb42ad14af20 100644
--- a/llvm/test/Transforms/HotColdSplit/minsize.ll
+++ b/llvm/test/Transforms/HotColdSplit/minsize.ll
@@ -1,8 +1,7 @@
 ; RUN: opt -hotcoldsplit -S < %s | FileCheck %s
 
 ; CHECK-LABEL: @fun
-; CHECK: codeRepl:
-; CHECK-NEXT: call void @fun.cold.1
+; CHECK: call void @fun.cold.1
 
 define void @fun() {
 entry:
@@ -12,21 +11,13 @@ if.then:
   ret void
 
 if.else:
-  br label %if.then4
-
-if.then4:
-  br i1 undef, label %if.then5, label %if.end
-
-if.then5:
-  br label %cleanup
-
-if.end:
-  br label %cleanup
-
-cleanup:
-  %cleanup.dest.slot.0 = phi i32 [ 1, %if.then5 ], [ 0, %if.end ]
-  unreachable
+  call void @sink()
+  call void @sink()
+  call void @sink()
+  ret void
 }
 
+declare void @sink() cold
+
 ; CHECK: define {{.*}} @fun.cold.1{{.*}}#[[outlined_func_attr:[0-9]+]]
 ; CHECK: attributes #[[outlined_func_attr]] = { {{.*}}minsize
diff --git a/llvm/test/Transforms/HotColdSplit/split-out-dbg-val-of-arg.ll b/llvm/test/Transforms/HotColdSplit/split-out-dbg-val-of-arg.ll
index 459ee6712bcb..b77201fe0d38 100644
--- a/llvm/test/Transforms/HotColdSplit/split-out-dbg-val-of-arg.ll
+++ b/llvm/test/Transforms/HotColdSplit/split-out-dbg-val-of-arg.ll
@@ -6,33 +6,23 @@
 define void @foo(i32 %arg1) !dbg !6 {
 entry:
   %var = add i32 0, 0, !dbg !11
-  br i1 undef, label %if.then, label %if.end, !dbg !12
+  br i1 undef, label %if.then, label %if.end
 
 if.then:                                          ; preds = %entry
-  ret void, !dbg !13
+  ret void
 
 if.end:                                           ; preds = %entry
   call void @llvm.dbg.value(metadata i32 %arg1, metadata !9, metadata !DIExpression()), !dbg !11
-  br label %if.then12, !dbg !14
-
-if.then12:                                        ; preds = %if.end
-  br label %cleanup40, !dbg !15
-
-cleanup40:                                        ; preds = %if.then12
-  br i1 undef, label %if.then5, label %if.end1, !dbg !16
-
-if.then5:
-  br label %return, !dbg !17
-
-if.end1:
-  br label %return, !dbg !18
-
-return:                                           ; preds = %cleanup40
-  unreachable, !dbg !19
+  call void @sink()
+  call void @sink()
+  call void @sink()
+  ret void
 }
 
 declare void @llvm.dbg.value(metadata, metadata, metadata)
 
+declare void @sink() cold
+
 !llvm.dbg.cu = !{!0}
 !llvm.debugify = !{!3, !4}
 !llvm.module.flags = !{!5}
@@ -49,11 +39,3 @@ declare void @llvm.dbg.value(metadata, metadata, metadata)
 !9 = !DILocalVariable(name: "1", scope: !6, file: !1, line: 1, type: !10)
 !10 = !DIBasicType(name: "ty32", size: 32, encoding: DW_ATE_unsigned)
 !11 = !DILocation(line: 1, column: 1, scope: !6)
-!12 = !DILocation(line: 2, column: 1, scope: !6)
-!13 = !DILocation(line: 3, column: 1, scope: !6)
-!14 = !DILocation(line: 4, column: 1, scope: !6)
-!15 = !DILocation(line: 5, column: 1, scope: !6)
-!16 = !DILocation(line: 6, column: 1, scope: !6)
-!17 = !DILocation(line: 7, column: 1, scope: !6)
-!18 = !DILocation(line: 8, column: 1, scope: !6)
-!19 = !DILocation(line: 9, column: 1, scope: !6)