From 8af8091ef5a6cd3f79ab73d47ee07d91c9a5437f Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Wed, 14 Jan 2015 20:19:29 +0000 Subject: [PATCH] [MBP] Add flags to disable the BadCFGConflict check in MachineBlockPlacement. Some benchmarks have shown that this could lead to a potential performance benefit, so this adds some flags to help measure the difference. A possible explanation: in diamond-shaped CFGs (A followed by either B or C, both followed by D), putting both B and C in between A and D leads to the code being less dense than it could be. Either B or C always has to be skipped, increasing the chance of cache misses etc. Moving either B or C to after D might be beneficial on average. In the long run, we should probably do a better job of analyzing the basic block and branch probabilities to move the correct one of B or C to after D. But even if we don't use this in the long run, it is a good baseline for benchmarking. Original patch authored by Daniel Jasper, with test tweaks and a second flag added by me. 
Differential Revision: http://reviews.llvm.org/D6969 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@226034 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineBlockPlacement.cpp | 57 +++++---- .../X86/code_placement_bad_cfg_check.ll | 112 ++++++++++++++++++ 2 files changed, 148 insertions(+), 21 deletions(-) create mode 100644 test/CodeGen/X86/code_placement_bad_cfg_check.ll diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index aaa7d915697..779b84e99b8 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -60,6 +60,17 @@ static cl::opt AlignAllBlock("align-all-blocks", "blocks in the function."), cl::init(0), cl::Hidden); +static cl::opt OnlyHotBadCFGConflictCheck( + "only-hot-bad-cfg-conflict-check", + cl::desc("Only check that a hot successor doesn't have a hot predecessor."), + cl::init(false), cl::Hidden); + +static cl::opt NoBadCFGConflictCheck( + "no-bad-cfg-conflict-check", + cl::desc("Don't check whether a hot successor has a more important " + "predecessor."), + cl::init(false), cl::Hidden); + // FIXME: Find a good default for this flag and remove the flag. static cl::opt ExitBlockBias("block-placement-exit-block-bias", @@ -374,28 +385,32 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( continue; } - // Make sure that a hot successor doesn't have a globally more important - // predecessor. 
- BlockFrequency CandidateEdgeFreq - = MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl(); - bool BadCFGConflict = false; - for (MachineBasicBlock::pred_iterator PI = (*SI)->pred_begin(), - PE = (*SI)->pred_end(); - PI != PE; ++PI) { - if (*PI == *SI || (BlockFilter && !BlockFilter->count(*PI)) || - BlockToChain[*PI] == &Chain) - continue; - BlockFrequency PredEdgeFreq - = MBFI->getBlockFreq(*PI) * MBPI->getEdgeProbability(*PI, *SI); - if (PredEdgeFreq >= CandidateEdgeFreq) { - BadCFGConflict = true; - break; + if (!NoBadCFGConflictCheck) { + // Make sure that a hot successor doesn't have a globally more + // important predecessor. + BlockFrequency CandidateEdgeFreq = + OnlyHotBadCFGConflictCheck + ? MBFI->getBlockFreq(BB) * SuccProb + : MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl(); + bool BadCFGConflict = false; + for (MachineBasicBlock::pred_iterator PI = (*SI)->pred_begin(), + PE = (*SI)->pred_end(); + PI != PE; ++PI) { + if (*PI == *SI || (BlockFilter && !BlockFilter->count(*PI)) || + BlockToChain[*PI] == &Chain) + continue; + BlockFrequency PredEdgeFreq = + MBFI->getBlockFreq(*PI) * MBPI->getEdgeProbability(*PI, *SI); + if (PredEdgeFreq >= CandidateEdgeFreq) { + BadCFGConflict = true; + break; + } + } + if (BadCFGConflict) { + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb + << " (prob) (non-cold CFG conflict)\n"); + continue; } - } - if (BadCFGConflict) { - DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb - << " (prob) (non-cold CFG conflict)\n"); - continue; } } diff --git a/test/CodeGen/X86/code_placement_bad_cfg_check.ll b/test/CodeGen/X86/code_placement_bad_cfg_check.ll new file mode 100644 index 00000000000..a5f9e8aab1b --- /dev/null +++ b/test/CodeGen/X86/code_placement_bad_cfg_check.ll @@ -0,0 +1,112 @@ +; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s | FileCheck %s -check-prefix=CHECK-BAD-CFG +; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux -no-bad-cfg-conflict-check < %s | FileCheck %s 
-check-prefix=CHECK-NO-BAD-CFG +; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux -only-hot-bad-cfg-conflict-check < %s | FileCheck %s -check-prefix=CHECK-HOT-BAD-CFG + +define void @foo(i32 %t) { +; Test that we lift the call to 'c' up to immediately follow the call to 'b' +; when we disable the cfg conflict check. +; +; CHECK-BAD-CFG-LABEL: foo: +; CHECK-BAD-CFG: callq b +; CHECK-BAD-CFG: callq a +; CHECK-BAD-CFG: callq c +; +; CHECK-NO-BAD-CFG-LABEL: foo: +; CHECK-NO-BAD-CFG: callq b +; CHECK-NO-BAD-CFG: callq c +; CHECK-NO-BAD-CFG: callq a +; +; CHECK-HOT-BAD-CFG-LABEL: foo: +; CHECK-HOT-BAD-CFG: callq b +; CHECK-HOT-BAD-CFG: callq c +; CHECK-HOT-BAD-CFG: callq a + +entry: + %cmp = icmp eq i32 %t, 0 + br i1 %cmp, label %if.then, label %if.else + +if.then: + call void @a() + br label %if.end + +if.else: + call void @b() + br label %if.end + +if.end: + call void @c() + ret void +} + +define void @bar(i32 %t1, i32 %t2, i32 %t3) { +; Test that we lift the call to 'c' up to immediately follow the call to 'b' +; when we disable the cfg conflict check. 
+; +; CHECK-BAD-CFG-LABEL: bar: +; CHECK-BAD-CFG: callq a +; CHECK-BAD-CFG: callq c +; CHECK-BAD-CFG: callq d +; CHECK-BAD-CFG: callq f +; CHECK-BAD-CFG: callq b +; CHECK-BAD-CFG: callq e +; CHECK-BAD-CFG: callq g +; +; CHECK-NO-BAD-CFG-LABEL: bar: +; CHECK-NO-BAD-CFG: callq a +; CHECK-NO-BAD-CFG: callq c +; CHECK-NO-BAD-CFG: callq g +; CHECK-NO-BAD-CFG: callq d +; CHECK-NO-BAD-CFG: callq f +; CHECK-NO-BAD-CFG: callq b +; CHECK-NO-BAD-CFG: callq e +; +; CHECK-HOT-BAD-CFG-LABEL: bar: +; CHECK-HOT-BAD-CFG: callq a +; CHECK-HOT-BAD-CFG: callq c +; CHECK-HOT-BAD-CFG: callq d +; CHECK-HOT-BAD-CFG: callq f +; CHECK-HOT-BAD-CFG: callq g +; CHECK-HOT-BAD-CFG: callq b +; CHECK-HOT-BAD-CFG: callq e + +entry: + br i1 undef, label %if1.then, label %if1.else + +if1.then: + call void @a() + %cmp2 = icmp eq i32 %t2, 0 + br i1 %cmp2, label %if2.then, label %if2.else + +if2.then: + call void @b() + br label %if.end + +if2.else: + call void @c() + br label %if.end + +if1.else: + call void @d() + %cmp3 = icmp eq i32 %t3, 0 + br i1 %cmp3, label %if3.then, label %if3.else + +if3.then: + call void @e() + br label %if.end + +if3.else: + call void @f() + br label %if.end + +if.end: + call void @g() + ret void +} + +declare void @a() +declare void @b() +declare void @c() +declare void @d() +declare void @e() +declare void @f() +declare void @g()