mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-02 16:36:40 +00:00
Support repeated machine outlining
Summary: This change allows machine outlining to be applied N times, where N is specified by a compiler option. By default, N is 1. The motivation is that repeated machine outlining can further reduce code size. Please refer to the presentation "Improving Swift Binary Size via Link Time Optimization" from the 2019 LLVM Developers' Meeting. Reviewers: aschwaighofer, tellenbach, paquette Reviewed By: paquette Subscribers: tellenbach, hiraditya, llvm-commits, jinlin Tags: #llvm Differential Revision: https://reviews.llvm.org/D71027
This commit is contained in:
parent
cd616d89f0
commit
065aa83192
@ -97,6 +97,13 @@ static cl::opt<bool> EnableLinkOnceODROutlining(
|
||||
cl::desc("Enable the machine outliner on linkonceodr functions"),
|
||||
cl::init(false));
|
||||
|
||||
// Set the number of times to repeatedly apply outlining.
|
||||
// Defaults to 1, but more repetitions can save additional size.
|
||||
static cl::opt<unsigned>
|
||||
NumRepeat("machine-outline-runs", cl::Hidden,
|
||||
cl::desc("The number of times to apply machine outlining"),
|
||||
cl::init(1));
|
||||
|
||||
namespace {
|
||||
|
||||
/// Represents an undefined index in the suffix tree.
|
||||
@ -842,6 +849,9 @@ struct MachineOutliner : public ModulePass {
|
||||
/// linkonceodr linkage.
|
||||
bool OutlineFromLinkOnceODRs = false;
|
||||
|
||||
/// The current repeat number of machine outlining.
|
||||
unsigned OutlineRepeatedNum = 0;
|
||||
|
||||
/// Set to true if the outliner should run on all functions in the module
|
||||
/// considered safe for outlining.
|
||||
/// Set to true by default for compatibility with llc's -run-pass option.
|
||||
@ -900,9 +910,12 @@ struct MachineOutliner : public ModulePass {
|
||||
InstructionMapper &Mapper,
|
||||
unsigned Name);
|
||||
|
||||
/// Calls 'doOutline()'.
|
||||
/// Calls runOnceOnModule NumRepeat times
|
||||
bool runOnModule(Module &M) override;
|
||||
|
||||
/// Calls 'doOutline()'.
|
||||
bool runOnceOnModule(Module &M, unsigned Iter);
|
||||
|
||||
/// Construct a suffix tree on the instructions in \p M and outline repeated
|
||||
/// strings from that tree.
|
||||
bool doOutline(Module &M, unsigned &OutlinedFunctionNum);
|
||||
@ -1099,7 +1112,13 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
|
||||
// Create the function name. This should be unique.
|
||||
// FIXME: We should have a better naming scheme. This should be stable,
|
||||
// regardless of changes to the outliner's cost model/traversal order.
|
||||
std::string FunctionName = ("OUTLINED_FUNCTION_" + Twine(Name)).str();
|
||||
std::string FunctionName;
|
||||
if (OutlineRepeatedNum > 0)
|
||||
FunctionName = ("OUTLINED_FUNCTION_" + Twine(OutlineRepeatedNum + 1) + "_" +
|
||||
Twine(Name))
|
||||
.str();
|
||||
else
|
||||
FunctionName = ("OUTLINED_FUNCTION_" + Twine(Name)).str();
|
||||
|
||||
// Create the function using an IR-level function.
|
||||
LLVMContext &C = M.getContext();
|
||||
@ -1438,12 +1457,14 @@ void MachineOutliner::emitInstrCountChangedRemark(
|
||||
}
|
||||
}
|
||||
|
||||
bool MachineOutliner::runOnModule(Module &M) {
|
||||
bool MachineOutliner::runOnceOnModule(Module &M, unsigned Iter) {
|
||||
// Check if there's anything in the module. If it's empty, then there's
|
||||
// nothing to outline.
|
||||
if (M.empty())
|
||||
return false;
|
||||
|
||||
OutlineRepeatedNum = Iter;
|
||||
|
||||
// Number to append to the current outlined function.
|
||||
unsigned OutlinedFunctionNum = 0;
|
||||
|
||||
@ -1507,3 +1528,23 @@ bool MachineOutliner::doOutline(Module &M, unsigned &OutlinedFunctionNum) {
|
||||
|
||||
return OutlinedSomething;
|
||||
}
|
||||
|
||||
// Apply machine outlining for NumRepeat times.
|
||||
bool MachineOutliner::runOnModule(Module &M) {
|
||||
if (NumRepeat < 1)
|
||||
report_fatal_error("Expect NumRepeat for machine outlining "
|
||||
"to be greater than or equal to 1!\n");
|
||||
|
||||
bool Changed = false;
|
||||
for (unsigned I = 0; I < NumRepeat; I++) {
|
||||
if (!runOnceOnModule(M, I)) {
|
||||
LLVM_DEBUG(dbgs() << "Stopped outlining at iteration " << I
|
||||
<< " because no changes were found.\n";);
|
||||
return Changed;
|
||||
}
|
||||
Changed = true;
|
||||
}
|
||||
LLVM_DEBUG(dbgs() << "Stopped outlining because iteration is "
|
||||
"equal to " << NumRepeat << "\n";);
|
||||
return Changed;
|
||||
}
|
||||
|
149
test/CodeGen/AArch64/machine-outliner-iterative.mir
Normal file
149
test/CodeGen/AArch64/machine-outliner-iterative.mir
Normal file
@ -0,0 +1,149 @@
|
||||
# RUN: llc -mtriple=aarch64 -run-pass=machine-outliner -machine-outline-runs=2 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix TWO-RUNS
|
||||
# RUN: llc -mtriple=aarch64 -run-pass=machine-outliner -machine-outline-runs=1 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix ONE-RUN
|
||||
# RUN: llc -mtriple=aarch64 -run-pass=machine-outliner -machine-outline-runs=4 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix FOUR-RUNS
|
||||
|
||||
# Example of Repeated Instruction Sequence - Iterative Machine Outlining
|
||||
#
|
||||
# define void @"$s12"(...) { define i64 @"$s5"(...) { define void @"$s13"(...) {
|
||||
# ... ... ...
|
||||
# %8 = load i1, i1* %7 %8 = load i1, i1* %7
|
||||
# %9 = load i4, i4*, %6 %9 = load i4, i4*, %6 %9 = load i4, i4*, %6
|
||||
# store i4 %9, i4* %5 store i4 %9, i4* %5 store i4 %9, i4* %5
|
||||
# ... ... ...
|
||||
# } } }
|
||||
#
|
||||
# After machine outliner (1st time)
|
||||
#
|
||||
# define void @"$s12"(...) { define i64 @"$s5"(...) { define void @"$s13"(...) {
|
||||
# ... ... ...
|
||||
# %8 = load i1, i1* %7 %8 = load i1, i1* %7
|
||||
# call void @outlined_function_1_1 call void @outlined_function_1_1 call void @outlined_function_1_1
|
||||
# ... ... ...
|
||||
# } } }
|
||||
#
|
||||
# After machine outliner (2nd time)
|
||||
#
|
||||
# define void @"$s12"(...) { define i64 @"$s5"(...) { define void @"$s13"(...) {
|
||||
# ... ... ...
|
||||
# call void @outlined_function_2_1 call void @outlined_function_1_1 call void @outlined_function_2_1
|
||||
# ... ... ...
|
||||
# } } }
|
||||
#
|
||||
# Check whether the machine outliner can find further outlining opportunities after machine
|
||||
# outlining has already been performed.
|
||||
#
|
||||
--- |
|
||||
declare void @foo() local_unnamed_addr
|
||||
|
||||
declare void @widget() local_unnamed_addr
|
||||
|
||||
; Function Attrs: minsize noredzone optsize
|
||||
define void @baz.14() #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: minsize noredzone optsize
|
||||
define void @baz.15() #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: minsize noredzone optsize
|
||||
define void @baz.16() #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { minsize noredzone optsize }
|
||||
...
|
||||
---
|
||||
name: baz.14
|
||||
tracksRegLiveness: true
|
||||
stack:
|
||||
- { id: 0, offset: -8, size: 8 }
|
||||
- { id: 1, offset: -16, size: 8 }
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $x0, $x19, $lr
|
||||
|
||||
early-clobber $sp = frame-setup STPXpre killed $lr, killed $x19, $sp, -2 :: (store 8 into %stack.1), (store 8 into %stack.0)
|
||||
frame-setup CFI_INSTRUCTION def_cfa_offset 16
|
||||
frame-setup CFI_INSTRUCTION offset $w19, -8
|
||||
frame-setup CFI_INSTRUCTION offset $w30, -16
|
||||
renamable $x19 = COPY $x0
|
||||
renamable $x0 = nuw ADDXri $x0, 48, 0
|
||||
$x1 = ADDXri $sp, 0, 0
|
||||
dead $w2 = MOVi32imm 33, implicit-def $x2
|
||||
$x3 = COPY $xzr
|
||||
BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit killed $x1, implicit killed $x2, implicit killed $x3, implicit-def $sp
|
||||
$x0 = COPY killed renamable $x19
|
||||
BL @widget, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp
|
||||
early-clobber $sp, $lr, $x19 = frame-destroy LDPXpost $sp, 2 :: (load 8 from %stack.1), (load 8 from %stack.0)
|
||||
RET_ReallyLR
|
||||
|
||||
...
|
||||
---
|
||||
name: baz.15
|
||||
tracksRegLiveness: true
|
||||
stack:
|
||||
- { id: 0, offset: -8, size: 8 }
|
||||
- { id: 1, offset: -16, size: 8 }
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $x0, $x19, $lr
|
||||
|
||||
early-clobber $sp = frame-setup STPXpre killed $lr, killed $x19, $sp, -2 :: (store 8 into %stack.1), (store 8 into %stack.0)
|
||||
frame-setup CFI_INSTRUCTION def_cfa_offset 16
|
||||
frame-setup CFI_INSTRUCTION offset $w19, -8
|
||||
frame-setup CFI_INSTRUCTION offset $w30, -16
|
||||
renamable $x19 = COPY $x0
|
||||
renamable $x0 = nuw ADDXri killed renamable $x0, 16, 0
|
||||
$x1 = ADDXri $sp, 0, 0
|
||||
dead $w2 = MOVi32imm 33, implicit-def $x2
|
||||
$x3 = COPY $xzr
|
||||
BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit $x1, implicit killed $x2, implicit killed $x3, implicit-def $sp
|
||||
$x0 = COPY killed renamable $x19
|
||||
BL @widget, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp
|
||||
early-clobber $sp, $lr, $x19 = frame-destroy LDPXpost $sp, 2 :: (load 8 from %stack.1), (load 8 from %stack.0)
|
||||
RET_ReallyLR
|
||||
|
||||
...
|
||||
---
|
||||
name: baz.16
|
||||
tracksRegLiveness: true
|
||||
stack:
|
||||
- { id: 0, offset: -8, size: 8 }
|
||||
- { id: 1, offset: -16, size: 8 }
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $x0, $x19, $lr
|
||||
|
||||
early-clobber $sp = frame-setup STPXpre killed $lr, killed $x19, $sp, -2 :: (store 8 into %stack.1), (store 8 into %stack.0)
|
||||
frame-setup CFI_INSTRUCTION def_cfa_offset 16
|
||||
frame-setup CFI_INSTRUCTION offset $w19, -8
|
||||
frame-setup CFI_INSTRUCTION offset $w30, -16
|
||||
renamable $x19 = COPY $x0
|
||||
renamable $x0 = nuw ADDXri $x0, 48, 0
|
||||
$x1 = ADDXri $sp, 0, 0
|
||||
dead $w2 = MOVi32imm 33, implicit-def $x2
|
||||
$x3 = COPY $xzr
|
||||
BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit killed $x1, implicit killed $x2, implicit killed $x3, implicit-def $sp
|
||||
$x0 = COPY killed renamable $x19
|
||||
BL @widget, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp
|
||||
early-clobber $sp, $lr, $x19 = frame-destroy LDPXpost $sp, 2 :: (load 8 from %stack.1), (load 8 from %stack.0)
|
||||
RET_ReallyLR
|
||||
|
||||
...
|
||||
|
||||
# TWO-RUNS: name: OUTLINED_FUNCTION_2_0
|
||||
# TWO-RUNS-DAG: bb.0:
|
||||
# TWO-RUNS-DAG: renamable $x19 = COPY $x0
|
||||
# TWO-RUNS-NEXT: renamable $x0 = nuw ADDXri $x0, 48, 0
|
||||
# TWO-RUNS-NEXT: TCRETURNdi @OUTLINED_FUNCTION_0, 0, implicit $sp
|
||||
#
|
||||
# The machine outliner is expected to stop at the 1st iteration for case ONE-RUN
|
||||
# since machine-outline-runs is specified as 1.
|
||||
# ONE-RUN-NOT: [[OUTLINED:OUTLINED_FUNCTION_2_[0-9]+]]
|
||||
#
|
||||
# The machine outliner is expected to stop at the 3rd iteration for case FOUR-RUNS
|
||||
# since the MIR has no change at the 3rd iteration.
|
||||
# FOUR-RUNS-NOT: [[OUTLINED:OUTLINED_FUNCTION_3_[0-9]+]]
|
||||
# FOUR-RUNS-NOT: [[OUTLINED:OUTLINED_FUNCTION_4_[0-9]+]]
|
Loading…
Reference in New Issue
Block a user