mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-05 02:07:56 +00:00
3152e057e1
Summary: This patch improves the ThinLTO importer by importing 3x larger functions that are called from a hot block. I compared performance with the trunk on SPEC, and there were improvements of about 2% on povray and 3.33% on milc. These results seem to be consistent and match the results Teresa got with her simple heuristic. Some benchmarks got slower, but I think they are just noisy (mcf, xalancbmk, omnetpp); I am running the benchmarks again with more iterations to confirm. The geomean of all benchmarks, including the noisy ones, was about +0.02%. I see a much better improvement on the google branch with Easwaran's patch for PGO callsite inlining (the inliner actually inlines those big functions). Overall I see a +0.5% improvement, and I get +8.65% on povray. So I guess we will see a much bigger change when Easwaran's patch lands (it depends on the new pass manager), but it is still worth putting this into trunk before it. Implementation details changes: - Removed CallsiteCount. - ProfileCount got replaced by Hotness. - hot-import-multiplier is set to 3.0 for now; I didn't have time to tune it, but I see that we get most of the interesting functions with 3, so there is not much performance difference with a higher value, and binary size doesn't grow as much as with 10.0. Reviewers: eraman, mehdi_amini, tejohnson Subscribers: mehdi_amini, llvm-commits Differential Revision: https://reviews.llvm.org/D24638 llvm-svn: 282437
56 lines
2.4 KiB
LLVM
56 lines
2.4 KiB
LLVM
; Test to check the callgraph in summary when there is PGO
|
|
; RUN: opt -module-summary %s -o %t.o
|
|
; RUN: llvm-bcanalyzer -dump %t.o | FileCheck %s
|
|
|
|
; RUN: opt -module-summary %p/Inputs/thinlto-function-summary-callgraph.ll -o %t2.o
|
|
; RUN: llvm-lto -thinlto -o %t3 %t.o %t2.o
|
|
; RUN: llvm-bcanalyzer -dump %t3.thinlto.bc | FileCheck %s --check-prefix=COMBINED
|
|
|
|
; Check parsing for old summary versions generated from this file.
|
|
; RUN: llvm-lto -thinlto-index-stats %p/Inputs/thinlto-function-summary-callgraph-pgo.1.bc | FileCheck %s --check-prefix=OLD
|
|
; RUN: llvm-lto -thinlto-index-stats %p/Inputs/thinlto-function-summary-callgraph-pgo-combined.1.bc | FileCheck %s --check-prefix=OLD-COMBINED
|
|
|
|
; CHECK: <GLOBALVAL_SUMMARY_BLOCK
|
|
; CHECK-NEXT: <VERSION
|
|
; See if the call to func is registered, using the expected
|
|
; hotness type, with value id matching the subsequent value symbol table.
|
|
; CHECK-NEXT: <PERMODULE_PROFILE {{.*}} op4=[[FUNCID:[0-9]+]] op5=2/>
|
|
; CHECK-NEXT: </GLOBALVAL_SUMMARY_BLOCK>
|
|
; CHECK-NEXT: <VALUE_SYMTAB
|
|
; CHECK-NEXT: <FNENTRY {{.*}} record string = 'main'
|
|
; External function func should have entry with value id FUNCID
|
|
; CHECK-NEXT: <ENTRY {{.*}} op0=[[FUNCID]] {{.*}} record string = 'func'
|
|
; CHECK-NEXT: </VALUE_SYMTAB>
|
|
|
|
; COMBINED: <GLOBALVAL_SUMMARY_BLOCK
|
|
; COMBINED-NEXT: <VERSION
|
|
; COMBINED-NEXT: <COMBINED
|
|
; See if the call to func is registered, using the expected
|
|
; hotness type, with value id matching the subsequent value symbol table.
|
|
; op6=2 which is hotnessType::None.
|
|
; COMBINED-NEXT: <COMBINED_PROFILE {{.*}} op5=[[FUNCID:[0-9]+]] op6=2/>
|
|
; COMBINED-NEXT: </GLOBALVAL_SUMMARY_BLOCK>
|
|
; COMBINED-NEXT: <VALUE_SYMTAB
|
|
; The entry for function func should have value id FUNCID
|
|
; COMBINED-NEXT: <COMBINED_ENTRY {{.*}} op0=[[FUNCID]] op1=7289175272376759421/>
|
|
; COMBINED-NEXT: <COMBINED
|
|
; COMBINED-NEXT: </VALUE_SYMTAB>
|
|
|
|
; ModuleID = 'thinlto-function-summary-callgraph.ll'
|
|
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
|
target triple = "x86_64-unknown-linux-gnu"
|
|
|
|
; Function Attrs: nounwind uwtable
|
|
; Test entry point: makes a single call to the externally declared variadic
; function @func and returns 0. The !prof attachment (!2) gives this function
; a function_entry_count of 1, so the module carries profile data for it.
define i32 @main() #0 !prof !2 {
|
|
entry:
|
|
call void (...) @func()
|
|
ret i32 0
|
|
}
|
|
|
|
declare void @func(...) #1
|
|
|
|
!2 = !{!"function_entry_count", i64 1}
|
|
|
|
; OLD: Index {{.*}} contains 1 nodes (1 functions, 0 alias, 0 globals) and 1 edges (0 refs and 1 calls)
|
|
; OLD-COMBINED: Index {{.*}} contains 2 nodes (2 functions, 0 alias, 0 globals) and 1 edges (0 refs and 1 calls)
|