mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-12 22:28:22 +00:00
dbab4dc942
running IPSCCP early, and we run functionattrs interlaced with the inliner, we often (particularly for small or noop functions) completely propagate all of the information about a call to its call site in IPSCCP (making a call dead) and functionattrs is smart enough to realize that the function is readonly (because it is interlaced with inliner). To improve compile time and make the inliner threshold more accurate, realize that we don't have to inline dead readonly function calls. Instead, just delete the call. This happens all the time for C++ codes; here are some counters from opt/llvm-ld counting the number of times calls were deleted vs inlined on various apps: Tramp3d opt: 5033 inline - Number of call sites deleted, not inlined 24596 inline - Number of functions inlined llvm-ld: 667 inline - Number of functions deleted because all callers found 699 inline - Number of functions inlined 483.xalancbmk opt: 8096 inline - Number of call sites deleted, not inlined 62528 inline - Number of functions inlined llvm-ld: 217 inline - Number of allocas merged together 2158 inline - Number of functions inlined 471.omnetpp: 331 inline - Number of call sites deleted, not inlined 8981 inline - Number of functions inlined llvm-ld: 171 inline - Number of functions deleted because all callers found 629 inline - Number of functions inlined Deleting a call is much faster than inlining it, and is insensitive to the size of the callee. :) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@86975 91177308-0d34-0410-b5e6-96231b3b80d8
23 lines
844 B
LLVM
23 lines
844 B
LLVM
; Regression test for the inliner's treatment of dead calls to readonly
; functions: such a call site should be deleted outright instead of inlined.
;
; First invocation: with functionattrs in the pipeline, the -stats output
; must contain "Number of call sites deleted, not inlined".
; RUN: opt %s -S -inline -functionattrs -stats |& grep {Number of call sites deleted, not inlined}
;
; Second invocation: with the inliner alone, the -stats output must contain
; "Number of functions inlined".
; RUN: opt %s -S -inline -stats |& grep {Number of functions inlined}

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
target triple = "i386-apple-darwin9.8"
|
|
|
|
; Internal arithmetic helper: returns (%y + %z) * %x + %y * %z.
; The body contains only integer arithmetic on its arguments (no loads,
; stores or calls). For the arguments passed by @test2 (1, 2, 4) the value
; is (2 + 4) * 1 + 2 * 4 = 14.
define internal i32 @test(i32 %x, i32 %y, i32 %z) nounwind {
entry:
  %sum = add nsw i32 %y, %z               ; y + z
  %scaled = mul i32 %sum, %x              ; (y + z) * x
  %prod = mul i32 %y, %z                  ; y * z
  %result = add nsw i32 %scaled, %prod    ; (y + z) * x + y * z
  ret i32 %result
}
|
|
|
|
; Caller containing a dead call to @test: the call's result is never used,
; and the function returns the constant 14 directly (the value @test computes
; for these arguments). This call site is what the statistics checked by the
; RUN lines at the top of the file refer to.
define i32 @test2() nounwind {
entry:
  %unused = call i32 @test(i32 1, i32 2, i32 4) nounwind   ; result deliberately has no uses
  ret i32 14
}
|
|
|
|
|