Runtime flag to control branch funnel threshold

Reviewers: pcc

Subscribers: hiraditya, llvm-commits

Differential Revision: https://reviews.llvm.org/D45193

llvm-svn: 329459
This commit is contained in:
Vitaly Buka 2018-04-06 21:32:36 +00:00
parent 8c35013df2
commit 51d42e420e
2 changed files with 106 additions and 2 deletions

View File

@ -111,6 +111,11 @@ static cl::opt<std::string> ClWriteSummary(
cl::desc("Write summary to given YAML file after running pass"),
cl::Hidden);
static cl::opt<int> ClThreshold("wholeprogramdevirt-branch-funnel-threshold",
cl::Hidden, cl::init(10), cl::ZeroOrMore,
cl::desc("Maximum number of call targets per "
"call site to enable branch funnels"));
// Find the minimum offset that we may store a value of size Size bits at. If
// IsAfter is set, look for an offset before the object, otherwise look for an
// offset after the object.
@ -820,8 +825,7 @@ void DevirtModule::tryICallBranchFunnel(
if (T.getArch() != Triple::x86_64)
return;
const unsigned kBranchFunnelThreshold = 10;
if (TargetsForSlot.size() > kBranchFunnelThreshold)
if (TargetsForSlot.size() > ClThreshold)
return;
bool HasNonDevirt = !SlotInfo.CSInfo.AllCallSitesDevirted;

View File

@ -0,0 +1,100 @@
; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -wholeprogramdevirt-branch-funnel-threshold=1 -S -o - %s | not grep @llvm.icall.branch.funnel | count 0
; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -wholeprogramdevirt-branch-funnel-threshold=10 -S -o - %s | grep @llvm.icall.branch.funnel | count 4
; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -wholeprogramdevirt-branch-funnel-threshold=100 -S -o - %s | grep @llvm.icall.branch.funnel | count 5
target datalayout = "e-p:64:64"
target triple = "x86_64-unknown-linux-gnu"
@vt1_1 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf1_1 to i8*)], !type !0
@vt1_2 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf1_2 to i8*)], !type !0
declare i32 @vf1_1(i8* %this, i32 %arg)
declare i32 @vf1_2(i8* %this, i32 %arg)
@vt2_1 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf2_1 to i8*)], !type !1
@vt2_2 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf2_2 to i8*)], !type !1
@vt2_3 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf2_3 to i8*)], !type !1
@vt2_4 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf2_4 to i8*)], !type !1
@vt2_5 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf2_5 to i8*)], !type !1
@vt2_6 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf2_6 to i8*)], !type !1
@vt2_7 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf2_7 to i8*)], !type !1
@vt2_8 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf2_8 to i8*)], !type !1
@vt2_9 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf2_9 to i8*)], !type !1
@vt2_10 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf2_10 to i8*)], !type !1
@vt2_11 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf2_11 to i8*)], !type !1
declare i32 @vf2_1(i8* %this, i32 %arg)
declare i32 @vf2_2(i8* %this, i32 %arg)
declare i32 @vf2_3(i8* %this, i32 %arg)
declare i32 @vf2_4(i8* %this, i32 %arg)
declare i32 @vf2_5(i8* %this, i32 %arg)
declare i32 @vf2_6(i8* %this, i32 %arg)
declare i32 @vf2_7(i8* %this, i32 %arg)
declare i32 @vf2_8(i8* %this, i32 %arg)
declare i32 @vf2_9(i8* %this, i32 %arg)
declare i32 @vf2_10(i8* %this, i32 %arg)
declare i32 @vf2_11(i8* %this, i32 %arg)
@vt3_1 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf3_1 to i8*)], !type !2
@vt3_2 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf3_2 to i8*)], !type !2
declare i32 @vf3_1(i8* %this, i32 %arg)
declare i32 @vf3_2(i8* %this, i32 %arg)
@vt4_1 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf4_1 to i8*)], !type !3
@vt4_2 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf4_2 to i8*)], !type !3
declare i32 @vf4_1(i8* %this, i32 %arg)
declare i32 @vf4_2(i8* %this, i32 %arg)
define i32 @fn1(i8* %obj) #0 {
%vtableptr = bitcast i8* %obj to [1 x i8*]**
%vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr
%vtablei8 = bitcast [1 x i8*]* %vtable to i8*
%p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"typeid1")
call void @llvm.assume(i1 %p)
%fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0
%fptr = load i8*, i8** %fptrptr
%fptr_casted = bitcast i8* %fptr to i32 (i8*, i32)*
%result = call i32 %fptr_casted(i8* %obj, i32 1)
ret i32 %result
}
define i32 @fn2(i8* %obj) #0 {
%vtableptr = bitcast i8* %obj to [1 x i8*]**
%vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr
%vtablei8 = bitcast [1 x i8*]* %vtable to i8*
%p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"typeid2")
call void @llvm.assume(i1 %p)
%fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0
%fptr = load i8*, i8** %fptrptr
%fptr_casted = bitcast i8* %fptr to i32 (i8*, i32)*
%result = call i32 %fptr_casted(i8* %obj, i32 1)
ret i32 %result
}
define i32 @fn3(i8* %obj) #0 {
%vtableptr = bitcast i8* %obj to [1 x i8*]**
%vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr
%vtablei8 = bitcast [1 x i8*]* %vtable to i8*
%p = call i1 @llvm.type.test(i8* %vtablei8, metadata !4)
call void @llvm.assume(i1 %p)
%fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0
%fptr = load i8*, i8** %fptrptr
%fptr_casted = bitcast i8* %fptr to i32 (i8*, i32)*
%result = call i32 %fptr_casted(i8* %obj, i32 1)
ret i32 %result
}
declare i1 @llvm.type.test(i8*, metadata)
declare void @llvm.assume(i1)
!0 = !{i32 0, !"typeid1"}
!1 = !{i32 0, !"typeid2"}
!2 = !{i32 0, !"typeid3"}
!3 = !{i32 0, !4}
!4 = distinct !{}
attributes #0 = { "target-features"="+retpoline" }