mirror of
https://github.com/RPCSX/llvm.git
synced 2025-03-04 10:59:04 +00:00

Summary: The motivation example is like below which has 13 cases but only 2 distinct targets ``` lor.lhs.false2: ; preds = %if.then switch i32 %Status, label %if.then27 [ i32 -7012, label %if.end35 i32 -10008, label %if.end35 i32 -10016, label %if.end35 i32 15000, label %if.end35 i32 14013, label %if.end35 i32 10114, label %if.end35 i32 10107, label %if.end35 i32 10105, label %if.end35 i32 10013, label %if.end35 i32 10011, label %if.end35 i32 7008, label %if.end35 i32 7007, label %if.end35 i32 5002, label %if.end35 ] ``` which is compiled into a balanced binary tree like this on AArch64 (similar on X86) ``` .LBB853_9: // %lor.lhs.false2 mov w8, #10012 cmp w19, w8 b.gt .LBB853_14 // BB#10: // %lor.lhs.false2 mov w8, #5001 cmp w19, w8 b.gt .LBB853_18 // BB#11: // %lor.lhs.false2 mov w8, #-10016 cmp w19, w8 b.eq .LBB853_23 // BB#12: // %lor.lhs.false2 mov w8, #-10008 cmp w19, w8 b.eq .LBB853_23 // BB#13: // %lor.lhs.false2 mov w8, #-7012 cmp w19, w8 b.eq .LBB853_23 b .LBB853_3 .LBB853_14: // %lor.lhs.false2 mov w8, #14012 cmp w19, w8 b.gt .LBB853_21 // BB#15: // %lor.lhs.false2 mov w8, #-10105 add w8, w19, w8 cmp w8, #9 // =9 b.hi .LBB853_17 // BB#16: // %lor.lhs.false2 orr w9, wzr, #0x1 lsl w8, w9, w8 mov w9, #517 and w8, w8, w9 cbnz w8, .LBB853_23 .LBB853_17: // %lor.lhs.false2 mov w8, #10013 cmp w19, w8 b.eq .LBB853_23 b .LBB853_3 .LBB853_18: // %lor.lhs.false2 mov w8, #-7007 add w8, w19, w8 cmp w8, #2 // =2 b.lo .LBB853_23 // BB#19: // %lor.lhs.false2 mov w8, #5002 cmp w19, w8 b.eq .LBB853_23 // BB#20: // %lor.lhs.false2 mov w8, #10011 cmp w19, w8 b.eq .LBB853_23 b .LBB853_3 .LBB853_21: // %lor.lhs.false2 mov w8, #14013 cmp w19, w8 b.eq .LBB853_23 // BB#22: // %lor.lhs.false2 mov w8, #15000 cmp w19, w8 b.ne .LBB853_3 ``` However, the inline cost model estimates the cost to be linear with the number of distinct targets and the cost of the above switch is just 2 InstrCosts. The function containing this switch is then inlined about 900 times. 
This change uses the general switch-lowering approach for the inline heuristic. It estimates the number of case clusters, taking into account the suitability check for a jump table or bit test. Considering the binary search tree built for the clusters, this change modifies the model to be linear in the size of the balanced binary tree. The model is off by default for now: -inline-generic-switch-cost=false. This change was originally proposed by Haicheng in D29870. Reviewers: hans, bmakam, chandlerc, eraman, haicheng, mcrosier Reviewed By: hans Subscribers: joerg, aemerson, llvm-commits, rengolin Differential Revision: https://reviews.llvm.org/D31085 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@301649 91177308-0d34-0410-b5e6-96231b3b80d8
124 lines
2.6 KiB
LLVM
124 lines
2.6 KiB
LLVM
; RUN: opt < %s -inline -inline-threshold=20 -S -mtriple=aarch64-none-linux -inline-generic-switch-cost=true | FileCheck %s
; RUN: opt < %s -passes='cgscc(inline)' -inline-threshold=20 -S -mtriple=aarch64-none-linux -inline-generic-switch-cost=true | FileCheck %s

; Callee with a ten-case switch on %a (0, 1000, 2000, ..., 9000) that has only
; two distinct non-default targets: case 0 branches to %sw.bb0 and every other
; case shares %sw.bb1. Each destination, including %sw.default, performs one
; volatile store of %a through %P before joining %return; the function always
; returns 42.
; NOTE(review): the case values are 1000 apart, so this switch is presumably
; not a jump-table or bit-test candidate and is costed as a compare tree --
; confirm against the inline cost model.
define i32 @callee_range(i32 %a, i32* %P) {
  switch i32 %a, label %sw.default [
    i32 0, label %sw.bb0
    i32 1000, label %sw.bb1
    i32 2000, label %sw.bb1
    i32 3000, label %sw.bb1
    i32 4000, label %sw.bb1
    i32 5000, label %sw.bb1
    i32 6000, label %sw.bb1
    i32 7000, label %sw.bb1
    i32 8000, label %sw.bb1
    i32 9000, label %sw.bb1
  ]

sw.default:
  store volatile i32 %a, i32* %P
  br label %return
sw.bb0:
  store volatile i32 %a, i32* %P
  br label %return
sw.bb1:
  store volatile i32 %a, i32* %P
  br label %return
return:
  ret i32 42
}

; Forwards both arguments to @callee_range and returns its result.
; The check directives below require the call to @callee_range to still be
; present in the output, i.e. the callee must not have been inlined under the
; options given on the RUN lines.
define i32 @caller_range(i32 %a, i32* %P) {
; CHECK-LABEL: @caller_range(
; CHECK: call i32 @callee_range
  %r = call i32 @callee_range(i32 %a, i32* %P)
  ret i32 %r
}

; Callee with a nine-case switch on %a over the contiguous values 0..8, cycling
; through three distinct targets (%sw.bb0, %sw.bb1, %sw.bb2). %sw.default,
; %sw.bb0 and %sw.bb1 each perform one volatile store of %a through %P;
; %sw.bb2 branches straight to %return without a store. The function always
; returns 42.
; NOTE(review): a dense range with three targets is presumably what makes this
; switch a bit-test candidate (as the function name suggests) -- confirm
; against the inline cost model.
define i32 @callee_bittest(i32 %a, i32* %P) {
  switch i32 %a, label %sw.default [
    i32 0, label %sw.bb0
    i32 1, label %sw.bb1
    i32 2, label %sw.bb2
    i32 3, label %sw.bb0
    i32 4, label %sw.bb1
    i32 5, label %sw.bb2
    i32 6, label %sw.bb0
    i32 7, label %sw.bb1
    i32 8, label %sw.bb2
  ]

sw.default:
  store volatile i32 %a, i32* %P
  br label %return

sw.bb0:
  store volatile i32 %a, i32* %P
  br label %return

sw.bb1:
  store volatile i32 %a, i32* %P
  br label %return

sw.bb2:
  br label %return

return:
  ret i32 42
}


; Forwards both arguments to @callee_bittest and returns its result.
; The negative check directive below requires that no call to @callee_bittest
; remains in the output, i.e. the callee must have been inlined under the
; options given on the RUN lines.
define i32 @caller_bittest(i32 %a, i32* %P) {
; CHECK-LABEL: @caller_bittest(
; CHECK-NOT: call i32 @callee_bittest
  %r = call i32 @callee_bittest(i32 %a, i32* %P)
  ret i32 %r
}

; Callee with a twelve-case switch on %a over the contiguous values 1001..1012,
; cycling through four distinct targets (%sw.bb101 .. %sw.bb104). Each of the
; four case targets performs one volatile store of %a through %P; %sw.default
; branches straight to %return without a store. The function always returns 42.
; NOTE(review): a dense range with four targets is presumably what makes this
; switch a jump-table candidate (as the function name suggests) -- confirm
; against the inline cost model.
define i32 @callee_jumptable(i32 %a, i32* %P) {
  switch i32 %a, label %sw.default [
    i32 1001, label %sw.bb101
    i32 1002, label %sw.bb102
    i32 1003, label %sw.bb103
    i32 1004, label %sw.bb104
    i32 1005, label %sw.bb101
    i32 1006, label %sw.bb102
    i32 1007, label %sw.bb103
    i32 1008, label %sw.bb104
    i32 1009, label %sw.bb101
    i32 1010, label %sw.bb102
    i32 1011, label %sw.bb103
    i32 1012, label %sw.bb104
  ]

sw.default:
  br label %return

sw.bb101:
  store volatile i32 %a, i32* %P
  br label %return

sw.bb102:
  store volatile i32 %a, i32* %P
  br label %return

sw.bb103:
  store volatile i32 %a, i32* %P
  br label %return

sw.bb104:
  store volatile i32 %a, i32* %P
  br label %return

return:
  ret i32 42
}

; Calls @callee_jumptable with %b as the switch operand and returns its result.
; The first parameter %a is not used in the body.
; The check directives below require the call to @callee_jumptable to still be
; present in the output, i.e. the callee must not have been inlined under the
; options given on the RUN lines.
define i32 @caller_jumptable(i32 %a, i32 %b, i32* %P) {
; CHECK-LABEL: @caller_jumptable(
; CHECK: call i32 @callee_jumptable
  %r = call i32 @callee_jumptable(i32 %b, i32* %P)
  ret i32 %r
}