Add support to optionally limit the size of jump tables.

Many high-performance processors have a dedicated branch predictor for
indirect branches, commonly used with jump tables.  As sophisticated as such
branch predictors are, they tend to have well defined limits beyond which
their effectiveness is hampered or even nullified.  One such limit is the
number of possible destinations for a given indirect branch that such
branch predictors can handle.

This patch honors a limit that a target may set on the number of
destination addresses in a jump table.

Patch by: Evandro Menezes <e.menezes@samsung.com>, Aditya Kumar
<aditya.k7@samsung.com>, Sebastian Pop <s.pop@samsung.com>.

Differential revision: https://reviews.llvm.org/D21940

llvm-svn: 282412
This commit is contained in:
Evandro Menezes 2016-09-26 15:32:33 +00:00
parent a554bff930
commit 44fc35bb88
7 changed files with 154 additions and 18 deletions

View File

@ -1022,12 +1022,15 @@ public:
return UseUnderscoreLongJmp;
}
/// Return integer threshold on number of blocks to use jump tables rather
/// than if sequence.
int getMinimumJumpTableEntries() const {
/// Return lower limit for number of blocks in a jump table.
unsigned getMinimumJumpTableEntries() const {
return MinimumJumpTableEntries;
}
/// Return upper limit for number of entries in a jump table.
/// Zero if no limit.
unsigned getMaximumJumpTableSize() const;
/// If a physical register, this specifies the register that
/// llvm.savestack/llvm.restorestack should save and restore.
unsigned getStackPointerRegisterToSaveRestore() const {
@ -1353,12 +1356,15 @@ protected:
UseUnderscoreLongJmp = Val;
}
/// Indicate the number of blocks to generate jump tables rather than if
/// sequence.
void setMinimumJumpTableEntries(int Val) {
/// Indicate the minimum number of blocks to generate jump tables.
void setMinimumJumpTableEntries(unsigned Val) {
MinimumJumpTableEntries = Val;
}
/// Indicate the maximum number of entries in jump tables.
/// Set to zero to generate unlimited jump tables.
void setMaximumJumpTableSize(unsigned);
/// If set to a physical register, this specifies the register that
/// llvm.savestack/llvm.restorestack should save and restore.
void setStackPointerRegisterToSaveRestore(unsigned R) {

View File

@ -8454,12 +8454,19 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
if (!areJTsAllowed(TLI, SI))
return;
const bool OptForSize = DefaultMBB->getParent()->getFunction()->optForSize();
const int64_t N = Clusters.size();
const unsigned MinJumpTableSize = TLI.getMinimumJumpTableEntries();
const unsigned MinJumpTableEntries = TLI.getMinimumJumpTableEntries();
const unsigned MaxJumpTableSize =
OptForSize ? UINT_MAX : TLI.getMaximumJumpTableSize() ?
TLI.getMaximumJumpTableSize() : UINT_MAX;
if (N < 2 || N < MinJumpTableEntries)
return;
// TotalCases[i]: Total nbr of cases in Clusters[0..i].
SmallVector<unsigned, 8> TotalCases(N);
for (unsigned i = 0; i < N; ++i) {
const APInt &Hi = Clusters[i].High->getValue();
const APInt &Lo = Clusters[i].Low->getValue();
@ -8468,12 +8475,16 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
TotalCases[i] += TotalCases[i - 1];
}
unsigned MinDensity = JumpTableDensity;
if (DefaultMBB->getParent()->getFunction()->optForSize())
MinDensity = OptsizeJumpTableDensity;
if (N >= MinJumpTableSize
&& isDense(Clusters, TotalCases, 0, N - 1, MinDensity)) {
// Cheap case: the whole range might be suitable for jump table.
const unsigned MinDensity =
OptForSize ? OptsizeJumpTableDensity : JumpTableDensity;
// Cheap case: the whole range may be suitable for jump table.
unsigned JumpTableSize = (Clusters[N - 1].High->getValue() -
Clusters[0].Low->getValue())
.getLimitedValue(UINT_MAX - 1) + 1;
if (JumpTableSize <= MaxJumpTableSize &&
isDense(Clusters, TotalCases, 0, N - 1, MinDensity)) {
CaseCluster JTCluster;
if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) {
Clusters[0] = JTCluster;
@ -8503,7 +8514,6 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
// Base case: There is only one way to partition Clusters[N-1].
MinPartitions[N - 1] = 1;
LastElement[N - 1] = N - 1;
assert(MinJumpTableSize > 1);
NumTables[N - 1] = 0;
// Note: loop indexes are signed to avoid underflow.
@ -8517,9 +8527,13 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
// Search for a solution that results in fewer partitions.
for (int64_t j = N - 1; j > i; j--) {
// Try building a partition from Clusters[i..j].
if (isDense(Clusters, TotalCases, i, j, MinDensity)) {
JumpTableSize = (Clusters[j].High->getValue() -
Clusters[i].Low->getValue())
.getLimitedValue(UINT_MAX - 1) + 1;
if (JumpTableSize <= MaxJumpTableSize &&
isDense(Clusters, TotalCases, i, j, MinDensity)) {
unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
bool IsTable = j - i + 1 >= MinJumpTableSize;
bool IsTable = j - i + 1 >= MinJumpTableEntries;
unsigned Tables = IsTable + (j == N - 1 ? 0 : NumTables[j + 1]);
// If this j leads to fewer partitions, or same number of partitions
@ -8543,7 +8557,7 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
unsigned NumClusters = Last - First + 1;
CaseCluster JTCluster;
if (NumClusters >= MinJumpTableSize &&
if (NumClusters >= MinJumpTableEntries &&
buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)) {
Clusters[DstIndex++] = JTCluster;
} else {

View File

@ -44,6 +44,10 @@ static cl::opt<bool> JumpIsExpensiveOverride(
cl::desc("Do not create extra branches to split comparison logic."),
cl::Hidden);
static cl::opt<unsigned> MaximumJumpTableSize
("max-jump-table", cl::init(0), cl::Hidden,
cl::desc("Set maximum number of jump table entries; zero for no limit."));
// Although this default value is arbitrary, it is not random. It is assumed
// that a condition that evaluates the same way by a higher percentage than this
// is best represented as control flow. Therefore, the default value N should be
@ -1831,3 +1835,11 @@ Value *TargetLoweringBase::getSDagStackGuard(const Module &M) const {
Value *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const {
return nullptr;
}
// Return the current upper limit on the number of jump table entries; zero
// means no limit. The value is read from the file-scope MaximumJumpTableSize
// option ("max-jump-table") rather than from per-object state, so every
// TargetLoweringBase instance observes the same limit.
unsigned TargetLoweringBase::getMaximumJumpTableSize() const {
return MaximumJumpTableSize;
}
// Set the upper limit on the number of jump table entries; zero requests no
// limit. This assigns to the file-scope MaximumJumpTableSize option object
// itself, so the new value replaces whatever was supplied through
// -max-jump-table and is what later getMaximumJumpTableSize() calls return.
// NOTE(review): because the storage is shared rather than per-instance, a
// limit installed on behalf of one subtarget appears to remain in effect for
// any other subtarget handled afterwards in the same process — confirm that
// this is intended.
void TargetLoweringBase::setMaximumJumpTableSize(unsigned Val) {
MaximumJumpTableSize = Val;
}

View File

@ -513,6 +513,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setPrefFunctionAlignment(STI.getPrefFunctionAlignment());
setPrefLoopAlignment(STI.getPrefLoopAlignment());
// Only change the limit for entries in a jump table if specified by
// the subtarget, but not at the command line.
unsigned MaxJT = STI.getMaximumJumpTableSize();
if (MaxJT && getMaximumJumpTableSize() == 0)
setMaximumJumpTableSize(MaxJT);
setHasExtractBitsInsn(true);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

View File

@ -65,6 +65,7 @@ void AArch64Subtarget::initializeProperties() {
case ExynosM1:
PrefFunctionAlignment = 4;
PrefLoopAlignment = 3;
MaxJumpTableSize = 12;
break;
case Kryo:
MaxInterleaveFactor = 4;

View File

@ -90,6 +90,7 @@ protected:
unsigned MaxPrefetchIterationsAhead = UINT_MAX;
unsigned PrefFunctionAlignment = 0;
unsigned PrefLoopAlignment = 0;
unsigned MaxJumpTableSize = 0;
// ReserveX18 - X18 is not available as a general purpose register.
bool ReserveX18;
@ -203,6 +204,8 @@ public:
/// Preferred function alignment recorded for this subtarget. Stays at the
/// member's default of zero unless the CPU-specific initialization sets it.
unsigned getPrefFunctionAlignment() const { return PrefFunctionAlignment; }
/// Preferred loop alignment recorded for this subtarget. Stays at the
/// member's default of zero unless the CPU-specific initialization sets it.
unsigned getPrefLoopAlignment() const { return PrefLoopAlignment; }
/// Subtarget-specific cap on the number of entries in a jump table. Zero, the
/// member's default, means this subtarget does not request a limit.
unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; }
/// CPU has TBI (top byte of addresses is ignored during HW address
/// translation) and OS enables it.
bool supportsAddressTopByteIgnored() const;

View File

@ -0,0 +1,94 @@
; RUN: llc %s -O2 -print-machineinstrs -march=aarch64 -jump-table-density=40 -o - 2>%t; FileCheck %s --check-prefixes=CHECK,CHECK0 <%t
; RUN: llc %s -O2 -print-machineinstrs -march=aarch64 -jump-table-density=40 -max-jump-table=4 -o - 2>%t; FileCheck %s --check-prefixes=CHECK,CHECK4 <%t
; RUN: llc %s -O2 -print-machineinstrs -march=aarch64 -jump-table-density=40 -max-jump-table=8 -o - 2>%t; FileCheck %s --check-prefixes=CHECK,CHECK8 <%t
; RUN: llc %s -O2 -print-machineinstrs -march=aarch64 -jump-table-density=40 -mcpu=exynos-m1 -o - 2>%t; FileCheck %s --check-prefixes=CHECK,CHECKM1 <%t
declare void @ext(i32)
define i32 @jt1(i32 %a, i32 %b) {
entry:
switch i32 %a, label %return [
i32 1, label %bb1
i32 2, label %bb2
i32 3, label %bb3
i32 4, label %bb4
i32 5, label %bb5
i32 6, label %bb6
i32 7, label %bb7
i32 8, label %bb8
i32 9, label %bb9
i32 10, label %bb10
i32 11, label %bb11
i32 12, label %bb12
i32 13, label %bb13
i32 14, label %bb14
i32 15, label %bb15
i32 16, label %bb16
i32 17, label %bb17
]
; CHECK-LABEL: function jt1:
; CHECK: Jump Tables:
; CHECK0-NEXT: jt#0:
; CHECK0-NOT: jt#1:
; CHECK4-NEXT: jt#0:
; CHECK4-SAME: jt#1:
; CHECK4-SAME: jt#2:
; CHECK4-SAME: jt#3:
; CHECK4-NOT: jt#4:
; CHECK8-NEXT: jt#0:
; CHECK8-SAME: jt#1:
; CHECK8-SAME: jt#2: BB#14 BB#15 BB#16 BB#17{{$}}
; CHECK8-NOT: jt#3:
; CHECKM1-NEXT: jt#0:
; CHECKM1-SAME: jt#1: BB#13 BB#14 BB#15 BB#16 BB#17{{$}}
; CHECKM1-NOT: jt#2:
; CHEC-NEXT: Function Live Ins:
bb1: tail call void @ext(i32 0) br label %return
bb2: tail call void @ext(i32 2) br label %return
bb3: tail call void @ext(i32 4) br label %return
bb4: tail call void @ext(i32 6) br label %return
bb5: tail call void @ext(i32 8) br label %return
bb6: tail call void @ext(i32 10) br label %return
bb7: tail call void @ext(i32 12) br label %return
bb8: tail call void @ext(i32 14) br label %return
bb9: tail call void @ext(i32 16) br label %return
bb10: tail call void @ext(i32 18) br label %return
bb11: tail call void @ext(i32 20) br label %return
bb12: tail call void @ext(i32 22) br label %return
bb13: tail call void @ext(i32 24) br label %return
bb14: tail call void @ext(i32 26) br label %return
bb15: tail call void @ext(i32 28) br label %return
bb16: tail call void @ext(i32 30) br label %return
bb17: tail call void @ext(i32 32) br label %return
return: ret i32 %b
}
define void @jt2(i32 %x) {
entry:
switch i32 %x, label %return [
i32 1, label %bb1
i32 2, label %bb2
i32 3, label %bb3
i32 4, label %bb4
i32 14, label %bb5
i32 15, label %bb6
]
; CHECK-LABEL: function jt2:
; CHECK: Jump Tables:
; CHECK0-NEXT: jt#0: BB#1 BB#2 BB#3 BB#4 BB#7 BB#7 BB#7 BB#7 BB#7 BB#7 BB#7 BB#7 BB#7 BB#5 BB#6{{$}}
; CHECK4-NEXT: jt#0: BB#1 BB#2 BB#3 BB#4{{$}}
; CHECK8-NEXT: jt#0: BB#1 BB#2 BB#3 BB#4{{$}}
; CHECKM1-NEXT: jt#0: BB#1 BB#2 BB#3 BB#4{{$}}
; CHEC-NEXT: Function Live Ins:
bb1: tail call void @ext(i32 1) br label %return
bb2: tail call void @ext(i32 2) br label %return
bb3: tail call void @ext(i32 3) br label %return
bb4: tail call void @ext(i32 4) br label %return
bb5: tail call void @ext(i32 5) br label %return
bb6: tail call void @ext(i32 6) br label %return
return: ret void
}