mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-04-16 06:30:13 +00:00
Add support to optionally limit the size of jump tables.
Many high-performance processors have a dedicated branch predictor for indirect branches, commonly used with jump tables. As sophisticated as such branch predictors are, they tend to have well-defined limits beyond which their effectiveness is hampered or even nullified. One such limit is the number of possible destinations for a given indirect branch that such branch predictors can handle. This patch considers a limit that a target may set on the number of destination addresses in a jump table. Patch by: Evandro Menezes <e.menezes@samsung.com>, Aditya Kumar <aditya.k7@samsung.com>, Sebastian Pop <s.pop@samsung.com>. Differential revision: https://reviews.llvm.org/D21940 llvm-svn: 282412
This commit is contained in:
parent
a554bff930
commit
44fc35bb88
include/llvm/Target
lib
CodeGen
Target/AArch64
test/CodeGen/AArch64
@ -1022,12 +1022,15 @@ public:
|
|||||||
return UseUnderscoreLongJmp;
|
return UseUnderscoreLongJmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return integer threshold on number of blocks to use jump tables rather
|
/// Return lower limit for number of blocks in a jump table.
|
||||||
/// than if sequence.
|
unsigned getMinimumJumpTableEntries() const {
|
||||||
int getMinimumJumpTableEntries() const {
|
|
||||||
return MinimumJumpTableEntries;
|
return MinimumJumpTableEntries;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return upper limit for number of entries in a jump table.
|
||||||
|
/// Zero if no limit.
|
||||||
|
unsigned getMaximumJumpTableSize() const;
|
||||||
|
|
||||||
/// If a physical register, this specifies the register that
|
/// If a physical register, this specifies the register that
|
||||||
/// llvm.savestack/llvm.restorestack should save and restore.
|
/// llvm.savestack/llvm.restorestack should save and restore.
|
||||||
unsigned getStackPointerRegisterToSaveRestore() const {
|
unsigned getStackPointerRegisterToSaveRestore() const {
|
||||||
@ -1353,12 +1356,15 @@ protected:
|
|||||||
UseUnderscoreLongJmp = Val;
|
UseUnderscoreLongJmp = Val;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Indicate the number of blocks to generate jump tables rather than if
|
/// Indicate the minimum number of blocks to generate jump tables.
|
||||||
/// sequence.
|
void setMinimumJumpTableEntries(unsigned Val) {
|
||||||
void setMinimumJumpTableEntries(int Val) {
|
|
||||||
MinimumJumpTableEntries = Val;
|
MinimumJumpTableEntries = Val;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Indicate the maximum number of entries in jump tables.
|
||||||
|
/// Set to zero to generate unlimited jump tables.
|
||||||
|
void setMaximumJumpTableSize(unsigned);
|
||||||
|
|
||||||
/// If set to a physical register, this specifies the register that
|
/// If set to a physical register, this specifies the register that
|
||||||
/// llvm.savestack/llvm.restorestack should save and restore.
|
/// llvm.savestack/llvm.restorestack should save and restore.
|
||||||
void setStackPointerRegisterToSaveRestore(unsigned R) {
|
void setStackPointerRegisterToSaveRestore(unsigned R) {
|
||||||
|
@ -8454,12 +8454,19 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
|
|||||||
if (!areJTsAllowed(TLI, SI))
|
if (!areJTsAllowed(TLI, SI))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
const bool OptForSize = DefaultMBB->getParent()->getFunction()->optForSize();
|
||||||
|
|
||||||
const int64_t N = Clusters.size();
|
const int64_t N = Clusters.size();
|
||||||
const unsigned MinJumpTableSize = TLI.getMinimumJumpTableEntries();
|
const unsigned MinJumpTableEntries = TLI.getMinimumJumpTableEntries();
|
||||||
|
const unsigned MaxJumpTableSize =
|
||||||
|
OptForSize ? UINT_MAX : TLI.getMaximumJumpTableSize() ?
|
||||||
|
TLI.getMaximumJumpTableSize() : UINT_MAX;
|
||||||
|
|
||||||
|
if (N < 2 || N < MinJumpTableEntries)
|
||||||
|
return;
|
||||||
|
|
||||||
// TotalCases[i]: Total nbr of cases in Clusters[0..i].
|
// TotalCases[i]: Total nbr of cases in Clusters[0..i].
|
||||||
SmallVector<unsigned, 8> TotalCases(N);
|
SmallVector<unsigned, 8> TotalCases(N);
|
||||||
|
|
||||||
for (unsigned i = 0; i < N; ++i) {
|
for (unsigned i = 0; i < N; ++i) {
|
||||||
const APInt &Hi = Clusters[i].High->getValue();
|
const APInt &Hi = Clusters[i].High->getValue();
|
||||||
const APInt &Lo = Clusters[i].Low->getValue();
|
const APInt &Lo = Clusters[i].Low->getValue();
|
||||||
@ -8468,12 +8475,16 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
|
|||||||
TotalCases[i] += TotalCases[i - 1];
|
TotalCases[i] += TotalCases[i - 1];
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned MinDensity = JumpTableDensity;
|
const unsigned MinDensity =
|
||||||
if (DefaultMBB->getParent()->getFunction()->optForSize())
|
OptForSize ? OptsizeJumpTableDensity : JumpTableDensity;
|
||||||
MinDensity = OptsizeJumpTableDensity;
|
|
||||||
if (N >= MinJumpTableSize
|
// Cheap case: the whole range may be suitable for jump table.
|
||||||
&& isDense(Clusters, TotalCases, 0, N - 1, MinDensity)) {
|
unsigned JumpTableSize = (Clusters[N - 1].High->getValue() -
|
||||||
// Cheap case: the whole range might be suitable for jump table.
|
Clusters[0].Low->getValue())
|
||||||
|
.getLimitedValue(UINT_MAX - 1) + 1;
|
||||||
|
if (JumpTableSize <= MaxJumpTableSize &&
|
||||||
|
isDense(Clusters, TotalCases, 0, N - 1, MinDensity)) {
|
||||||
|
|
||||||
CaseCluster JTCluster;
|
CaseCluster JTCluster;
|
||||||
if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) {
|
if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) {
|
||||||
Clusters[0] = JTCluster;
|
Clusters[0] = JTCluster;
|
||||||
@ -8503,7 +8514,6 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
|
|||||||
// Base case: There is only one way to partition Clusters[N-1].
|
// Base case: There is only one way to partition Clusters[N-1].
|
||||||
MinPartitions[N - 1] = 1;
|
MinPartitions[N - 1] = 1;
|
||||||
LastElement[N - 1] = N - 1;
|
LastElement[N - 1] = N - 1;
|
||||||
assert(MinJumpTableSize > 1);
|
|
||||||
NumTables[N - 1] = 0;
|
NumTables[N - 1] = 0;
|
||||||
|
|
||||||
// Note: loop indexes are signed to avoid underflow.
|
// Note: loop indexes are signed to avoid underflow.
|
||||||
@ -8517,9 +8527,13 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
|
|||||||
// Search for a solution that results in fewer partitions.
|
// Search for a solution that results in fewer partitions.
|
||||||
for (int64_t j = N - 1; j > i; j--) {
|
for (int64_t j = N - 1; j > i; j--) {
|
||||||
// Try building a partition from Clusters[i..j].
|
// Try building a partition from Clusters[i..j].
|
||||||
if (isDense(Clusters, TotalCases, i, j, MinDensity)) {
|
JumpTableSize = (Clusters[j].High->getValue() -
|
||||||
|
Clusters[i].Low->getValue())
|
||||||
|
.getLimitedValue(UINT_MAX - 1) + 1;
|
||||||
|
if (JumpTableSize <= MaxJumpTableSize &&
|
||||||
|
isDense(Clusters, TotalCases, i, j, MinDensity)) {
|
||||||
unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
|
unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
|
||||||
bool IsTable = j - i + 1 >= MinJumpTableSize;
|
bool IsTable = j - i + 1 >= MinJumpTableEntries;
|
||||||
unsigned Tables = IsTable + (j == N - 1 ? 0 : NumTables[j + 1]);
|
unsigned Tables = IsTable + (j == N - 1 ? 0 : NumTables[j + 1]);
|
||||||
|
|
||||||
// If this j leads to fewer partitions, or same number of partitions
|
// If this j leads to fewer partitions, or same number of partitions
|
||||||
@ -8543,7 +8557,7 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
|
|||||||
unsigned NumClusters = Last - First + 1;
|
unsigned NumClusters = Last - First + 1;
|
||||||
|
|
||||||
CaseCluster JTCluster;
|
CaseCluster JTCluster;
|
||||||
if (NumClusters >= MinJumpTableSize &&
|
if (NumClusters >= MinJumpTableEntries &&
|
||||||
buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)) {
|
buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)) {
|
||||||
Clusters[DstIndex++] = JTCluster;
|
Clusters[DstIndex++] = JTCluster;
|
||||||
} else {
|
} else {
|
||||||
|
@ -44,6 +44,10 @@ static cl::opt<bool> JumpIsExpensiveOverride(
|
|||||||
cl::desc("Do not create extra branches to split comparison logic."),
|
cl::desc("Do not create extra branches to split comparison logic."),
|
||||||
cl::Hidden);
|
cl::Hidden);
|
||||||
|
|
||||||
|
static cl::opt<unsigned> MaximumJumpTableSize
|
||||||
|
("max-jump-table", cl::init(0), cl::Hidden,
|
||||||
|
cl::desc("Set maximum number of jump table entries; zero for no limit."));
|
||||||
|
|
||||||
// Although this default value is arbitrary, it is not random. It is assumed
|
// Although this default value is arbitrary, it is not random. It is assumed
|
||||||
// that a condition that evaluates the same way by a higher percentage than this
|
// that a condition that evaluates the same way by a higher percentage than this
|
||||||
// is best represented as control flow. Therefore, the default value N should be
|
// is best represented as control flow. Therefore, the default value N should be
|
||||||
@ -1831,3 +1835,11 @@ Value *TargetLoweringBase::getSDagStackGuard(const Module &M) const {
|
|||||||
Value *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const {
|
Value *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unsigned TargetLoweringBase::getMaximumJumpTableSize() const {
|
||||||
|
return MaximumJumpTableSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
void TargetLoweringBase::setMaximumJumpTableSize(unsigned Val) {
|
||||||
|
MaximumJumpTableSize = Val;
|
||||||
|
}
|
||||||
|
@ -513,6 +513,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
|
|||||||
setPrefFunctionAlignment(STI.getPrefFunctionAlignment());
|
setPrefFunctionAlignment(STI.getPrefFunctionAlignment());
|
||||||
setPrefLoopAlignment(STI.getPrefLoopAlignment());
|
setPrefLoopAlignment(STI.getPrefLoopAlignment());
|
||||||
|
|
||||||
|
// Only change the limit for entries in a jump table if specified by
|
||||||
|
// the subtarget, but not at the command line.
|
||||||
|
unsigned MaxJT = STI.getMaximumJumpTableSize();
|
||||||
|
if (MaxJT && getMaximumJumpTableSize() == 0)
|
||||||
|
setMaximumJumpTableSize(MaxJT);
|
||||||
|
|
||||||
setHasExtractBitsInsn(true);
|
setHasExtractBitsInsn(true);
|
||||||
|
|
||||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
|
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
|
||||||
|
@ -65,6 +65,7 @@ void AArch64Subtarget::initializeProperties() {
|
|||||||
case ExynosM1:
|
case ExynosM1:
|
||||||
PrefFunctionAlignment = 4;
|
PrefFunctionAlignment = 4;
|
||||||
PrefLoopAlignment = 3;
|
PrefLoopAlignment = 3;
|
||||||
|
MaxJumpTableSize = 12;
|
||||||
break;
|
break;
|
||||||
case Kryo:
|
case Kryo:
|
||||||
MaxInterleaveFactor = 4;
|
MaxInterleaveFactor = 4;
|
||||||
|
@ -90,6 +90,7 @@ protected:
|
|||||||
unsigned MaxPrefetchIterationsAhead = UINT_MAX;
|
unsigned MaxPrefetchIterationsAhead = UINT_MAX;
|
||||||
unsigned PrefFunctionAlignment = 0;
|
unsigned PrefFunctionAlignment = 0;
|
||||||
unsigned PrefLoopAlignment = 0;
|
unsigned PrefLoopAlignment = 0;
|
||||||
|
unsigned MaxJumpTableSize = 0;
|
||||||
|
|
||||||
// ReserveX18 - X18 is not available as a general purpose register.
|
// ReserveX18 - X18 is not available as a general purpose register.
|
||||||
bool ReserveX18;
|
bool ReserveX18;
|
||||||
@ -203,6 +204,8 @@ public:
|
|||||||
unsigned getPrefFunctionAlignment() const { return PrefFunctionAlignment; }
|
unsigned getPrefFunctionAlignment() const { return PrefFunctionAlignment; }
|
||||||
unsigned getPrefLoopAlignment() const { return PrefLoopAlignment; }
|
unsigned getPrefLoopAlignment() const { return PrefLoopAlignment; }
|
||||||
|
|
||||||
|
unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; }
|
||||||
|
|
||||||
/// CPU has TBI (top byte of addresses is ignored during HW address
|
/// CPU has TBI (top byte of addresses is ignored during HW address
|
||||||
/// translation) and OS enables it.
|
/// translation) and OS enables it.
|
||||||
bool supportsAddressTopByteIgnored() const;
|
bool supportsAddressTopByteIgnored() const;
|
||||||
|
94
test/CodeGen/AArch64/max-jump-table.ll
Normal file
94
test/CodeGen/AArch64/max-jump-table.ll
Normal file
@ -0,0 +1,94 @@
|
|||||||
|
; RUN: llc %s -O2 -print-machineinstrs -march=aarch64 -jump-table-density=40 -o - 2>%t; FileCheck %s --check-prefixes=CHECK,CHECK0 <%t
|
||||||
|
; RUN: llc %s -O2 -print-machineinstrs -march=aarch64 -jump-table-density=40 -max-jump-table=4 -o - 2>%t; FileCheck %s --check-prefixes=CHECK,CHECK4 <%t
|
||||||
|
; RUN: llc %s -O2 -print-machineinstrs -march=aarch64 -jump-table-density=40 -max-jump-table=8 -o - 2>%t; FileCheck %s --check-prefixes=CHECK,CHECK8 <%t
|
||||||
|
; RUN: llc %s -O2 -print-machineinstrs -march=aarch64 -jump-table-density=40 -mcpu=exynos-m1 -o - 2>%t; FileCheck %s --check-prefixes=CHECK,CHECKM1 <%t
|
||||||
|
|
||||||
|
declare void @ext(i32)
|
||||||
|
|
||||||
|
define i32 @jt1(i32 %a, i32 %b) {
|
||||||
|
entry:
|
||||||
|
switch i32 %a, label %return [
|
||||||
|
i32 1, label %bb1
|
||||||
|
i32 2, label %bb2
|
||||||
|
i32 3, label %bb3
|
||||||
|
i32 4, label %bb4
|
||||||
|
i32 5, label %bb5
|
||||||
|
i32 6, label %bb6
|
||||||
|
i32 7, label %bb7
|
||||||
|
i32 8, label %bb8
|
||||||
|
i32 9, label %bb9
|
||||||
|
i32 10, label %bb10
|
||||||
|
i32 11, label %bb11
|
||||||
|
i32 12, label %bb12
|
||||||
|
i32 13, label %bb13
|
||||||
|
i32 14, label %bb14
|
||||||
|
i32 15, label %bb15
|
||||||
|
i32 16, label %bb16
|
||||||
|
i32 17, label %bb17
|
||||||
|
]
|
||||||
|
; CHECK-LABEL: function jt1:
|
||||||
|
; CHECK: Jump Tables:
|
||||||
|
; CHECK0-NEXT: jt#0:
|
||||||
|
; CHECK0-NOT: jt#1:
|
||||||
|
; CHECK4-NEXT: jt#0:
|
||||||
|
; CHECK4-SAME: jt#1:
|
||||||
|
; CHECK4-SAME: jt#2:
|
||||||
|
; CHECK4-SAME: jt#3:
|
||||||
|
; CHECK4-NOT: jt#4:
|
||||||
|
; CHECK8-NEXT: jt#0:
|
||||||
|
; CHECK8-SAME: jt#1:
|
||||||
|
; CHECK8-SAME: jt#2: BB#14 BB#15 BB#16 BB#17{{$}}
|
||||||
|
; CHECK8-NOT: jt#3:
|
||||||
|
; CHECKM1-NEXT: jt#0:
|
||||||
|
; CHECKM1-SAME: jt#1: BB#13 BB#14 BB#15 BB#16 BB#17{{$}}
|
||||||
|
; CHECKM1-NOT: jt#2:
|
||||||
|
; CHEC-NEXT: Function Live Ins:
|
||||||
|
|
||||||
|
bb1: tail call void @ext(i32 0) br label %return
|
||||||
|
bb2: tail call void @ext(i32 2) br label %return
|
||||||
|
bb3: tail call void @ext(i32 4) br label %return
|
||||||
|
bb4: tail call void @ext(i32 6) br label %return
|
||||||
|
bb5: tail call void @ext(i32 8) br label %return
|
||||||
|
bb6: tail call void @ext(i32 10) br label %return
|
||||||
|
bb7: tail call void @ext(i32 12) br label %return
|
||||||
|
bb8: tail call void @ext(i32 14) br label %return
|
||||||
|
bb9: tail call void @ext(i32 16) br label %return
|
||||||
|
bb10: tail call void @ext(i32 18) br label %return
|
||||||
|
bb11: tail call void @ext(i32 20) br label %return
|
||||||
|
bb12: tail call void @ext(i32 22) br label %return
|
||||||
|
bb13: tail call void @ext(i32 24) br label %return
|
||||||
|
bb14: tail call void @ext(i32 26) br label %return
|
||||||
|
bb15: tail call void @ext(i32 28) br label %return
|
||||||
|
bb16: tail call void @ext(i32 30) br label %return
|
||||||
|
bb17: tail call void @ext(i32 32) br label %return
|
||||||
|
|
||||||
|
return: ret i32 %b
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @jt2(i32 %x) {
|
||||||
|
entry:
|
||||||
|
switch i32 %x, label %return [
|
||||||
|
i32 1, label %bb1
|
||||||
|
i32 2, label %bb2
|
||||||
|
i32 3, label %bb3
|
||||||
|
i32 4, label %bb4
|
||||||
|
|
||||||
|
i32 14, label %bb5
|
||||||
|
i32 15, label %bb6
|
||||||
|
]
|
||||||
|
; CHECK-LABEL: function jt2:
|
||||||
|
; CHECK: Jump Tables:
|
||||||
|
; CHECK0-NEXT: jt#0: BB#1 BB#2 BB#3 BB#4 BB#7 BB#7 BB#7 BB#7 BB#7 BB#7 BB#7 BB#7 BB#7 BB#5 BB#6{{$}}
|
||||||
|
; CHECK4-NEXT: jt#0: BB#1 BB#2 BB#3 BB#4{{$}}
|
||||||
|
; CHECK8-NEXT: jt#0: BB#1 BB#2 BB#3 BB#4{{$}}
|
||||||
|
; CHECKM1-NEXT: jt#0: BB#1 BB#2 BB#3 BB#4{{$}}
|
||||||
|
; CHEC-NEXT: Function Live Ins:
|
||||||
|
|
||||||
|
bb1: tail call void @ext(i32 1) br label %return
|
||||||
|
bb2: tail call void @ext(i32 2) br label %return
|
||||||
|
bb3: tail call void @ext(i32 3) br label %return
|
||||||
|
bb4: tail call void @ext(i32 4) br label %return
|
||||||
|
bb5: tail call void @ext(i32 5) br label %return
|
||||||
|
bb6: tail call void @ext(i32 6) br label %return
|
||||||
|
return: ret void
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user