From 44fc35bb88668cda0d1639d25f2fd9f1a2d209bd Mon Sep 17 00:00:00 2001 From: Evandro Menezes Date: Mon, 26 Sep 2016 15:32:33 +0000 Subject: [PATCH] Add support to optionally limit the size of jump tables. Many high-performance processors have a dedicated branch predictor for indirect branches, commonly used with jump tables. As sophisticated as such branch predictors are, they tend to have well-defined limits beyond which their effectiveness is hampered or even nullified. One such limit is the number of possible destinations for a given indirect branch that such branch predictors can handle. This patch considers a limit that a target may set to the number of destination addresses in a jump table. Patch by: Evandro Menezes, Aditya Kumar, Sebastian Pop. Differential revision: https://reviews.llvm.org/D21940 llvm-svn: 282412 --- include/llvm/Target/TargetLowering.h | 18 ++-- .../SelectionDAG/SelectionDAGBuilder.cpp | 38 +++++--- lib/CodeGen/TargetLoweringBase.cpp | 12 +++ lib/Target/AArch64/AArch64ISelLowering.cpp | 6 ++ lib/Target/AArch64/AArch64Subtarget.cpp | 1 + lib/Target/AArch64/AArch64Subtarget.h | 3 + test/CodeGen/AArch64/max-jump-table.ll | 94 +++++++++++++++++++ 7 files changed, 154 insertions(+), 18 deletions(-) create mode 100644 test/CodeGen/AArch64/max-jump-table.ll diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 93bdeaaff0e..66eb4179ff6 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -1022,12 +1022,15 @@ public: return UseUnderscoreLongJmp; } - /// Return integer threshold on number of blocks to use jump tables rather - /// than if sequence. - int getMinimumJumpTableEntries() const { + /// Return lower limit for number of blocks in a jump table. + unsigned getMinimumJumpTableEntries() const { return MinimumJumpTableEntries; } + /// Return upper limit for number of entries in a jump table. + /// Zero if no limit. 
+ unsigned getMaximumJumpTableSize() const; + /// If a physical register, this specifies the register that /// llvm.savestack/llvm.restorestack should save and restore. unsigned getStackPointerRegisterToSaveRestore() const { @@ -1353,12 +1356,15 @@ protected: UseUnderscoreLongJmp = Val; } - /// Indicate the number of blocks to generate jump tables rather than if - /// sequence. - void setMinimumJumpTableEntries(int Val) { + /// Indicate the minimum number of blocks to generate jump tables. + void setMinimumJumpTableEntries(unsigned Val) { MinimumJumpTableEntries = Val; } + /// Indicate the maximum number of entries in jump tables. + /// Set to zero to generate unlimited jump tables. + void setMaximumJumpTableSize(unsigned); + /// If set to a physical register, this specifies the register that /// llvm.savestack/llvm.restorestack should save and restore. void setStackPointerRegisterToSaveRestore(unsigned R) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index a0db9ae34f8..5a4d49d9368 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -8454,12 +8454,19 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, if (!areJTsAllowed(TLI, SI)) return; + const bool OptForSize = DefaultMBB->getParent()->getFunction()->optForSize(); + const int64_t N = Clusters.size(); - const unsigned MinJumpTableSize = TLI.getMinimumJumpTableEntries(); + const unsigned MinJumpTableEntries = TLI.getMinimumJumpTableEntries(); + const unsigned MaxJumpTableSize = + OptForSize ? UINT_MAX : TLI.getMaximumJumpTableSize() ? + TLI.getMaximumJumpTableSize() : UINT_MAX; + + if (N < 2 || N < MinJumpTableEntries) + return; // TotalCases[i]: Total nbr of cases in Clusters[0..i]. 
SmallVector TotalCases(N); - for (unsigned i = 0; i < N; ++i) { const APInt &Hi = Clusters[i].High->getValue(); const APInt &Lo = Clusters[i].Low->getValue(); @@ -8468,12 +8475,16 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, TotalCases[i] += TotalCases[i - 1]; } - unsigned MinDensity = JumpTableDensity; - if (DefaultMBB->getParent()->getFunction()->optForSize()) - MinDensity = OptsizeJumpTableDensity; - if (N >= MinJumpTableSize - && isDense(Clusters, TotalCases, 0, N - 1, MinDensity)) { - // Cheap case: the whole range might be suitable for jump table. + const unsigned MinDensity = + OptForSize ? OptsizeJumpTableDensity : JumpTableDensity; + + // Cheap case: the whole range may be suitable for jump table. + unsigned JumpTableSize = (Clusters[N - 1].High->getValue() - + Clusters[0].Low->getValue()) + .getLimitedValue(UINT_MAX - 1) + 1; + if (JumpTableSize <= MaxJumpTableSize && + isDense(Clusters, TotalCases, 0, N - 1, MinDensity)) { + CaseCluster JTCluster; if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) { Clusters[0] = JTCluster; @@ -8503,7 +8514,6 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, // Base case: There is only one way to partition Clusters[N-1]. MinPartitions[N - 1] = 1; LastElement[N - 1] = N - 1; - assert(MinJumpTableSize > 1); NumTables[N - 1] = 0; // Note: loop indexes are signed to avoid underflow. @@ -8517,9 +8527,13 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, // Search for a solution that results in fewer partitions. for (int64_t j = N - 1; j > i; j--) { // Try building a partition from Clusters[i..j]. - if (isDense(Clusters, TotalCases, i, j, MinDensity)) { + JumpTableSize = (Clusters[j].High->getValue() - + Clusters[i].Low->getValue()) + .getLimitedValue(UINT_MAX - 1) + 1; + if (JumpTableSize <= MaxJumpTableSize && + isDense(Clusters, TotalCases, i, j, MinDensity)) { unsigned NumPartitions = 1 + (j == N - 1 ? 
0 : MinPartitions[j + 1]); - bool IsTable = j - i + 1 >= MinJumpTableSize; + bool IsTable = j - i + 1 >= MinJumpTableEntries; unsigned Tables = IsTable + (j == N - 1 ? 0 : NumTables[j + 1]); // If this j leads to fewer partitions, or same number of partitions @@ -8543,7 +8557,7 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, unsigned NumClusters = Last - First + 1; CaseCluster JTCluster; - if (NumClusters >= MinJumpTableSize && + if (NumClusters >= MinJumpTableEntries && buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)) { Clusters[DstIndex++] = JTCluster; } else { diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 4c11f2131f2..be66f7e9e0e 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -44,6 +44,10 @@ static cl::opt JumpIsExpensiveOverride( cl::desc("Do not create extra branches to split comparison logic."), cl::Hidden); +static cl::opt MaximumJumpTableSize + ("max-jump-table", cl::init(0), cl::Hidden, + cl::desc("Set maximum number of jump table entries; zero for no limit.")); + // Although this default value is arbitrary, it is not random. It is assumed // that a condition that evaluates the same way by a higher percentage than this // is best represented as control flow. 
Therefore, the default value N should be @@ -1831,3 +1835,11 @@ Value *TargetLoweringBase::getSDagStackGuard(const Module &M) const { Value *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const { return nullptr; } + +unsigned TargetLoweringBase::getMaximumJumpTableSize() const { + return MaximumJumpTableSize; +} + +void TargetLoweringBase::setMaximumJumpTableSize(unsigned Val) { + MaximumJumpTableSize = Val; +} diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index a611549c912..7647ea2fd65 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -513,6 +513,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setPrefFunctionAlignment(STI.getPrefFunctionAlignment()); setPrefLoopAlignment(STI.getPrefLoopAlignment()); + // Only change the limit for entries in a jump table if specified by + // the subtarget, but not at the command line. + unsigned MaxJT = STI.getMaximumJumpTableSize(); + if (MaxJT && getMaximumJumpTableSize() == 0) + setMaximumJumpTableSize(MaxJT); + setHasExtractBitsInsn(true); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp index d257e81a952..2d346cb512a 100644 --- a/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/lib/Target/AArch64/AArch64Subtarget.cpp @@ -65,6 +65,7 @@ void AArch64Subtarget::initializeProperties() { case ExynosM1: PrefFunctionAlignment = 4; PrefLoopAlignment = 3; + MaxJumpTableSize = 12; break; case Kryo: MaxInterleaveFactor = 4; diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h index 0ab823b4e6b..bda29196388 100644 --- a/lib/Target/AArch64/AArch64Subtarget.h +++ b/lib/Target/AArch64/AArch64Subtarget.h @@ -90,6 +90,7 @@ protected: unsigned MaxPrefetchIterationsAhead = UINT_MAX; unsigned PrefFunctionAlignment = 0; unsigned PrefLoopAlignment = 0; + 
unsigned MaxJumpTableSize = 0; // ReserveX18 - X18 is not available as a general purpose register. bool ReserveX18; @@ -203,6 +204,8 @@ public: unsigned getPrefFunctionAlignment() const { return PrefFunctionAlignment; } unsigned getPrefLoopAlignment() const { return PrefLoopAlignment; } + unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; } + /// CPU has TBI (top byte of addresses is ignored during HW address /// translation) and OS enables it. bool supportsAddressTopByteIgnored() const; diff --git a/test/CodeGen/AArch64/max-jump-table.ll b/test/CodeGen/AArch64/max-jump-table.ll new file mode 100644 index 00000000000..9b46ff64904 --- /dev/null +++ b/test/CodeGen/AArch64/max-jump-table.ll @@ -0,0 +1,94 @@ +; RUN: llc %s -O2 -print-machineinstrs -march=aarch64 -jump-table-density=40 -o - 2>%t; FileCheck %s --check-prefixes=CHECK,CHECK0 <%t +; RUN: llc %s -O2 -print-machineinstrs -march=aarch64 -jump-table-density=40 -max-jump-table=4 -o - 2>%t; FileCheck %s --check-prefixes=CHECK,CHECK4 <%t +; RUN: llc %s -O2 -print-machineinstrs -march=aarch64 -jump-table-density=40 -max-jump-table=8 -o - 2>%t; FileCheck %s --check-prefixes=CHECK,CHECK8 <%t +; RUN: llc %s -O2 -print-machineinstrs -march=aarch64 -jump-table-density=40 -mcpu=exynos-m1 -o - 2>%t; FileCheck %s --check-prefixes=CHECK,CHECKM1 <%t + +declare void @ext(i32) + +define i32 @jt1(i32 %a, i32 %b) { +entry: + switch i32 %a, label %return [ + i32 1, label %bb1 + i32 2, label %bb2 + i32 3, label %bb3 + i32 4, label %bb4 + i32 5, label %bb5 + i32 6, label %bb6 + i32 7, label %bb7 + i32 8, label %bb8 + i32 9, label %bb9 + i32 10, label %bb10 + i32 11, label %bb11 + i32 12, label %bb12 + i32 13, label %bb13 + i32 14, label %bb14 + i32 15, label %bb15 + i32 16, label %bb16 + i32 17, label %bb17 + ] +; CHECK-LABEL: function jt1: +; CHECK: Jump Tables: +; CHECK0-NEXT: jt#0: +; CHECK0-NOT: jt#1: +; CHECK4-NEXT: jt#0: +; CHECK4-SAME: jt#1: +; CHECK4-SAME: jt#2: +; CHECK4-SAME: jt#3: +; CHECK4-NOT: jt#4: +; 
CHECK8-NEXT: jt#0: +; CHECK8-SAME: jt#1: +; CHECK8-SAME: jt#2: BB#14 BB#15 BB#16 BB#17{{$}} +; CHECK8-NOT: jt#3: +; CHECKM1-NEXT: jt#0: +; CHECKM1-SAME: jt#1: BB#13 BB#14 BB#15 BB#16 BB#17{{$}} +; CHECKM1-NOT: jt#2: +; CHEC-NEXT: Function Live Ins: + +bb1: tail call void @ext(i32 0) br label %return +bb2: tail call void @ext(i32 2) br label %return +bb3: tail call void @ext(i32 4) br label %return +bb4: tail call void @ext(i32 6) br label %return +bb5: tail call void @ext(i32 8) br label %return +bb6: tail call void @ext(i32 10) br label %return +bb7: tail call void @ext(i32 12) br label %return +bb8: tail call void @ext(i32 14) br label %return +bb9: tail call void @ext(i32 16) br label %return +bb10: tail call void @ext(i32 18) br label %return +bb11: tail call void @ext(i32 20) br label %return +bb12: tail call void @ext(i32 22) br label %return +bb13: tail call void @ext(i32 24) br label %return +bb14: tail call void @ext(i32 26) br label %return +bb15: tail call void @ext(i32 28) br label %return +bb16: tail call void @ext(i32 30) br label %return +bb17: tail call void @ext(i32 32) br label %return + +return: ret i32 %b +} + +define void @jt2(i32 %x) { +entry: + switch i32 %x, label %return [ + i32 1, label %bb1 + i32 2, label %bb2 + i32 3, label %bb3 + i32 4, label %bb4 + + i32 14, label %bb5 + i32 15, label %bb6 + ] +; CHECK-LABEL: function jt2: +; CHECK: Jump Tables: +; CHECK0-NEXT: jt#0: BB#1 BB#2 BB#3 BB#4 BB#7 BB#7 BB#7 BB#7 BB#7 BB#7 BB#7 BB#7 BB#7 BB#5 BB#6{{$}} +; CHECK4-NEXT: jt#0: BB#1 BB#2 BB#3 BB#4{{$}} +; CHECK8-NEXT: jt#0: BB#1 BB#2 BB#3 BB#4{{$}} +; CHECKM1-NEXT: jt#0: BB#1 BB#2 BB#3 BB#4{{$}} +; CHEC-NEXT: Function Live Ins: + +bb1: tail call void @ext(i32 1) br label %return +bb2: tail call void @ext(i32 2) br label %return +bb3: tail call void @ext(i32 3) br label %return +bb4: tail call void @ext(i32 4) br label %return +bb5: tail call void @ext(i32 5) br label %return +bb6: tail call void @ext(i32 6) br label %return +return: ret void +}