Re-commit r235560: Switch lowering: extract jump tables and bit tests before building binary tree (PR22262)

Third time's the charm. The previous commit was reverted because a
reverse for-loop in SelectionDAGBuilder::lowerWorkItem did 'I--'
on an iterator at the beginning of a vector, causing asserts
when using debugging iterators. This commit fixes that.

llvm-svn: 235608
Hans Wennborg 2015-04-23 16:45:24 +00:00
parent d205a174d9
commit 8823c80ce0
14 changed files with 1325 additions and 913 deletions
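
For context on the iterator fix, here is a minimal sketch of the pitfall and the safe pattern (illustrative C++ only, not the actual lowerWorkItem code):

  #include <vector>

  void walkBackwards(std::vector<int> &Clusters) {
    // Buggy shape: an 'I--' loop update can fire when I already equals
    // Clusters.begin(); that is undefined behavior, and checked/debugging
    // iterators assert on it.
    //
    // Safe shape: compare against begin() first, then decrement inside the
    // body, so I always stays within [begin(), end()).
    for (auto I = Clusters.end(); I != Clusters.begin();) {
      --I;
      // ... visit *I ...
    }
  }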

File diff suppressed because it is too large.


@ -134,26 +134,65 @@ private:
/// SDNodes we create.
unsigned SDNodeOrder;
/// Case - A struct to record the Value for a switch case, and the
/// case's target basic block.
struct Case {
const ConstantInt *Low;
const ConstantInt *High;
MachineBasicBlock* BB;
uint32_t ExtraWeight;
enum CaseClusterKind {
/// A cluster of adjacent case labels with the same destination, or just one
/// case.
CC_Range,
/// A cluster of cases suitable for jump table lowering.
CC_JumpTable,
/// A cluster of cases suitable for bit test lowering.
CC_BitTests
};
Case() : Low(nullptr), High(nullptr), BB(nullptr), ExtraWeight(0) { }
Case(const ConstantInt *low, const ConstantInt *high, MachineBasicBlock *bb,
uint32_t extraweight) : Low(low), High(high), BB(bb),
ExtraWeight(extraweight) { }
/// A cluster of case labels.
struct CaseCluster {
CaseClusterKind Kind;
const ConstantInt *Low, *High;
union {
MachineBasicBlock *MBB;
unsigned JTCasesIndex;
unsigned BTCasesIndex;
};
uint64_t Weight;
APInt size() const {
const APInt &rHigh = High->getValue();
const APInt &rLow = Low->getValue();
return (rHigh - rLow + 1ULL);
static CaseCluster range(const ConstantInt *Low, const ConstantInt *High,
MachineBasicBlock *MBB, uint32_t Weight) {
CaseCluster C;
C.Kind = CC_Range;
C.Low = Low;
C.High = High;
C.MBB = MBB;
C.Weight = Weight;
return C;
}
static CaseCluster jumpTable(const ConstantInt *Low,
const ConstantInt *High, unsigned JTCasesIndex,
uint32_t Weight) {
CaseCluster C;
C.Kind = CC_JumpTable;
C.Low = Low;
C.High = High;
C.JTCasesIndex = JTCasesIndex;
C.Weight = Weight;
return C;
}
static CaseCluster bitTests(const ConstantInt *Low, const ConstantInt *High,
unsigned BTCasesIndex, uint32_t Weight) {
CaseCluster C;
C.Kind = CC_BitTests;
C.Low = Low;
C.High = High;
C.BTCasesIndex = BTCasesIndex;
C.Weight = Weight;
return C;
}
};
typedef std::vector<CaseCluster> CaseClusterVector;
typedef CaseClusterVector::iterator CaseClusterIt;
struct CaseBits {
uint64_t Mask;
MachineBasicBlock* BB;
@ -163,42 +202,14 @@ private:
CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits,
uint32_t Weight):
Mask(mask), BB(bb), Bits(bits), ExtraWeight(Weight) { }
CaseBits() : Mask(0), BB(nullptr), Bits(0), ExtraWeight(0) {}
};
typedef std::vector<Case> CaseVector;
typedef std::vector<CaseBits> CaseBitsVector;
typedef CaseVector::iterator CaseItr;
typedef std::pair<CaseItr, CaseItr> CaseRange;
typedef std::vector<CaseBits> CaseBitsVector;
/// CaseRec - A struct with ctor used in lowering switches to a binary tree
/// of conditional branches.
struct CaseRec {
CaseRec(MachineBasicBlock *bb, const ConstantInt *lt, const ConstantInt *ge,
CaseRange r) :
CaseBB(bb), LT(lt), GE(ge), Range(r) {}
/// CaseBB - The MBB in which to emit the compare and branch
MachineBasicBlock *CaseBB;
/// LT, GE - If nonzero, we know the current case value must be less-than or
/// greater-than-or-equal-to these Constants.
const ConstantInt *LT;
const ConstantInt *GE;
/// Range - A pair of iterators representing the range of case values to be
/// processed at this point in the binary search tree.
CaseRange Range;
};
typedef std::vector<CaseRec> CaseRecVector;
struct CaseBitsCmp {
bool operator()(const CaseBits &C1, const CaseBits &C2) {
return C1.Bits > C2.Bits;
}
};
/// Populate Cases with the cases in SI, clustering adjacent cases with the
/// same destination together.
void Clusterify(CaseVector &Cases, const SwitchInst *SI);
/// Sort Clusters and merge adjacent cases.
void sortAndRangeify(CaseClusterVector &Clusters);
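
A self-contained sketch of the idea behind sortAndRangeify, with simplified stand-in types (the real code operates on CaseCluster and ConstantInt values): sort by case value, then fold adjacent cases that share a destination into a single range, accumulating their weights.

  #include <algorithm>
  #include <cstdint>
  #include <vector>

  struct SimpleCluster { int64_t Low, High; int Dest; uint32_t Weight; };

  void sortAndMerge(std::vector<SimpleCluster> &Clusters) {
    // Sort by case value so adjacent cases end up next to each other.
    std::sort(Clusters.begin(), Clusters.end(),
              [](const SimpleCluster &A, const SimpleCluster &B) {
                return A.Low < B.Low;
              });
    size_t Out = 0;
    for (const SimpleCluster &C : Clusters) {
      if (Out > 0 && Clusters[Out - 1].Dest == C.Dest &&
          Clusters[Out - 1].High + 1 == C.Low) {
        // Adjacent case with the same destination: grow the previous range.
        Clusters[Out - 1].High = C.High;
        Clusters[Out - 1].Weight += C.Weight;
      } else {
        Clusters[Out++] = C;
      }
    }
    Clusters.resize(Out);
  }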
/// CaseBlock - This structure is used to communicate between
/// SelectionDAGBuilder and SDISel for the code generation of additional basic
@ -288,6 +299,58 @@ private:
BitTestInfo Cases;
};
/// Minimum jump table density, in percent.
enum { MinJumpTableDensity = 40 };
/// Check whether a range of clusters is dense enough for a jump table.
bool isDense(const CaseClusterVector &Clusters, unsigned *TotalCases,
unsigned First, unsigned Last);
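
A rough, self-contained sketch of the density criterion (a simplification; the real isDense also uses the precomputed TotalCases counts over the cluster vector):

  #include <cstdint>

  // A candidate table over [First..Last] is "dense enough" when the number of
  // case values it covers is at least MinDensityPercent of the table's span.
  // Simplified inputs; MinJumpTableDensity above is 40 percent.
  bool denseEnough(uint64_t NumCases, uint64_t Low, uint64_t High,
                   unsigned MinDensityPercent = 40) {
    uint64_t Span = High - Low + 1;  // number of jump table entries needed
    return NumCases * 100 >= Span * MinDensityPercent;
  }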
/// Build a jump table cluster from Clusters[First..Last]. Returns false if it
/// decides it's not a good idea.
bool buildJumpTable(CaseClusterVector &Clusters, unsigned First,
unsigned Last, const SwitchInst *SI,
MachineBasicBlock *DefaultMBB, CaseCluster &JTCluster);
/// Find clusters of cases suitable for jump table lowering.
void findJumpTables(CaseClusterVector &Clusters, const SwitchInst *SI,
MachineBasicBlock *DefaultMBB);
/// Check whether the range [Low,High] fits in a machine word.
bool rangeFitsInWord(const APInt &Low, const APInt &High);
/// Check whether these clusters are suitable for lowering with bit tests based
/// on the number of destinations, comparison metric, and range.
bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps,
const APInt &Low, const APInt &High);
/// Build a bit test cluster from Clusters[First..Last]. Returns false if it
/// decides it's not a good idea.
bool buildBitTests(CaseClusterVector &Clusters, unsigned First, unsigned Last,
const SwitchInst *SI, CaseCluster &BTCluster);
/// Find clusters of cases suitable for bit test lowering.
void findBitTestClusters(CaseClusterVector &Clusters, const SwitchInst *SI);
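
To illustrate what a bit-test cluster buys, here is a conceptual sketch of how a lowered cluster picks its destination (the real code emits SelectionDAG nodes, and the suitability limits live in isSuitableForBitTests; a 64-bit machine word is assumed):

  #include <cstdint>

  // Masks[i] has bit (Value - Low) set for every case value routed to
  // Dests[i]; one mask-and-bit check replaces one compare per case.
  int bitTestDest(uint64_t X, uint64_t Low, uint64_t High,
                  const uint64_t Masks[], const int Dests[], int NumDests,
                  int DefaultDest) {
    if (X < Low || X > High)           // range check emitted before the tests
      return DefaultDest;
    uint64_t Bit = 1ULL << (X - Low);  // fits because the range fits in a word
    for (int I = 0; I < NumDests; ++I)
      if (Masks[I] & Bit)              // one btl-style test per destination
        return Dests[I];
    return DefaultDest;
  }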
struct SwitchWorkListItem {
MachineBasicBlock *MBB;
CaseClusterIt FirstCluster;
CaseClusterIt LastCluster;
const ConstantInt *GE;
const ConstantInt *LT;
};
typedef SmallVector<SwitchWorkListItem, 4> SwitchWorkList;
/// Emit comparison and split W into two subtrees.
void splitWorkItem(SwitchWorkList &WorkList, const SwitchWorkListItem &W,
Value *Cond, MachineBasicBlock *SwitchMBB);
/// Lower W.
void lowerWorkItem(SwitchWorkListItem W, Value *Cond,
MachineBasicBlock *SwitchMBB,
MachineBasicBlock *DefaultMBB);
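
Taken together, the declarations above imply a worklist-driven lowering loop. A stand-alone skeleton, with an invented pivot choice and "small enough" threshold (the real splitWorkItem picks the pivot from cluster weights):

  #include <vector>

  struct WorkItem { int First, Last; };  // an index range into the clusters

  void lowerSwitchSkeleton(int NumClusters) {
    std::vector<WorkItem> WorkList = {{0, NumClusters - 1}};
    while (!WorkList.empty()) {
      WorkItem W = WorkList.back();
      WorkList.pop_back();
      if (W.Last - W.First + 1 <= 3) {
        // lowerWorkItem: emit compares, a jump table, or bit tests for W.
      } else {
        // splitWorkItem: emit one comparison and push both halves.
        int Pivot = (W.First + W.Last) / 2;  // invented pivot choice
        WorkList.push_back({W.First, Pivot});
        WorkList.push_back({Pivot + 1, W.Last});
      }
    }
  }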
/// A class which encapsulates all of the information needed to generate a
/// stack protector check and signals to isel via its state being initialized
/// that a stack protector needs to be generated.
@ -670,29 +733,6 @@ private:
void visitIndirectBr(const IndirectBrInst &I);
void visitUnreachable(const UnreachableInst &I);
// Helpers for visitSwitch
bool handleSmallSwitchRange(CaseRec& CR,
CaseRecVector& WorkList,
const Value* SV,
MachineBasicBlock* Default,
MachineBasicBlock *SwitchBB);
bool handleJTSwitchCase(CaseRec& CR,
CaseRecVector& WorkList,
const Value* SV,
MachineBasicBlock* Default,
MachineBasicBlock *SwitchBB);
bool handleBTSplitSwitchCase(CaseRec& CR,
CaseRecVector& WorkList,
const Value* SV,
MachineBasicBlock *SwitchBB);
void splitSwitchCase(CaseRec &CR, CaseItr Pivot, CaseRecVector &WorkList,
const Value *SV, MachineBasicBlock *SwitchBB);
bool handleBitTestsSwitchCase(CaseRec& CR,
CaseRecVector& WorkList,
const Value* SV,
MachineBasicBlock* Default,
MachineBasicBlock *SwitchBB);
uint32_t getEdgeWeight(const MachineBasicBlock *Src,
const MachineBasicBlock *Dst) const;
void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst,


@ -1413,21 +1413,15 @@ SelectionDAGISel::FinishBasicBlock() {
<< FuncInfo->PHINodesToUpdate[i].first
<< ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n");
const bool MustUpdatePHINodes = SDB->SwitchCases.empty() &&
SDB->JTCases.empty() &&
SDB->BitTestCases.empty();
// Next, now that we know what the last MBB the LLVM BB expanded is, update
// PHI nodes in successors.
if (MustUpdatePHINodes) {
for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first);
assert(PHI->isPHI() &&
"This is not a machine PHI node that we are updating!");
if (!FuncInfo->MBB->isSuccessor(PHI->getParent()))
continue;
PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB);
}
for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first);
assert(PHI->isPHI() &&
"This is not a machine PHI node that we are updating!");
if (!FuncInfo->MBB->isSuccessor(PHI->getParent()))
continue;
PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB);
}
// Handle stack protector.
@ -1472,10 +1466,6 @@ SelectionDAGISel::FinishBasicBlock() {
SDB->SPDescriptor.resetPerBBState();
}
// If we updated PHI Nodes, return early.
if (MustUpdatePHINodes)
return;
for (unsigned i = 0, e = SDB->BitTestCases.size(); i != e; ++i) {
// Lower header first, if it wasn't already lowered
if (!SDB->BitTestCases[i].Emitted) {
@ -1589,16 +1579,6 @@ SelectionDAGISel::FinishBasicBlock() {
}
SDB->JTCases.clear();
// If the switch block involved a branch to one of the actual successors, we
// need to update PHI nodes in that block.
for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first);
assert(PHI->isPHI() &&
"This is not a machine PHI node that we are updating!");
if (FuncInfo->MBB->isSuccessor(PHI->getParent()))
PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB);
}
// If we generated any switch lowering information, build and codegen any
// additional DAGs necessary.
for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) {


@ -151,12 +151,13 @@ protected:
if (Changed && !ReturnMBB.hasAddressTaken()) {
// We now might be able to merge this blr-only block into its
// by-layout predecessor.
if (ReturnMBB.pred_size() == 1 &&
(*ReturnMBB.pred_begin())->isLayoutSuccessor(&ReturnMBB)) {
// Move the blr into the preceding block.
if (ReturnMBB.pred_size() == 1) {
MachineBasicBlock &PrevMBB = **ReturnMBB.pred_begin();
PrevMBB.splice(PrevMBB.end(), &ReturnMBB, I);
PrevMBB.removeSuccessor(&ReturnMBB);
if (PrevMBB.isLayoutSuccessor(&ReturnMBB) && PrevMBB.canFallThrough()) {
// Move the blr into the preceding block.
PrevMBB.splice(PrevMBB.end(), &ReturnMBB, I);
PrevMBB.removeSuccessor(&ReturnMBB);
}
}
if (ReturnMBB.pred_empty())


@ -4,8 +4,8 @@
define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: t1:
; CHECK: cmp r2, #1
; CHECK: cmpne r2, #7
; CHECK: cmp r2, #7
; CHECK: cmpne r2, #1
switch i32 %c, label %cond_next [
i32 1, label %cond_true
i32 7, label %cond_true


@ -194,7 +194,7 @@ lor.lhs.false459: ; preds = %if.end454
%18 = load i32, i32* %mb_type, align 4
switch i32 %18, label %for.inc503 [
i32 9, label %if.then475
i32 10, label %if.then475
i32 11, label %if.then475
i32 13, label %if.then475
i32 14, label %if.then475
]


@ -17,9 +17,9 @@ entry:
; CHECK: BB#0: derived from LLVM BB %entry
; CHECK: Successors according to CFG: BB#2(64) BB#4(14)
; CHECK: BB#4: derived from LLVM BB %entry
; CHECK: Successors according to CFG: BB#1(10) BB#5(4)
; CHECK: Successors according to CFG: BB#1(4) BB#5(10)
; CHECK: BB#5: derived from LLVM BB %entry
; CHECK: Successors according to CFG: BB#1(4) BB#3(7)
; CHECK: Successors according to CFG: BB#1(10) BB#3(7)
sw.bb:
br label %return


@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
@ -45,4 +45,37 @@ if.end3: ; preds = %if.then, %if.then2,
; CHECK: blr
}
@.str0 = private unnamed_addr constant [2 x i8] c"a\00"
@.str1 = private unnamed_addr constant [2 x i8] c"b\00"
@.str2 = private unnamed_addr constant [2 x i8] c"c\00"
@.str3 = private unnamed_addr constant [2 x i8] c"d\00"
@.str4 = private unnamed_addr constant [2 x i8] c"e\00"
define i8* @dont_assert(i32 %x) {
; LLVM would assert due to moving an early return into the jump table block and
; removing one of its predecessors despite that block ending with an indirect
; branch.
entry:
switch i32 %x, label %sw.epilog [
i32 1, label %return
i32 2, label %sw.bb1
i32 3, label %sw.bb2
i32 4, label %sw.bb3
i32 255, label %sw.bb4
]
sw.bb1: br label %return
sw.bb2: br label %return
sw.bb3: br label %return
sw.bb4: br label %return
sw.epilog: br label %return
return:
%retval.0 = phi i8* [ null, %sw.epilog ],
[ getelementptr inbounds ([2 x i8], [2 x i8]* @.str4, i64 0, i64 0), %sw.bb4 ],
[ getelementptr inbounds ([2 x i8], [2 x i8]* @.str3, i64 0, i64 0), %sw.bb3 ],
[ getelementptr inbounds ([2 x i8], [2 x i8]* @.str2, i64 0, i64 0), %sw.bb2 ],
[ getelementptr inbounds ([2 x i8], [2 x i8]* @.str1, i64 0, i64 0), %sw.bb1 ],
[ getelementptr inbounds ([2 x i8], [2 x i8]* @.str0, i64 0, i64 0), %entry ]
ret i8* %retval.0
}
attributes #0 = { nounwind }


@ -1,5 +1,5 @@
; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck %s
; RUN: llc -mcpu=pwr7 -O0 -code-model=large <%s | FileCheck %s
; RUN: llc -mcpu=pwr7 -code-model=medium <%s | FileCheck %s
; RUN: llc -mcpu=pwr7 -code-model=large <%s | FileCheck %s
; Test correct code generation for medium and large code model
; for loading the address of a jump table from the TOC.


@ -3,6 +3,12 @@
; RUN: llc -O0 -mcpu=pwr7 -code-model=large -filetype=obj -fast-isel=false %s -o - | \
; RUN: llvm-readobj -r | FileCheck -check-prefix=LARGE %s
; Run jump table test separately since jump tables aren't generated at -O0.
; RUN: llc -mcpu=pwr7 -code-model=medium -filetype=obj -fast-isel=false %s -o - | \
; RUN: llvm-readobj -r | FileCheck -check-prefix=MEDIUM-JT %s
; RUN: llc -mcpu=pwr7 -code-model=large -filetype=obj -fast-isel=false %s -o - | \
; RUN: llvm-readobj -r | FileCheck -check-prefix=LARGE-JT %s
; FIXME: When asm-parse is available, could make this an assembly test.
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
@ -92,6 +98,46 @@ entry:
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM4:[^ ]+]]
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM4]]
@ti = common global i32 0, align 4
define signext i32 @test_tentative() nounwind {
entry:
%0 = load i32, i32* @ti, align 4
%inc = add nsw i32 %0, 1
store i32 %inc, i32* @ti, align 4
ret i32 %0
}
; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
; accessing tentatively declared variable ti.
;
; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]]
; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]]
;
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]]
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]]
define i8* @test_fnaddr() nounwind {
entry:
%func = alloca i32 (i32)*, align 8
store i32 (i32)* @foo, i32 (i32)** %func, align 8
%0 = load i32 (i32)*, i32 (i32)** %func, align 8
%1 = bitcast i32 (i32)* %0 to i8*
ret i8* %1
}
declare signext i32 @foo(i32 signext)
; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
; accessing function address foo.
;
; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]]
; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]]
;
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]]
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]]
define signext i32 @test_jump_table(i32 signext %i) nounwind {
entry:
%i.addr = alloca i32, align 4
@ -139,47 +185,12 @@ sw.epilog: ; preds = %sw.bb3, %sw.default
; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
; accessing a jump table address.
;
; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM5:[^ ]+]]
; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM5]]
; MEDIUM-JT: Relocations [
; MEDIUM-JT: Section ({{.*}}) .rela.text {
; MEDIUM-JT-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM:[^ ]+]]
; MEDIUM-JT-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM]]
;
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM5:[^ ]+]]
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM5]]
@ti = common global i32 0, align 4
define signext i32 @test_tentative() nounwind {
entry:
%0 = load i32, i32* @ti, align 4
%inc = add nsw i32 %0, 1
store i32 %inc, i32* @ti, align 4
ret i32 %0
}
; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
; accessing tentatively declared variable ti.
;
; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]]
; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]]
;
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]]
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]]
define i8* @test_fnaddr() nounwind {
entry:
%func = alloca i32 (i32)*, align 8
store i32 (i32)* @foo, i32 (i32)** %func, align 8
%0 = load i32 (i32)*, i32 (i32)** %func, align 8
%1 = bitcast i32 (i32)* %0 to i8*
ret i8* %1
}
declare signext i32 @foo(i32 signext)
; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
; accessing function address foo.
;
; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]]
; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]]
;
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]]
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]]
; LARGE-JT: Relocations [
; LARGE-JT: Section ({{.*}}) .rela.text {
; LARGE-JT-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM:[^ ]+]]
; LARGE-JT-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM]]


@ -55,13 +55,15 @@ entry:
]
bb: ; preds = %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry
call void @_Z3bari( i32 0 )
br label %bb1
bb1: ; preds = %bb, %entry
call void @_Z3bari( i32 1 )
br label %bb2
bb2: ; preds = %bb1, %entry
call void @_Z3bari( i32 1 )
call void @_Z3bari( i32 2 )
br label %bb11
bb3: ; preds = %entry


@ -140,19 +140,17 @@ sw.epilog:
; The balanced binary switch here would start with a comparison against 39, but
; it is currently starting with 29 because of the density-sum heuristic.
; CHECK: cmpl $29
; CHECK: cmpl $39
; CHECK: jg
; CHECK: cmpl $10
; CHECK: jne
; CHECK: cmpl $49
; CHECK: jg
; CHECK: cmpl $30
; CHECK: jne
; CHECK: je
; CHECK: cmpl $20
; CHECK: jne
; CHECK: cmpl $40
; CHECK: je
; CHECK: cmpl $50
; CHECK: jne
; CHECK: cmpl $40
; CHECK: cmpl $30
; CHECK: jne
; CHECK: cmpl $60
; CHECK: jne

test/CodeGen/X86/switch.ll (new file, 306 lines)

@ -0,0 +1,306 @@
; RUN: llc -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s
; RUN: llc -mtriple=x86_64-linux-gnu %s -o - -O0 | FileCheck --check-prefix=NOOPT %s
declare void @g(i32)
define void @basic(i32 %x) {
entry:
switch i32 %x, label %return [
i32 3, label %bb0
i32 1, label %bb1
i32 4, label %bb1
i32 5, label %bb0
]
bb0: tail call void @g(i32 0) br label %return
bb1: tail call void @g(i32 1) br label %return
return: ret void
; Should be lowered as straight compares in -O0 mode.
; NOOPT-LABEL: basic
; NOOPT: subl $3, %eax
; NOOPT: je
; NOOPT: subl $1, %eax
; NOOPT: je
; NOOPT: subl $4, %eax
; NOOPT: je
; NOOPT: subl $5, %eax
; NOOPT: je
; Jump table otherwise.
; CHECK-LABEL: basic
; CHECK: decl
; CHECK: cmpl $4
; CHECK: ja
; CHECK: jmpq *.LJTI
}
define void @simple_ranges(i32 %x) {
entry:
switch i32 %x, label %return [
i32 0, label %bb0
i32 1, label %bb0
i32 2, label %bb0
i32 3, label %bb0
i32 100, label %bb1
i32 101, label %bb1
i32 102, label %bb1
i32 103, label %bb1
]
bb0: tail call void @g(i32 0) br label %return
bb1: tail call void @g(i32 1) br label %return
return: ret void
; Should be lowered to two range checks.
; CHECK-LABEL: simple_ranges
; CHECK: leal -100
; CHECK: cmpl $4
; CHECK: jae
; CHECK: cmpl $3
; CHECK: ja
}
define void @jt_is_better(i32 %x) {
entry:
switch i32 %x, label %return [
i32 0, label %bb0
i32 2, label %bb0
i32 4, label %bb0
i32 1, label %bb1
i32 3, label %bb1
i32 5, label %bb1
i32 6, label %bb2
i32 7, label %bb3
i32 8, label %bb4
]
bb0: tail call void @g(i32 0) br label %return
bb1: tail call void @g(i32 1) br label %return
bb2: tail call void @g(i32 2) br label %return
bb3: tail call void @g(i32 3) br label %return
bb4: tail call void @g(i32 4) br label %return
return: ret void
; Cases 0-5 could be lowered with two bit tests,
; but with 6-8, the whole switch is suitable for a jump table.
; CHECK-LABEL: jt_is_better
; CHECK: cmpl $8
; CHECK: jbe
; CHECK: jmpq *.LJTI
}
define void @bt_is_better(i32 %x) {
entry:
switch i32 %x, label %return [
i32 0, label %bb0
i32 3, label %bb0
i32 6, label %bb0
i32 1, label %bb1
i32 4, label %bb1
i32 7, label %bb1
i32 2, label %bb2
i32 5, label %bb2
i32 8, label %bb2
]
bb0: tail call void @g(i32 0) br label %return
bb1: tail call void @g(i32 1) br label %return
bb2: tail call void @g(i32 2) br label %return
return: ret void
; This could be lowered as a jump table, but bit tests are more efficient.
; CHECK-LABEL: bt_is_better
; 73 = 2^0 + 2^3 + 2^6
; CHECK: movl $73
; CHECK: btl
; 146 = 2^1 + 2^4 + 2^7
; CHECK: movl $146
; CHECK: btl
; 292 = 2^2 + 2^5 + 2^8
; CHECK: movl $292
; CHECK: btl
}
define void @optimal_pivot1(i32 %x) {
entry:
switch i32 %x, label %return [
i32 100, label %bb0
i32 200, label %bb1
i32 300, label %bb0
i32 400, label %bb1
i32 500, label %bb0
i32 600, label %bb1
]
bb0: tail call void @g(i32 0) br label %return
bb1: tail call void @g(i32 1) br label %return
return: ret void
; Should pivot around 400 for two subtrees of equal size.
; CHECK-LABEL: optimal_pivot1
; CHECK-NOT: cmpl
; CHECK: cmpl $399
}
define void @optimal_pivot2(i32 %x) {
entry:
switch i32 %x, label %return [
i32 100, label %bb0 i32 101, label %bb1 i32 102, label %bb2 i32 103, label %bb3
i32 200, label %bb0 i32 201, label %bb1 i32 202, label %bb2 i32 203, label %bb3
i32 300, label %bb0 i32 301, label %bb1 i32 302, label %bb2 i32 303, label %bb3
i32 400, label %bb0 i32 401, label %bb1 i32 402, label %bb2 i32 403, label %bb3
]
bb0: tail call void @g(i32 0) br label %return
bb1: tail call void @g(i32 1) br label %return
bb2: tail call void @g(i32 2) br label %return
bb3: tail call void @g(i32 3) br label %return
return: ret void
; Should pivot around 300 for two subtrees with two jump tables each.
; CHECK-LABEL: optimal_pivot2
; CHECK-NOT: cmpl
; CHECK: cmpl $299
; CHECK: jmpq *.LJTI
; CHECK: jmpq *.LJTI
; CHECK: jmpq *.LJTI
; CHECK: jmpq *.LJTI
}
define void @optimal_jump_table1(i32 %x) {
entry:
switch i32 %x, label %return [
i32 0, label %bb0
i32 5, label %bb1
i32 6, label %bb2
i32 12, label %bb3
i32 13, label %bb4
i32 15, label %bb5
]
bb0: tail call void @g(i32 0) br label %return
bb1: tail call void @g(i32 1) br label %return
bb2: tail call void @g(i32 2) br label %return
bb3: tail call void @g(i32 3) br label %return
bb4: tail call void @g(i32 4) br label %return
bb5: tail call void @g(i32 5) br label %return
return: ret void
; Splitting in the largest gap (between 6 and 12) would yield a suboptimal result.
; Expecting a jump table from 5 to 15.
; CHECK-LABEL: optimal_jump_table1
; CHECK: leal -5
; CHECK: cmpl $10
; CHECK: jmpq *.LJTI
}
define void @optimal_jump_table2(i32 %x) {
entry:
switch i32 %x, label %return [
i32 0, label %bb0
i32 1, label %bb1
i32 2, label %bb2
i32 9, label %bb3
i32 14, label %bb4
i32 15, label %bb5
]
bb0: tail call void @g(i32 0) br label %return
bb1: tail call void @g(i32 1) br label %return
bb2: tail call void @g(i32 2) br label %return
bb3: tail call void @g(i32 3) br label %return
bb4: tail call void @g(i32 4) br label %return
bb5: tail call void @g(i32 5) br label %return
return: ret void
; Partitioning the cases into the minimum number of dense sets is not good enough.
; This can be partitioned as {0,1,2,9},{14,15} or {0,1,2},{9,14,15}. The former
; should be preferred. Expecting a table from 0-9.
; CHECK-LABEL: optimal_jump_table2
; CHECK: cmpl $9
; CHECK: jmpq *.LJTI
}
define void @optimal_jump_table3(i32 %x) {
entry:
switch i32 %x, label %return [
i32 1, label %bb0
i32 2, label %bb1
i32 3, label %bb2
i32 10, label %bb3
i32 13, label %bb0
i32 14, label %bb1
i32 15, label %bb2
i32 20, label %bb3
i32 25, label %bb4
]
bb0: tail call void @g(i32 0) br label %return
bb1: tail call void @g(i32 1) br label %return
bb2: tail call void @g(i32 2) br label %return
bb3: tail call void @g(i32 3) br label %return
bb4: tail call void @g(i32 4) br label %return
return: ret void
; Splitting to maximize left-right density sum and gap size would split this
; between 3 and 10, and then between 20 and 25. It's better to build a table
; from 1-20.
; CHECK-LABEL: optimal_jump_table3
; CHECK: leal -1
; CHECK: cmpl $19
; CHECK: jmpq *.LJTI
}
%struct.S = type { %struct.S*, i32 }
define void @phi_node_trouble(%struct.S* %s) {
entry:
br label %header
header:
%ptr = phi %struct.S* [ %s, %entry ], [ %next, %loop ]
%bool = icmp eq %struct.S* %ptr, null
br i1 %bool, label %exit, label %loop
loop:
%nextptr = getelementptr inbounds %struct.S, %struct.S* %ptr, i64 0, i32 0
%next = load %struct.S*, %struct.S** %nextptr
%xptr = getelementptr inbounds %struct.S, %struct.S* %next, i64 0, i32 1
%x = load i32, i32* %xptr
switch i32 %x, label %exit [
i32 4, label %header
i32 36, label %exit2
i32 69, label %exit2
i32 25, label %exit2
]
exit:
ret void
exit2:
ret void
; This will be lowered to a comparison with 4 and then bit tests. Make sure
; that the phi node in %header gets a value from the comparison block.
; CHECK-LABEL: phi_node_trouble
; CHECK: movq (%[[REG1:[a-z]+]]), %[[REG1]]
; CHECK: movl 8(%[[REG1]]), %[[REG2:[a-z]+]]
; CHECK: cmpl $4, %[[REG2]]
}
define void @default_only(i32 %x) {
entry:
br label %sw
return:
ret void
sw:
switch i32 %x, label %return [
]
; Branch directly to the default.
; (In optimized builds the switch is removed earlier.)
; NOOPT-LABEL: default_only
; NOOPT: .[[L:[A-Z0-9_]+]]:
; NOOPT-NEXT: retq
; NOOPT: jmp .[[L]]
}


@ -1,8 +1,8 @@
;; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort=1 \
;; RUN: llc -verify-machineinstrs \
;; RUN: -mtriple=armv7-linux-gnueabi -filetype=obj %s -o - | \
;; RUN: llvm-readobj -t | FileCheck -check-prefix=ARM %s
;; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort=1 \
;; RUN: llc -verify-machineinstrs \
;; RUN: -mtriple=thumbv7-linux-gnueabi -filetype=obj %s -o - | \
;; RUN: llvm-readobj -t | FileCheck -check-prefix=TMB %s
@ -11,102 +11,25 @@
define void @foo(i32* %ptr) nounwind ssp {
%tmp = load i32, i32* %ptr, align 4
switch i32 %tmp, label %default [
i32 11, label %bb0
i32 10, label %bb1
i32 8, label %bb2
i32 4, label %bb3
i32 2, label %bb4
i32 6, label %bb5
i32 9, label %bb6
i32 15, label %bb7
i32 1, label %bb8
i32 3, label %bb9
i32 5, label %bb10
i32 30, label %bb11
i32 31, label %bb12
i32 13, label %bb13
i32 14, label %bb14
i32 20, label %bb15
i32 19, label %bb16
i32 17, label %bb17
i32 18, label %bb18
i32 21, label %bb19
i32 22, label %bb20
i32 16, label %bb21
i32 24, label %bb22
i32 25, label %bb23
i32 26, label %bb24
i32 27, label %bb25
i32 28, label %bb26
i32 23, label %bb27
i32 12, label %bb28
switch i32 %tmp, label %exit [
i32 0, label %bb0
i32 1, label %bb1
i32 2, label %bb2
i32 3, label %bb3
]
default:
br label %exit
bb0:
store i32 0, i32* %ptr, align 4
br label %exit
bb1:
store i32 1, i32* %ptr, align 4
br label %exit
bb2:
store i32 2, i32* %ptr, align 4
br label %exit
bb3:
store i32 3, i32* %ptr, align 4
br label %exit
bb4:
br label %exit
bb5:
br label %exit
bb6:
br label %exit
bb7:
br label %exit
bb8:
br label %exit
bb9:
br label %exit
bb10:
br label %exit
bb11:
br label %exit
bb12:
br label %exit
bb13:
br label %exit
bb14:
br label %exit
bb15:
br label %exit
bb16:
br label %exit
bb17:
br label %exit
bb18:
br label %exit
bb19:
br label %exit
bb20:
br label %exit
bb21:
br label %exit
bb22:
br label %exit
bb23:
br label %exit
bb24:
br label %exit
bb25:
br label %exit
bb26:
br label %exit
bb27:
br label %exit
bb28:
br label %exit
exit:
ret void
}