ARM: allow jump tables to be placed as constant islands.

Previously, they were forced to immediately follow the actual branch
instruction. This was usually OK (the LEAs actually accessing them got emitted
nearby, and weren't usually separated much afterwards). Unfortunately, a
sufficiently nasty phi elimination dumps many instructions right before the
basic block terminator, and this can increase the range too much.

This patch frees them up to be placed as usual by the constant islands pass,
and consequently has to slightly modify the form of TBB/TBH tables to refer to
a PC-relative label at the final jump. The other jump table formats were
already position-independent.

rdar://20813304

llvm-svn: 237590
This commit is contained in:
Tim Northover 2015-05-18 17:10:40 +00:00
parent 2982d4d795
commit 12c41af07c
11 changed files with 365 additions and 173 deletions

View File

@ -922,17 +922,14 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
OutStreamer->EmitValue(Expr, Size);
}
void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) {
unsigned Opcode = MI->getOpcode();
int OpNum = 1;
if (Opcode == ARM::BR_JTadd)
OpNum = 2;
else if (Opcode == ARM::BR_JTm)
OpNum = 3;
const MachineOperand &MO1 = MI->getOperand(OpNum);
void ARMAsmPrinter::EmitJumpTableAddrs(const MachineInstr *MI) {
const MachineOperand &MO1 = MI->getOperand(1);
unsigned JTI = MO1.getIndex();
// Make sure the Thumb jump table is 4-byte aligned. This will be a nop for
// ARM mode tables.
EmitAlignment(2);
// Emit a label for the jump table.
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI);
OutStreamer->EmitLabel(JTISymbol);
@ -972,10 +969,8 @@ void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) {
OutStreamer->EmitDataRegion(MCDR_DataRegionEnd);
}
void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
unsigned Opcode = MI->getOpcode();
int OpNum = (Opcode == ARM::t2BR_JT) ? 2 : 1;
const MachineOperand &MO1 = MI->getOperand(OpNum);
void ARMAsmPrinter::EmitJumpTableInsts(const MachineInstr *MI) {
const MachineOperand &MO1 = MI->getOperand(1);
unsigned JTI = MO1.getIndex();
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI);
@ -985,42 +980,56 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
unsigned OffsetWidth = 4;
if (MI->getOpcode() == ARM::t2TBB_JT) {
OffsetWidth = 1;
// Mark the jump table as data-in-code.
OutStreamer->EmitDataRegion(MCDR_DataRegionJT8);
} else if (MI->getOpcode() == ARM::t2TBH_JT) {
OffsetWidth = 2;
// Mark the jump table as data-in-code.
OutStreamer->EmitDataRegion(MCDR_DataRegionJT16);
}
for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
MachineBasicBlock *MBB = JTBBs[i];
const MCExpr *MBBSymbolExpr = MCSymbolRefExpr::Create(MBB->getSymbol(),
OutContext);
// If this isn't a TBB or TBH, the entries are direct branch instructions.
if (OffsetWidth == 4) {
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2B)
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2B)
.addExpr(MBBSymbolExpr)
.addImm(ARMCC::AL)
.addReg(0));
continue;
}
}
}
void ARMAsmPrinter::EmitJumpTableTBInst(const MachineInstr *MI,
unsigned OffsetWidth) {
assert((OffsetWidth == 1 || OffsetWidth == 2) && "invalid tbb/tbh width");
const MachineOperand &MO1 = MI->getOperand(1);
unsigned JTI = MO1.getIndex();
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI);
OutStreamer->EmitLabel(JTISymbol);
// Emit each entry of the table.
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
// Mark the jump table as data-in-code.
OutStreamer->EmitDataRegion(OffsetWidth == 1 ? MCDR_DataRegionJT8
: MCDR_DataRegionJT16);
for (auto MBB : JTBBs) {
const MCExpr *MBBSymbolExpr = MCSymbolRefExpr::Create(MBB->getSymbol(),
OutContext);
// Otherwise it's an offset from the dispatch instruction. Construct an
// MCExpr for the entry. We want a value of the form:
// (BasicBlockAddr - TableBeginAddr) / 2
// (BasicBlockAddr - TBBInstAddr + 4) / 2
//
// For example, a TBB table with entries jumping to basic blocks BB0 and BB1
// would look like:
// LJTI_0_0:
// .byte (LBB0 - LJTI_0_0) / 2
// .byte (LBB1 - LJTI_0_0) / 2
const MCExpr *Expr =
MCBinaryExpr::CreateSub(MBBSymbolExpr,
MCSymbolRefExpr::Create(JTISymbol, OutContext),
OutContext);
// .byte (LBB0 - (LCPI0_0 + 4)) / 2
// .byte (LBB1 - (LCPI0_0 + 4)) / 2
// where LCPI0_0 is a label defined just before the TBB instruction using
// this table.
MCSymbol *TBInstPC = GetCPISymbol(MI->getOperand(0).getImm());
const MCExpr *Expr = MCBinaryExpr::CreateAdd(
MCSymbolRefExpr::Create(TBInstPC, OutContext),
MCConstantExpr::Create(4, OutContext), OutContext);
Expr = MCBinaryExpr::CreateSub(MBBSymbolExpr, Expr, OutContext);
Expr = MCBinaryExpr::CreateDiv(Expr, MCConstantExpr::Create(2, OutContext),
OutContext);
OutStreamer->EmitValue(Expr, OffsetWidth);
@ -1028,8 +1037,10 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
// Mark the end of jump table data-in-code region. 32-bit offsets use
// actual branch instructions here, so we don't mark those as a data-region
// at all.
if (OffsetWidth != 4)
OutStreamer->EmitDataRegion(MCDR_DataRegionEnd);
OutStreamer->EmitDataRegion(MCDR_DataRegionEnd);
// Make sure the next instruction is 2-byte aligned.
EmitAlignment(1);
}
void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
@ -1501,6 +1512,16 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
EmitGlobalConstant(MCPE.Val.ConstVal);
return;
}
case ARM::JUMPTABLE_ADDRS:
EmitJumpTableAddrs(MI);
return;
case ARM::JUMPTABLE_INSTS:
EmitJumpTableInsts(MI);
return;
case ARM::JUMPTABLE_TBB:
case ARM::JUMPTABLE_TBH:
EmitJumpTableTBInst(MI, MI->getOpcode() == ARM::JUMPTABLE_TBB ? 1 : 2);
return;
case ARM::t2BR_JT: {
// Lower and emit the instruction itself, then the jump table following it.
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVr)
@ -1509,37 +1530,19 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Add predicate operands.
.addImm(ARMCC::AL)
.addReg(0));
// Output the data for the jump table itself
EmitJump2Table(MI);
return;
}
case ARM::t2TBB_JT: {
// Lower and emit the instruction itself, then the jump table following it.
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2TBB)
.addReg(ARM::PC)
.addReg(MI->getOperand(0).getReg())
// Add predicate operands.
.addImm(ARMCC::AL)
.addReg(0));
// Output the data for the jump table itself
EmitJump2Table(MI);
// Make sure the next instruction is 2-byte aligned.
EmitAlignment(1);
return;
}
case ARM::t2TBB_JT:
case ARM::t2TBH_JT: {
// Lower and emit the instruction itself, then the jump table following it.
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2TBH)
.addReg(ARM::PC)
.addReg(MI->getOperand(0).getReg())
// Add predicate operands.
.addImm(ARMCC::AL)
.addReg(0));
// Output the data for the jump table itself
EmitJump2Table(MI);
unsigned Opc = MI->getOpcode() == ARM::t2TBB_JT ? ARM::t2TBB : ARM::t2TBH;
// Lower and emit the PC label, then the instruction itself.
OutStreamer->EmitLabel(GetCPISymbol(MI->getOperand(3).getImm()));
EmitToStreamer(*OutStreamer, MCInstBuilder(Opc)
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
// Add predicate operands.
.addImm(ARMCC::AL)
.addReg(0));
return;
}
case ARM::tBR_JTr:
@ -1559,13 +1562,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (Opc == ARM::MOVr)
TmpInst.addOperand(MCOperand::createReg(0));
EmitToStreamer(*OutStreamer, TmpInst);
// Make sure the Thumb jump table is 4-byte aligned.
if (Opc == ARM::tMOVr)
EmitAlignment(2);
// Output the data for the jump table itself
EmitJumpTable(MI);
return;
}
case ARM::BR_JTm: {
@ -1589,9 +1585,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
TmpInst.addOperand(MCOperand::createImm(ARMCC::AL));
TmpInst.addOperand(MCOperand::createReg(0));
EmitToStreamer(*OutStreamer, TmpInst);
// Output the data for the jump table itself
EmitJumpTable(MI);
return;
}
case ARM::BR_JTadd: {
@ -1606,9 +1599,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addReg(0)
// Add 's' bit operand (always reg0 for this)
.addReg(0));
// Output the data for the jump table itself
EmitJumpTable(MI);
return;
}
case ARM::SPACE:

View File

@ -71,8 +71,9 @@ public:
void emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
const MCSubtargetInfo *EndInfo) const override;
void EmitJumpTable(const MachineInstr *MI);
void EmitJump2Table(const MachineInstr *MI);
void EmitJumpTableAddrs(const MachineInstr *MI);
void EmitJumpTableInsts(const MachineInstr *MI);
void EmitJumpTableTBInst(const MachineInstr *MI, unsigned OffsetWidth);
void EmitInstruction(const MachineInstr *MI) override;
bool runOnMachineFunction(MachineFunction &F) override;

View File

@ -627,6 +627,10 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
case ARM::t2MOVi32imm:
return 8;
case ARM::CONSTPOOL_ENTRY:
case ARM::JUMPTABLE_INSTS:
case ARM::JUMPTABLE_ADDRS:
case ARM::JUMPTABLE_TBB:
case ARM::JUMPTABLE_TBH:
// If this machine instr is a constant pool entry, its size is recorded as
// operand #2.
return MI->getOperand(2).getImm();
@ -641,42 +645,6 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
case ARM::t2Int_eh_sjlj_setjmp:
case ARM::t2Int_eh_sjlj_setjmp_nofp:
return 12;
case ARM::BR_JTr:
case ARM::BR_JTm:
case ARM::BR_JTadd:
case ARM::tBR_JTr:
case ARM::t2BR_JT:
case ARM::t2TBB_JT:
case ARM::t2TBH_JT: {
// These are jumptable branches, i.e. a branch followed by an inlined
// jumptable. The size is 4 + 4 * number of entries. For TBB, each
// entry is one byte; TBH two byte each.
unsigned EntrySize = (Opc == ARM::t2TBB_JT)
? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
unsigned NumOps = MCID.getNumOperands();
MachineOperand JTOP =
MI->getOperand(NumOps - (MI->isPredicable() ? 2 : 1));
unsigned JTI = JTOP.getIndex();
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
assert(MJTI != nullptr);
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
assert(JTI < JT.size());
// Thumb instructions are 2 byte aligned, but JT entries are 4 byte
// 4 aligned. The assembler / linker may add 2 byte padding just before
// the JT entries. The size does not include this padding; the
// constant islands pass does separate bookkeeping for it.
// FIXME: If we know the size of the function is less than (1 << 16) *2
// bytes, we can use 16-bit entries instead. Then there won't be an
// alignment issue.
unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
unsigned NumEntries = JT[JTI].MBBs.size();
if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
// Make sure the instruction that follows TBB is 2-byte aligned.
// FIXME: Constant island pass should insert an "ALIGN" instruction
// instead.
++NumEntries;
return NumEntries * EntrySize + InstSize;
}
case ARM::SPACE:
return MI->getOperand(1).getImm();
}

View File

@ -180,9 +180,7 @@ namespace {
MachineInstr *MI;
MachineInstr *CPEMI;
MachineBasicBlock *HighWaterMark;
private:
unsigned MaxDisp;
public:
bool NegOk;
bool IsSoImm;
bool KnownAlignment;
@ -216,12 +214,24 @@ namespace {
};
/// CPEntries - Keep track of all of the constant pool entry machine
/// instructions. For each original constpool index (i.e. those that
/// existed upon entry to this pass), it keeps a vector of entries.
/// Original elements are cloned as we go along; the clones are
/// put in the vector of the original element, but have distinct CPIs.
/// instructions. For each original constpool index (i.e. those that existed
/// upon entry to this pass), it keeps a vector of entries. Original
/// elements are cloned as we go along; the clones are put in the vector of
/// the original element, but have distinct CPIs.
///
/// The first half of CPEntries contains generic constants, the second half
/// contains jump tables. Use getCombinedIndex on a generic CPEMI to look up
/// which vector it will be in here.
std::vector<std::vector<CPEntry> > CPEntries;
/// Maps a JT index to the offset in CPEntries containing copies of that
/// table. The equivalent map for a CONSTPOOL_ENTRY is the identity.
DenseMap<int, int> JumpTableEntryIndices;
/// Maps a JT index to the position in CPUsers of the LEA that actually uses
/// the index to calculate its base address.
DenseMap<int, int> JumpTableUserIndices;
/// ImmBranch - One per immediate branch, keeping the machine instruction
/// pointer, conditional or unconditional, the max displacement,
/// and (if isCond is true) the corresponding unconditional branch
@ -269,7 +279,8 @@ namespace {
}
private:
void doInitialPlacement(std::vector<MachineInstr*> &CPEMIs);
void doInitialConstPlacement(std::vector<MachineInstr *> &CPEMIs);
void doInitialJumpTablePlacement(std::vector<MachineInstr *> &CPEMIs);
bool BBHasFallthrough(MachineBasicBlock *MBB);
CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
unsigned getCPELogAlign(const MachineInstr *CPEMI);
@ -279,6 +290,7 @@ namespace {
void updateForInsertedWaterBlock(MachineBasicBlock *NewBB);
void adjustBBOffsetsAfter(MachineBasicBlock *BB);
bool decrementCPEReferenceCount(unsigned CPI, MachineInstr* CPEMI);
unsigned getCombinedIndex(const MachineInstr *CPEMI);
int findInRangeCPEntry(CPUser& U, unsigned UserOffset);
bool findAvailableWater(CPUser&U, unsigned UserOffset,
water_iterator &WaterIter);
@ -413,7 +425,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
// we put them all at the end of the function.
std::vector<MachineInstr*> CPEMIs;
if (!MCP->isEmpty())
doInitialPlacement(CPEMIs);
doInitialConstPlacement(CPEMIs);
if (MF->getJumpTableInfo())
doInitialJumpTablePlacement(CPEMIs);
/// The next UID to take is the first unused one.
AFI->initPICLabelUId(CPEMIs.size());
@ -478,7 +493,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
for (unsigned j = 0, je = CPEntries[i].size(); j != je; ++j) {
const CPEntry & CPE = CPEntries[i][j];
AFI->recordCPEClone(i, CPE.CPI);
if (CPE.CPEMI && CPE.CPEMI->getOperand(1).isCPI())
AFI->recordCPEClone(i, CPE.CPI);
}
}
@ -488,6 +504,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
WaterList.clear();
CPUsers.clear();
CPEntries.clear();
JumpTableEntryIndices.clear();
JumpTableUserIndices.clear();
ImmBranches.clear();
PushPopMIs.clear();
T2JumpTables.clear();
@ -495,10 +513,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
return MadeChange;
}
/// doInitialPlacement - Perform the initial placement of the constant pool
/// entries. To start with, we put them all at the end of the function.
/// \brief Perform the initial placement of the regular constant pool entries.
/// To start with, we put them all at the end of the function.
void
ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs) {
// Create the basic block to hold the CPE's.
MachineBasicBlock *BB = MF->CreateMachineBasicBlock();
MF->push_back(BB);
@ -556,6 +574,66 @@ ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
DEBUG(BB->dump());
}
/// \brief Do initial placement of the jump tables. Because Thumb2's TBB and TBH
/// instructions can be made more efficient if the jump table immediately
/// follows the instruction, it's best to place them immediately next to their
/// jumps to begin with. In almost all cases they'll never be moved from that
/// position.
///
/// For every block whose last non-debug instruction is a jump-table branch, a
/// fresh block holding a single JUMPTABLE_* pseudo-instruction is inserted
/// directly after it. The pseudo-instruction's operands are (unique ID, jump
/// table index, size in bytes).
///
/// \param CPEMIs receives one entry per created jump-table instruction,
///        appended after whatever entries it already holds.
void ARMConstantIslands::doInitialJumpTablePlacement(
    std::vector<MachineInstr *> &CPEMIs) {
  // Jump-table entries are numbered after the entries already in CPEntries.
  unsigned i = CPEntries.size();
  auto MJTI = MF->getJumpTableInfo();
  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();

  MachineBasicBlock *LastCorrectlyNumberedBB = nullptr;
  for (MachineBasicBlock &MBB : *MF) {
    auto MI = MBB.getLastNonDebugInstr();
    // A block that is empty (or contains only debug instructions) yields
    // end() here; there is no terminator to inspect and dereferencing the
    // iterator would be invalid.
    if (MI == MBB.end())
      continue;

    // Pick the table pseudo-instruction matching the kind of branch.
    unsigned JTOpcode;
    switch (MI->getOpcode()) {
    default:
      // Not a jump-table branch (this also skips the table blocks that this
      // loop inserts as it goes).
      continue;
    case ARM::BR_JTadd:
    case ARM::BR_JTr:
    case ARM::tBR_JTr:
    case ARM::BR_JTm:
      JTOpcode = ARM::JUMPTABLE_ADDRS;
      break;
    case ARM::t2BR_JT:
      JTOpcode = ARM::JUMPTABLE_INSTS;
      break;
    case ARM::t2TBB_JT:
      JTOpcode = ARM::JUMPTABLE_TBB;
      break;
    case ARM::t2TBH_JT:
      JTOpcode = ARM::JUMPTABLE_TBH;
      break;
    }

    // The jump table index is the last declared operand, or the one before
    // it when the instruction is predicable.
    unsigned NumOps = MI->getDesc().getNumOperands();
    MachineOperand JTOp =
        MI->getOperand(NumOps - (MI->isPredicable() ? 2 : 1));
    unsigned JTI = JTOp.getIndex();
    // Every entry is sized as a 32-bit word at this point.
    unsigned Size = JT[JTI].MBBs.size() * sizeof(uint32_t);

    // Place the table in its own block right after the branch's block.
    MachineBasicBlock *JumpTableBB = MF->CreateMachineBasicBlock();
    MF->insert(std::next(MachineFunction::iterator(MBB)), JumpTableBB);
    MachineInstr *CPEMI = BuildMI(*JumpTableBB, JumpTableBB->begin(),
                                  DebugLoc(), TII->get(JTOpcode))
                              .addImm(i++)
                              .addJumpTableIndex(JTI)
                              .addImm(Size);
    CPEMIs.push_back(CPEMI);
    CPEntries.emplace_back(1, CPEntry(CPEMI, JTI));
    JumpTableEntryIndices.insert(std::make_pair(JTI, CPEntries.size() - 1));

    // Remember the first block that gained a new successor block; numbering
    // is stale from there onwards.
    if (!LastCorrectlyNumberedBB)
      LastCorrectlyNumberedBB = &MBB;
  }

  // If we did anything then we need to renumber the subsequent blocks.
  if (LastCorrectlyNumberedBB)
    MF->RenumberBlocks(LastCorrectlyNumberedBB);
}
/// BBHasFallthrough - Return true if the specified basic block can fallthrough
/// into the block immediately after it.
bool ARMConstantIslands::BBHasFallthrough(MachineBasicBlock *MBB) {
@ -595,9 +673,21 @@ ARMConstantIslands::CPEntry
/// getCPELogAlign - Returns the required alignment of the constant pool entry
/// represented by CPEMI. Alignment is measured in log2(bytes) units.
unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) {
assert(CPEMI && CPEMI->getOpcode() == ARM::CONSTPOOL_ENTRY);
switch (CPEMI->getOpcode()) {
case ARM::CONSTPOOL_ENTRY:
break;
case ARM::JUMPTABLE_TBB:
return 0;
case ARM::JUMPTABLE_TBH:
case ARM::JUMPTABLE_INSTS:
return 1;
case ARM::JUMPTABLE_ADDRS:
return 2;
default:
llvm_unreachable("unknown constpool entry kind");
}
unsigned CPI = CPEMI->getOperand(1).getIndex();
unsigned CPI = getCombinedIndex(CPEMI);
assert(CPI < MCP->getConstants().size() && "Invalid constant pool index.");
unsigned Align = MCP->getConstants()[CPI].getAlignment();
assert(isPowerOf2_32(Align) && "Invalid CPE alignment");
@ -706,12 +796,14 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
if (Opc == ARM::tPUSH || Opc == ARM::tPOP_RET)
PushPopMIs.push_back(I);
if (Opc == ARM::CONSTPOOL_ENTRY)
if (Opc == ARM::CONSTPOOL_ENTRY || Opc == ARM::JUMPTABLE_ADDRS ||
Opc == ARM::JUMPTABLE_INSTS || Opc == ARM::JUMPTABLE_TBB ||
Opc == ARM::JUMPTABLE_TBH)
continue;
// Scan the instructions for constant pool operands.
for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
if (I->getOperand(op).isCPI()) {
if (I->getOperand(op).isCPI() || I->getOperand(op).isJTI()) {
// We found one. The addressing mode tells us the max displacement
// from the PC that this instruction permits.
@ -727,6 +819,7 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
// Taking the address of a CP entry.
case ARM::LEApcrel:
case ARM::LEApcrelJT:
// This takes a SoImm, which is 8 bit immediate rotated. We'll
// pretend the maximum offset is 255 * 4. Since each instruction
// 4 byte wide, this is always correct. We'll check for other
@ -737,10 +830,12 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
IsSoImm = true;
break;
case ARM::t2LEApcrel:
case ARM::t2LEApcrelJT:
Bits = 12;
NegOk = true;
break;
case ARM::tLEApcrel:
case ARM::tLEApcrelJT:
Bits = 8;
Scale = 4;
break;
@ -768,6 +863,11 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
// Remember that this is a user of a CP entry.
unsigned CPI = I->getOperand(op).getIndex();
if (I->getOperand(op).isJTI()) {
JumpTableUserIndices.insert(std::make_pair(CPI, CPUsers.size()));
CPI = JumpTableEntryIndices[CPI];
}
MachineInstr *CPEMI = CPEMIs[CPI];
unsigned MaxOffs = ((1 << Bits)-1) * Scale;
CPUsers.push_back(CPUser(I, CPEMI, MaxOffs, NegOk, IsSoImm));
@ -1101,6 +1201,13 @@ bool ARMConstantIslands::decrementCPEReferenceCount(unsigned CPI,
return false;
}
unsigned ARMConstantIslands::getCombinedIndex(const MachineInstr *CPEMI) {
if (CPEMI->getOperand(1).isCPI())
return CPEMI->getOperand(1).getIndex();
return JumpTableEntryIndices[CPEMI->getOperand(1).getIndex()];
}
/// LookForCPEntryInRange - see if the currently referenced CPE is in range;
/// if not, see if an in-range clone of the CPE is in range, and if so,
/// change the data structures so the user references the clone. Returns:
@ -1120,7 +1227,7 @@ int ARMConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset)
}
// No. Look for previously created clones of the CPE that are in range.
unsigned CPI = CPEMI->getOperand(1).getIndex();
unsigned CPI = getCombinedIndex(CPEMI);
std::vector<CPEntry> &CPEs = CPEntries[CPI];
for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
// We already tried this one
@ -1365,7 +1472,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
CPUser &U = CPUsers[CPUserIndex];
MachineInstr *UserMI = U.MI;
MachineInstr *CPEMI = U.CPEMI;
unsigned CPI = CPEMI->getOperand(1).getIndex();
unsigned CPI = getCombinedIndex(CPEMI);
unsigned Size = CPEMI->getOperand(2).getImm();
// Compute this only once, it's expensive.
unsigned UserOffset = getUserOffset(U);
@ -1429,17 +1536,17 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
// Update internal data structures to account for the newly inserted MBB.
updateForInsertedWaterBlock(NewIsland);
// Decrement the old entry, and remove it if refcount becomes 0.
decrementCPEReferenceCount(CPI, CPEMI);
// Now that we have an island to add the CPE to, clone the original CPE and
// add it to the island.
U.HighWaterMark = NewIsland;
U.CPEMI = BuildMI(NewIsland, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
.addImm(ID).addConstantPoolIndex(CPI).addImm(Size);
U.CPEMI = BuildMI(NewIsland, DebugLoc(), CPEMI->getDesc())
.addImm(ID).addOperand(CPEMI->getOperand(1)).addImm(Size);
CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
++NumCPEs;
// Decrement the old entry, and remove it if refcount becomes 0.
decrementCPEReferenceCount(CPI, CPEMI);
// Mark the basic block as aligned as required by the const-pool entry.
NewIsland->setAlignment(getCPELogAlign(U.CPEMI));
@ -1917,6 +2024,19 @@ unsigned ARMConstantIslands::removeDeadDefinitions(MachineInstr *MI,
return BytesRemoved;
}
/// \brief Check whether the jump table lies directly behind its branch.
///
/// Returns true when \p CPEMI is the first instruction of the block that
/// immediately follows the block containing \p JTMI (assumed to be a TBB or
/// TBH terminator). If so, we can switch the first register to PC and usually
/// remove the address calculation that preceded it.
static bool jumpTableFollowsTB(MachineInstr *JTMI, MachineInstr *CPEMI) {
  MachineFunction::iterator NextBB = JTMI->getParent();
  MachineFunction *Fn = NextBB->getParent();
  ++NextBB;

  // The branch sits in the last block of the function.
  if (NextBB == Fn->end())
    return false;

  // The following block contains no instructions at all.
  if (NextBB->begin() == NextBB->end())
    return false;

  return &*NextBB->begin() == CPEMI;
}
/// optimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
/// jumptables when it's possible.
bool ARMConstantIslands::optimizeThumb2JumpTables() {
@ -1955,37 +2075,73 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
break;
}
if (ByteOk || HalfWordOk) {
MachineBasicBlock *MBB = MI->getParent();
unsigned BaseReg = MI->getOperand(0).getReg();
bool BaseRegKill = MI->getOperand(0).isKill();
if (!BaseRegKill)
continue;
unsigned IdxReg = MI->getOperand(1).getReg();
bool IdxRegKill = MI->getOperand(1).isKill();
if (!ByteOk && !HalfWordOk)
continue;
DEBUG(dbgs() << "Shrink JT: " << *MI);
unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
MachineBasicBlock::iterator MI_JT = MI;
MachineInstr *NewJTMI =
MachineBasicBlock *MBB = MI->getParent();
unsigned BaseReg = MI->getOperand(0).getReg();
bool BaseRegKill = MI->getOperand(0).isKill();
if (!BaseRegKill)
continue;
unsigned IdxReg = MI->getOperand(1).getReg();
bool IdxRegKill = MI->getOperand(1).isKill();
DEBUG(dbgs() << "Shrink JT: " << *MI);
CPUser &User = CPUsers[JumpTableUserIndices[JTI]];
MachineInstr *CPEMI = User.CPEMI;
unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
MachineBasicBlock::iterator MI_JT = MI;
MachineInstr *NewJTMI =
BuildMI(*MBB, MI_JT, MI->getDebugLoc(), TII->get(Opc))
.addReg(IdxReg, getKillRegState(IdxRegKill))
.addJumpTableIndex(JTI, JTOP.getTargetFlags());
DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": " << *NewJTMI);
// FIXME: Insert an "ALIGN" instruction to ensure the next instruction
// is 2-byte aligned. For now, asm printer will fix it up.
unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI);
unsigned OrigSize = TII->GetInstSizeInBytes(MI);
unsigned DeadSize = removeDeadDefinitions(MI, BaseReg, IdxReg);
MI->eraseFromParent();
.addReg(BaseReg, getKillRegState(BaseRegKill))
.addReg(IdxReg, getKillRegState(IdxRegKill))
.addJumpTableIndex(JTI, JTOP.getTargetFlags())
.addImm(CPEMI->getOperand(0).getImm());
DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": " << *NewJTMI);
int delta = OrigSize - NewSize + DeadSize;
BBInfo[MBB->getNumber()].Size -= delta;
adjustBBOffsetsAfter(MBB);
unsigned JTOpc = ByteOk ? ARM::JUMPTABLE_TBB : ARM::JUMPTABLE_TBH;
CPEMI->setDesc(TII->get(JTOpc));
++NumTBs;
MadeChange = true;
// Now we need to determine whether the actual jump table has been moved
// from immediately after this instruction. If not, we can replace BaseReg
// with PC and probably eliminate the BaseReg calculations.
unsigned DeadSize = 0;
if (jumpTableFollowsTB(NewJTMI, User.CPEMI)) {
NewJTMI->getOperand(0).setReg(ARM::PC);
NewJTMI->getOperand(0).setIsKill(false);
DeadSize = removeDeadDefinitions(MI, BaseReg, IdxReg);
if (!User.MI->getParent()) {
// The LEA was eliminated, the TBB instruction becomes the only new user
// of the jump table.
User.MI = NewJTMI;
User.MaxDisp = 4;
User.NegOk = false;
User.IsSoImm = false;
User.KnownAlignment = false;
} else {
// The LEA couldn't be eliminated, so we must add another CPUser to
// record the TBB or TBH use.
int CPEntryIdx = JumpTableEntryIndices[JTI];
auto &CPEs = CPEntries[CPEntryIdx];
auto Entry = std::find_if(CPEs.begin(), CPEs.end(), [&](CPEntry &E) {
return E.CPEMI == User.CPEMI;
});
++Entry->RefCount;
CPUsers.emplace_back(CPUser(NewJTMI, User.CPEMI, 4, false, false));
}
}
unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI);
unsigned OrigSize = TII->GetInstSizeInBytes(MI);
MI->eraseFromParent();
int Delta = OrigSize - NewSize + DeadSize;
BBInfo[MBB->getNumber()].Size -= Delta;
adjustBBOffsetsAfter(MBB);
++NumTBs;
MadeChange = true;
}
return MadeChange;

View File

@ -1826,6 +1826,32 @@ def CONSTPOOL_ENTRY :
PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
i32imm:$size), NoItinerary, []>;
// Operand layout shared by all four JUMPTABLE_* pseudo-instructions below
// (same shape as CONSTPOOL_ENTRY):
//   $instid - unique ID assigned when the constant islands pass creates the
//             entry.
//   $cpidx  - the jump table index this entry materialises.
//   $size   - size of the table in bytes; GetInstSizeInBytes returns this
//             operand as the instruction's size.

/// A jumptable consisting of direct 32-bit addresses of the destination basic
/// blocks (either absolute, or relative to the start of the jump-table in PIC
/// mode). Used mostly in ARM and Thumb-1 modes.
def JUMPTABLE_ADDRS :
PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
i32imm:$size), NoItinerary, []>;
/// A jumptable consisting of 32-bit jump instructions. Used for Thumb-2 tables
/// that cannot be optimised to use TBB or TBH.
def JUMPTABLE_INSTS :
PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
i32imm:$size), NoItinerary, []>;
/// A jumptable consisting of 8-bit unsigned integers representing offsets from
/// a TBB instruction. The asm printer uses $instid to name the PC-relative
/// label placed at the TBB, against which the offsets are computed.
def JUMPTABLE_TBB :
PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
i32imm:$size), NoItinerary, []>;
/// A jumptable consisting of 16-bit unsigned integers representing offsets from
/// a TBH instruction. The asm printer uses $instid to name the PC-relative
/// label placed at the TBH, against which the offsets are computed.
def JUMPTABLE_TBH :
PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
i32imm:$size), NoItinerary, []>;
// FIXME: Marking these as hasSideEffects is necessary to prevent machine DCE
// from removing one half of the matched pairs. That breaks PEI, which assumes
// these will always be in pairs, and asserts if it finds otherwise. Better way?
@ -2224,7 +2250,7 @@ let isBranch = 1, isTerminator = 1 in {
[(br bb:$target)], (Bcc br_target:$target, (ops 14, zero_reg))>,
Sched<[WriteBr]>;
let isNotDuplicable = 1, isIndirectBranch = 1 in {
let Size = 4, isNotDuplicable = 1, isIndirectBranch = 1 in {
def BR_JTr : ARMPseudoInst<(outs),
(ins GPR:$target, i32imm:$jt),
0, IIC_Br,

View File

@ -526,6 +526,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
0, IIC_Br,
[(ARMbrjt tGPR:$target, tjumptable:$jt)]>,
Sched<[WriteBrTbl]> {
let Size = 2;
list<Predicate> Predicates = [IsThumb, IsThumb1Only];
}
}

View File

@ -3531,20 +3531,20 @@ def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br,
let AsmMatchConverter = "cvtThumbBranches";
}
let isNotDuplicable = 1, isIndirectBranch = 1 in {
let Size = 4, isNotDuplicable = 1, isIndirectBranch = 1 in {
def t2BR_JT : t2PseudoInst<(outs),
(ins GPR:$target, GPR:$index, i32imm:$jt),
0, IIC_Br,
[(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt)]>,
Sched<[WriteBr]>;
// FIXME: Add a non-pc based case that can be predicated.
// FIXME: Add a case that can be predicated.
def t2TBB_JT : t2PseudoInst<(outs),
(ins GPR:$index, i32imm:$jt), 0, IIC_Br, []>,
(ins GPR:$base, GPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>,
Sched<[WriteBr]>;
def t2TBH_JT : t2PseudoInst<(outs),
(ins GPR:$index, i32imm:$jt), 0, IIC_Br, []>,
(ins GPR:$base, GPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>,
Sched<[WriteBr]>;
def t2TBB : T2I<(outs), (ins addrmode_tbb:$addr), IIC_Br,

View File

@ -0,0 +1,40 @@
; RUN: llc -mtriple=armv7-apple-ios8.0 -o - %s | FileCheck %s
%BigInt = type i5500
define %BigInt @test_moved_jumptable(i1 %tst, i32 %sw, %BigInt %l) {
; CHECK-LABEL: test_moved_jumptable:
; CHECK: adr {{r[0-9]+}}, [[JUMP_TABLE:LJTI[0-9]+_[0-9]+]]
; CHECK: b [[SKIP_TABLE:LBB[0-9]+_[0-9]+]]
; CHECK: [[JUMP_TABLE]]:
; CHECK: .data_region jt32
; CHECK: .long LBB{{[0-9]+_[0-9]+}}-[[JUMP_TABLE]]
; CHECK: [[SKIP_TABLE]]:
; CHECK: add pc, {{r[0-9]+}}, {{r[0-9]+}}
br i1 %tst, label %simple, label %complex
simple:
br label %end
complex:
switch i32 %sw, label %simple [ i32 0, label %other
i32 1, label %third
i32 5, label %end
i32 6, label %other ]
third:
ret %BigInt 0
other:
call void @bar()
unreachable
end:
%val = phi %BigInt [ %l, %complex ], [ -1, %simple ]
ret %BigInt %val
}
declare void @bar()

View File

@ -2,8 +2,8 @@
; test that we print the label of a bb that is only used in a jump table.
; CHECK: .long LBB0_2
; CHECK: LBB0_2:
; CHECK: .long [[JUMPTABLE_DEST:LBB[0-9]+_[0-9]+]]
; CHECK: [[JUMPTABLE_DEST]]:
define i32 @calculate() {
entry:

View File

@ -1,7 +1,7 @@
; RUN: llc < %s -mtriple=thumbv7-linux-gnueabihf -O1 %s -o - | FileCheck %s
; CHECK-LABEL: test_jump_table:
; CHECK: b .LBB
; CHECK: b{{.*}} .LBB
; CHECK-NOT: tbh
define i32 @test_jump_table(i32 %x, float %in) {

View File

@ -14,9 +14,19 @@ declare void @Z_fatal(i8*) noreturn nounwind
declare noalias i8* @calloc(i32, i32) nounwind
; Jump tables are not anchored next to the TBB/TBH any more. Make sure the
; correct address is still calculated (i.e. via a PC-relative symbol *at* the
; TBB/TBH).
define i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
; CHECK-LABEL: main:
; CHECK: tbb
; CHECK-NOT: adr {{r[0-9]+}}, LJTI
; CHECK: [[PCREL_ANCHOR:LCPI[0-9]+_[0-9]+]]:
; CHECK-NEXT: tbb [pc, {{r[0-9]+}}]
; CHECK: LJTI0_0:
; CHECK-NEXT: .data_region jt8
; CHECK-NEXT: .byte (LBB{{[0-9]+_[0-9]+}}-([[PCREL_ANCHOR]]+4))/2
entry:
br label %bb42.i