ARM: recommit r237590: allow jump tables to be placed as constant islands.
The original version didn't properly account for the base register being modified before the final jump, so caused miscompilations in Chromium and LLVM. I've fixed this and tested with an LLVM self-host (I don't have the means to build & test Chromium). The general idea remains the same: in pathological cases jump tables can be too far away from the instructions referencing them (like other constants) so they need to be movable. Should fix PR23627.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@238680 91177308-0d34-0410-b5e6-96231b3b80d8
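For context on why the jump tables have to be movable at all: the PC-relative LEA variants that materialise a table's address have small ranges, so a large enough function can push the table out of reach of its user. A minimal C++ sketch of the arithmetic, using the Bits/Scale values that initializeFunctionInfo assigns to the LEApcrelJT-style users later in this diff (illustration only, not part of the patch and not LLVM API code):

#include <cstdio>

// Maximum displacement a PC-relative LEA user tolerates, using the pass's own
// formula: MaxOffs = ((1 << Bits) - 1) * Scale. The Bits/Scale pairs below are
// taken from the cases added to initializeFunctionInfo in this commit and are
// illustrative assumptions, not a spec of every encoding.
static unsigned maxDisplacementBytes(unsigned Bits, unsigned Scale) {
  return ((1u << Bits) - 1) * Scale;
}

int main() {
  std::printf("ARM LEApcrelJT:      %u bytes\n", maxDisplacementBytes(8, 4));  // 1020
  std::printf("Thumb tLEApcrelJT:   %u bytes\n", maxDisplacementBytes(8, 4));  // 1020
  std::printf("Thumb2 t2LEApcrelJT: %u bytes\n", maxDisplacementBytes(12, 1)); // 4095
}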
This commit is contained in: parent d79915c6fe, commit 876dd978b8
@ -922,17 +922,14 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
|
||||
OutStreamer->EmitValue(Expr, Size);
|
||||
}
|
||||
|
||||
void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) {
|
||||
unsigned Opcode = MI->getOpcode();
|
||||
int OpNum = 1;
|
||||
if (Opcode == ARM::BR_JTadd)
|
||||
OpNum = 2;
|
||||
else if (Opcode == ARM::BR_JTm)
|
||||
OpNum = 3;
|
||||
|
||||
const MachineOperand &MO1 = MI->getOperand(OpNum);
|
||||
void ARMAsmPrinter::EmitJumpTableAddrs(const MachineInstr *MI) {
|
||||
const MachineOperand &MO1 = MI->getOperand(1);
|
||||
unsigned JTI = MO1.getIndex();
|
||||
|
||||
// Make sure the Thumb jump table is 4-byte aligned. This will be a nop for
|
||||
// ARM mode tables.
|
||||
EmitAlignment(2);
|
||||
|
||||
// Emit a label for the jump table.
|
||||
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI);
|
||||
OutStreamer->EmitLabel(JTISymbol);
|
||||
@ -972,10 +969,8 @@ void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) {
|
||||
OutStreamer->EmitDataRegion(MCDR_DataRegionEnd);
|
||||
}
|
||||
|
||||
void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
|
||||
unsigned Opcode = MI->getOpcode();
|
||||
int OpNum = (Opcode == ARM::t2BR_JT) ? 2 : 1;
|
||||
const MachineOperand &MO1 = MI->getOperand(OpNum);
|
||||
void ARMAsmPrinter::EmitJumpTableInsts(const MachineInstr *MI) {
|
||||
const MachineOperand &MO1 = MI->getOperand(1);
|
||||
unsigned JTI = MO1.getIndex();
|
||||
|
||||
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI);
|
||||
@ -985,42 +980,56 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
|
||||
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
|
||||
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
|
||||
const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
|
||||
unsigned OffsetWidth = 4;
|
||||
if (MI->getOpcode() == ARM::t2TBB_JT) {
|
||||
OffsetWidth = 1;
|
||||
// Mark the jump table as data-in-code.
|
||||
OutStreamer->EmitDataRegion(MCDR_DataRegionJT8);
|
||||
} else if (MI->getOpcode() == ARM::t2TBH_JT) {
|
||||
OffsetWidth = 2;
|
||||
// Mark the jump table as data-in-code.
|
||||
OutStreamer->EmitDataRegion(MCDR_DataRegionJT16);
|
||||
}
|
||||
|
||||
for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
|
||||
MachineBasicBlock *MBB = JTBBs[i];
|
||||
const MCExpr *MBBSymbolExpr = MCSymbolRefExpr::create(MBB->getSymbol(),
|
||||
OutContext);
|
||||
// If this isn't a TBB or TBH, the entries are direct branch instructions.
|
||||
if (OffsetWidth == 4) {
|
||||
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2B)
|
||||
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2B)
|
||||
.addExpr(MBBSymbolExpr)
|
||||
.addImm(ARMCC::AL)
|
||||
.addReg(0));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ARMAsmPrinter::EmitJumpTableTBInst(const MachineInstr *MI,
                                        unsigned OffsetWidth) {
  assert((OffsetWidth == 1 || OffsetWidth == 2) && "invalid tbb/tbh width");
  const MachineOperand &MO1 = MI->getOperand(1);
  unsigned JTI = MO1.getIndex();

  MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI);
  OutStreamer->EmitLabel(JTISymbol);

  // Emit each entry of the table.
  const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
  const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;

  // Mark the jump table as data-in-code.
  OutStreamer->EmitDataRegion(OffsetWidth == 1 ? MCDR_DataRegionJT8
                                               : MCDR_DataRegionJT16);

  for (auto MBB : JTBBs) {
    const MCExpr *MBBSymbolExpr = MCSymbolRefExpr::create(MBB->getSymbol(),
                                                          OutContext);
    // Otherwise it's an offset from the dispatch instruction. Construct an
    // MCExpr for the entry. We want a value of the form:
    //   (BasicBlockAddr - TableBeginAddr) / 2
    //   (BasicBlockAddr - TBBInstAddr + 4) / 2
    //
    // For example, a TBB table with entries jumping to basic blocks BB0 and BB1
    // would look like:
    // LJTI_0_0:
    //    .byte (LBB0 - LJTI_0_0) / 2
    //    .byte (LBB1 - LJTI_0_0) / 2
    const MCExpr *Expr =
      MCBinaryExpr::createSub(MBBSymbolExpr,
                              MCSymbolRefExpr::create(JTISymbol, OutContext),
                              OutContext);
    //    .byte (LBB0 - (LCPI0_0 + 4)) / 2
    //    .byte (LBB1 - (LCPI0_0 + 4)) / 2
    // where LCPI0_0 is a label defined just before the TBB instruction using
    // this table.
    MCSymbol *TBInstPC = GetCPISymbol(MI->getOperand(0).getImm());
    const MCExpr *Expr = MCBinaryExpr::createAdd(
        MCSymbolRefExpr::create(TBInstPC, OutContext),
        MCConstantExpr::create(4, OutContext), OutContext);
    Expr = MCBinaryExpr::createSub(MBBSymbolExpr, Expr, OutContext);
    Expr = MCBinaryExpr::createDiv(Expr, MCConstantExpr::create(2, OutContext),
                                   OutContext);
    OutStreamer->EmitValue(Expr, OffsetWidth);
@@ -1028,8 +1037,10 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
  // Mark the end of jump table data-in-code region. 32-bit offsets use
  // actual branch instructions here, so we don't mark those as a data-region
  // at all.
  if (OffsetWidth != 4)
    OutStreamer->EmitDataRegion(MCDR_DataRegionEnd);
  OutStreamer->EmitDataRegion(MCDR_DataRegionEnd);

  // Make sure the next instruction is 2-byte aligned.
  EmitAlignment(1);
}

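To make the new per-entry expression concrete: the anchor label (LCPI0_0 above) sits on the TBB/TBH itself, the Thumb PC reads as that address plus 4, and TBB scales each byte by 2. A worked example as a standalone C++ sketch (illustrative helper, not part of the patch):

#include <cassert>
#include <cstdint>

// Value of one table entry: (BasicBlockAddr - (TBInstAddr + 4)) / 2, i.e. the
// ".byte (LBB0 - (LCPI0_0 + 4)) / 2" form in the comments above.
static uint8_t tbbEntry(uint64_t TBInstAddr, uint64_t TargetAddr) {
  uint64_t Delta = TargetAddr - (TBInstAddr + 4);
  assert(Delta % 2 == 0 && Delta / 2 <= 0xFF && "target out of TBB range");
  return static_cast<uint8_t>(Delta / 2);
}

int main() {
  // A destination block 40 bytes past the TBB instruction gets entry (40 - 4) / 2 = 18.
  return tbbEntry(0x1000, 0x1028) == 18 ? 0 : 1;
}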
void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
|
||||
@ -1501,6 +1512,16 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
|
||||
EmitGlobalConstant(MCPE.Val.ConstVal);
|
||||
return;
|
||||
}
|
||||
case ARM::JUMPTABLE_ADDRS:
|
||||
EmitJumpTableAddrs(MI);
|
||||
return;
|
||||
case ARM::JUMPTABLE_INSTS:
|
||||
EmitJumpTableInsts(MI);
|
||||
return;
|
||||
case ARM::JUMPTABLE_TBB:
|
||||
case ARM::JUMPTABLE_TBH:
|
||||
EmitJumpTableTBInst(MI, MI->getOpcode() == ARM::JUMPTABLE_TBB ? 1 : 2);
|
||||
return;
|
||||
case ARM::t2BR_JT: {
|
||||
// Lower and emit the instruction itself, then the jump table following it.
|
||||
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVr)
|
||||
@ -1509,37 +1530,19 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
|
||||
// Add predicate operands.
|
||||
.addImm(ARMCC::AL)
|
||||
.addReg(0));
|
||||
|
||||
// Output the data for the jump table itself
|
||||
EmitJump2Table(MI);
|
||||
return;
|
||||
}
|
||||
case ARM::t2TBB_JT: {
|
||||
// Lower and emit the instruction itself, then the jump table following it.
|
||||
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2TBB)
|
||||
.addReg(ARM::PC)
|
||||
.addReg(MI->getOperand(0).getReg())
|
||||
// Add predicate operands.
|
||||
.addImm(ARMCC::AL)
|
||||
.addReg(0));
|
||||
|
||||
// Output the data for the jump table itself
|
||||
EmitJump2Table(MI);
|
||||
// Make sure the next instruction is 2-byte aligned.
|
||||
EmitAlignment(1);
|
||||
return;
|
||||
}
|
||||
case ARM::t2TBB_JT:
|
||||
case ARM::t2TBH_JT: {
|
||||
// Lower and emit the instruction itself, then the jump table following it.
|
||||
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2TBH)
|
||||
.addReg(ARM::PC)
|
||||
.addReg(MI->getOperand(0).getReg())
|
||||
// Add predicate operands.
|
||||
.addImm(ARMCC::AL)
|
||||
.addReg(0));
|
||||
|
||||
// Output the data for the jump table itself
|
||||
EmitJump2Table(MI);
|
||||
unsigned Opc = MI->getOpcode() == ARM::t2TBB_JT ? ARM::t2TBB : ARM::t2TBH;
|
||||
// Lower and emit the PC label, then the instruction itself.
|
||||
OutStreamer->EmitLabel(GetCPISymbol(MI->getOperand(3).getImm()));
|
||||
EmitToStreamer(*OutStreamer, MCInstBuilder(Opc)
|
||||
.addReg(MI->getOperand(0).getReg())
|
||||
.addReg(MI->getOperand(1).getReg())
|
||||
// Add predicate operands.
|
||||
.addImm(ARMCC::AL)
|
||||
.addReg(0));
|
||||
return;
|
||||
}
|
||||
case ARM::tBR_JTr:
|
||||
@ -1559,13 +1562,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
|
||||
if (Opc == ARM::MOVr)
|
||||
TmpInst.addOperand(MCOperand::createReg(0));
|
||||
EmitToStreamer(*OutStreamer, TmpInst);
|
||||
|
||||
// Make sure the Thumb jump table is 4-byte aligned.
|
||||
if (Opc == ARM::tMOVr)
|
||||
EmitAlignment(2);
|
||||
|
||||
// Output the data for the jump table itself
|
||||
EmitJumpTable(MI);
|
||||
return;
|
||||
}
|
||||
case ARM::BR_JTm: {
|
||||
@ -1589,9 +1585,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
|
||||
TmpInst.addOperand(MCOperand::createImm(ARMCC::AL));
|
||||
TmpInst.addOperand(MCOperand::createReg(0));
|
||||
EmitToStreamer(*OutStreamer, TmpInst);
|
||||
|
||||
// Output the data for the jump table itself
|
||||
EmitJumpTable(MI);
|
||||
return;
|
||||
}
|
||||
case ARM::BR_JTadd: {
|
||||
@ -1606,9 +1599,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
|
||||
.addReg(0)
|
||||
// Add 's' bit operand (always reg0 for this)
|
||||
.addReg(0));
|
||||
|
||||
// Output the data for the jump table itself
|
||||
EmitJumpTable(MI);
|
||||
return;
|
||||
}
|
||||
case ARM::SPACE:
|
||||
|
@ -71,8 +71,9 @@ public:
|
||||
void emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
|
||||
const MCSubtargetInfo *EndInfo) const override;
|
||||
|
||||
void EmitJumpTable(const MachineInstr *MI);
|
||||
void EmitJump2Table(const MachineInstr *MI);
|
||||
void EmitJumpTableAddrs(const MachineInstr *MI);
|
||||
void EmitJumpTableInsts(const MachineInstr *MI);
|
||||
void EmitJumpTableTBInst(const MachineInstr *MI, unsigned OffsetWidth);
|
||||
void EmitInstruction(const MachineInstr *MI) override;
|
||||
bool runOnMachineFunction(MachineFunction &F) override;
|
||||
|
||||
|
@@ -627,6 +627,10 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
  case ARM::t2MOVi32imm:
    return 8;
  case ARM::CONSTPOOL_ENTRY:
  case ARM::JUMPTABLE_INSTS:
  case ARM::JUMPTABLE_ADDRS:
  case ARM::JUMPTABLE_TBB:
  case ARM::JUMPTABLE_TBH:
    // If this machine instr is a constant pool entry, its size is recorded as
    // operand #2.
    return MI->getOperand(2).getImm();
@@ -641,42 +645,6 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
  case ARM::t2Int_eh_sjlj_setjmp:
  case ARM::t2Int_eh_sjlj_setjmp_nofp:
    return 12;
  case ARM::BR_JTr:
  case ARM::BR_JTm:
  case ARM::BR_JTadd:
  case ARM::tBR_JTr:
  case ARM::t2BR_JT:
  case ARM::t2TBB_JT:
  case ARM::t2TBH_JT: {
    // These are jumptable branches, i.e. a branch followed by an inlined
    // jumptable. The size is 4 + 4 * number of entries. For TBB, each
    // entry is one byte; TBH two byte each.
    unsigned EntrySize = (Opc == ARM::t2TBB_JT)
      ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
    unsigned NumOps = MCID.getNumOperands();
    MachineOperand JTOP =
      MI->getOperand(NumOps - (MI->isPredicable() ? 2 : 1));
    unsigned JTI = JTOP.getIndex();
    const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
    assert(MJTI != nullptr);
    const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
    assert(JTI < JT.size());
    // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
    // 4 aligned. The assembler / linker may add 2 byte padding just before
    // the JT entries. The size does not include this padding; the
    // constant islands pass does separate bookkeeping for it.
    // FIXME: If we know the size of the function is less than (1 << 16) *2
    // bytes, we can use 16-bit entries instead. Then there won't be an
    // alignment issue.
    unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
    unsigned NumEntries = JT[JTI].MBBs.size();
    if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
      // Make sure the instruction that follows TBB is 2-byte aligned.
      // FIXME: Constant island pass should insert an "ALIGN" instruction
      // instead.
      ++NumEntries;
    return NumEntries * EntrySize + InstSize;
  }
  case ARM::SPACE:
    return MI->getOperand(1).getImm();
  }

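For reference, the branch removed above sized an inline jump table as NumEntries * EntrySize + InstSize, rounding TBB tables up to an even entry count; the JUMPTABLE_* pseudos now carry an explicit size in operand #2 instead. A small C++ sketch of the removed rule (illustration only, not LLVM code):

#include <cassert>

// EntrySize: 1 for t2TBB_JT, 2 for t2TBH_JT, 4 otherwise. InstSize: 2 bytes
// for tBR_JTr/t2BR_JT, 4 otherwise. TBB pads to an even count so the
// following instruction stays 2-byte aligned.
static unsigned oldJumpTableSize(unsigned NumEntries, unsigned EntrySize,
                                 unsigned InstSize, bool IsTBB) {
  if (IsTBB && (NumEntries & 1))
    ++NumEntries;
  return NumEntries * EntrySize + InstSize;
}

int main() {
  // A t2TBB_JT with 5 targets: (5 rounded up to 6) * 1 + 4 = 10 bytes.
  assert(oldJumpTableSize(5, 1, 4, true) == 10);
}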
@ -180,9 +180,7 @@ namespace {
|
||||
MachineInstr *MI;
|
||||
MachineInstr *CPEMI;
|
||||
MachineBasicBlock *HighWaterMark;
|
||||
private:
|
||||
unsigned MaxDisp;
|
||||
public:
|
||||
bool NegOk;
|
||||
bool IsSoImm;
|
||||
bool KnownAlignment;
|
||||
@ -216,12 +214,24 @@ namespace {
|
||||
};
|
||||
|
||||
/// CPEntries - Keep track of all of the constant pool entry machine
|
||||
/// instructions. For each original constpool index (i.e. those that
|
||||
/// existed upon entry to this pass), it keeps a vector of entries.
|
||||
/// Original elements are cloned as we go along; the clones are
|
||||
/// put in the vector of the original element, but have distinct CPIs.
|
||||
/// instructions. For each original constpool index (i.e. those that existed
|
||||
/// upon entry to this pass), it keeps a vector of entries. Original
|
||||
/// elements are cloned as we go along; the clones are put in the vector of
|
||||
/// the original element, but have distinct CPIs.
|
||||
///
|
||||
/// The first half of CPEntries contains generic constants, the second half
|
||||
/// contains jump tables. Use getCombinedIndex on a generic CPEMI to look up
|
||||
/// which vector it will be in here.
|
||||
std::vector<std::vector<CPEntry> > CPEntries;
|
||||
|
||||
/// Maps a JT index to the offset in CPEntries containing copies of that
|
||||
/// table. The equivalent map for a CONSTPOOL_ENTRY is the identity.
|
||||
DenseMap<int, int> JumpTableEntryIndices;
|
||||
|
||||
/// Maps a JT index to the LEA that actually uses the index to calculate its
|
||||
/// base address.
|
||||
DenseMap<int, int> JumpTableUserIndices;
|
||||
|
||||
/// ImmBranch - One per immediate branch, keeping the machine instruction
|
||||
/// pointer, conditional or unconditional, the max displacement,
|
||||
/// and (if isCond is true) the corresponding unconditional branch
|
||||
@ -269,7 +279,8 @@ namespace {
|
||||
}
|
||||
|
||||
private:
|
||||
void doInitialPlacement(std::vector<MachineInstr*> &CPEMIs);
|
||||
void doInitialConstPlacement(std::vector<MachineInstr *> &CPEMIs);
|
||||
void doInitialJumpTablePlacement(std::vector<MachineInstr *> &CPEMIs);
|
||||
bool BBHasFallthrough(MachineBasicBlock *MBB);
|
||||
CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
|
||||
unsigned getCPELogAlign(const MachineInstr *CPEMI);
|
||||
@ -279,6 +290,7 @@ namespace {
|
||||
void updateForInsertedWaterBlock(MachineBasicBlock *NewBB);
|
||||
void adjustBBOffsetsAfter(MachineBasicBlock *BB);
|
||||
bool decrementCPEReferenceCount(unsigned CPI, MachineInstr* CPEMI);
|
||||
unsigned getCombinedIndex(const MachineInstr *CPEMI);
|
||||
int findInRangeCPEntry(CPUser& U, unsigned UserOffset);
|
||||
bool findAvailableWater(CPUser&U, unsigned UserOffset,
|
||||
water_iterator &WaterIter);
|
||||
@ -301,8 +313,9 @@ namespace {
|
||||
bool optimizeThumb2Instructions();
|
||||
bool optimizeThumb2Branches();
|
||||
bool reorderThumb2JumpTables();
|
||||
unsigned removeDeadDefinitions(MachineInstr *MI, unsigned BaseReg,
|
||||
unsigned IdxReg);
|
||||
bool preserveBaseRegister(MachineInstr *JumpMI, MachineInstr *LEAMI,
|
||||
unsigned &DeadSize, bool &CanDeleteLEA,
|
||||
bool &BaseRegKill);
|
||||
bool optimizeThumb2JumpTables();
|
||||
MachineBasicBlock *adjustJTTargetBlockForward(MachineBasicBlock *BB,
|
||||
MachineBasicBlock *JTBB);
|
||||
@ -413,7 +426,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
|
||||
// we put them all at the end of the function.
|
||||
std::vector<MachineInstr*> CPEMIs;
|
||||
if (!MCP->isEmpty())
|
||||
doInitialPlacement(CPEMIs);
|
||||
doInitialConstPlacement(CPEMIs);
|
||||
|
||||
if (MF->getJumpTableInfo())
|
||||
doInitialJumpTablePlacement(CPEMIs);
|
||||
|
||||
/// The next UID to take is the first unused one.
|
||||
AFI->initPICLabelUId(CPEMIs.size());
|
||||
@ -478,7 +494,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
|
||||
for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
|
||||
for (unsigned j = 0, je = CPEntries[i].size(); j != je; ++j) {
|
||||
const CPEntry & CPE = CPEntries[i][j];
|
||||
AFI->recordCPEClone(i, CPE.CPI);
|
||||
if (CPE.CPEMI && CPE.CPEMI->getOperand(1).isCPI())
|
||||
AFI->recordCPEClone(i, CPE.CPI);
|
||||
}
|
||||
}
|
||||
|
||||
@ -488,6 +505,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
|
||||
WaterList.clear();
|
||||
CPUsers.clear();
|
||||
CPEntries.clear();
|
||||
JumpTableEntryIndices.clear();
|
||||
JumpTableUserIndices.clear();
|
||||
ImmBranches.clear();
|
||||
PushPopMIs.clear();
|
||||
T2JumpTables.clear();
|
||||
@ -495,10 +514,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
|
||||
return MadeChange;
|
||||
}
|
||||
|
||||
/// doInitialPlacement - Perform the initial placement of the constant pool
|
||||
/// entries. To start with, we put them all at the end of the function.
|
||||
/// \brief Perform the initial placement of the regular constant pool entries.
|
||||
/// To start with, we put them all at the end of the function.
|
||||
void
|
||||
ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
|
||||
ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs) {
|
||||
// Create the basic block to hold the CPE's.
|
||||
MachineBasicBlock *BB = MF->CreateMachineBasicBlock();
|
||||
MF->push_back(BB);
|
||||
@ -556,6 +575,66 @@ ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
|
||||
DEBUG(BB->dump());
|
||||
}
|
||||
|
||||
/// \brief Do initial placement of the jump tables. Because Thumb2's TBB and TBH
/// instructions can be made more efficient if the jump table immediately
/// follows the instruction, it's best to place them immediately next to their
/// jumps to begin with. In almost all cases they'll never be moved from that
/// position.
void ARMConstantIslands::doInitialJumpTablePlacement(
    std::vector<MachineInstr *> &CPEMIs) {
  unsigned i = CPEntries.size();
  auto MJTI = MF->getJumpTableInfo();
  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();

  MachineBasicBlock *LastCorrectlyNumberedBB = nullptr;
  for (MachineBasicBlock &MBB : *MF) {
    auto MI = MBB.getLastNonDebugInstr();

    unsigned JTOpcode;
    switch (MI->getOpcode()) {
    default:
      continue;
    case ARM::BR_JTadd:
    case ARM::BR_JTr:
    case ARM::tBR_JTr:
    case ARM::BR_JTm:
      JTOpcode = ARM::JUMPTABLE_ADDRS;
      break;
    case ARM::t2BR_JT:
      JTOpcode = ARM::JUMPTABLE_INSTS;
      break;
    case ARM::t2TBB_JT:
      JTOpcode = ARM::JUMPTABLE_TBB;
      break;
    case ARM::t2TBH_JT:
      JTOpcode = ARM::JUMPTABLE_TBH;
      break;
    }

    unsigned NumOps = MI->getDesc().getNumOperands();
    MachineOperand JTOp =
      MI->getOperand(NumOps - (MI->isPredicable() ? 2 : 1));
    unsigned JTI = JTOp.getIndex();
    unsigned Size = JT[JTI].MBBs.size() * sizeof(uint32_t);
    MachineBasicBlock *JumpTableBB = MF->CreateMachineBasicBlock();
    MF->insert(std::next(MachineFunction::iterator(MBB)), JumpTableBB);
    MachineInstr *CPEMI = BuildMI(*JumpTableBB, JumpTableBB->begin(),
                                  DebugLoc(), TII->get(JTOpcode))
                              .addImm(i++)
                              .addJumpTableIndex(JTI)
                              .addImm(Size);
    CPEMIs.push_back(CPEMI);
    CPEntries.emplace_back(1, CPEntry(CPEMI, JTI));
    JumpTableEntryIndices.insert(std::make_pair(JTI, CPEntries.size() - 1));
    if (!LastCorrectlyNumberedBB)
      LastCorrectlyNumberedBB = &MBB;
  }

  // If we did anything then we need to renumber the subsequent blocks.
  if (LastCorrectlyNumberedBB)
    MF->RenumberBlocks(LastCorrectlyNumberedBB);
}

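A small sketch of the bookkeeping this function sets up (simplified containers, not the real CPEntry machinery): jump-table islands are appended after the ordinary constant-pool entries, and JumpTableEntryIndices records where, which is what getCombinedIndex consults further down in this diff.

#include <cassert>
#include <map>
#include <vector>

int main() {
  // Pretend two ordinary constant-pool indices already occupy slots 0 and 1.
  std::vector<int> CombinedEntries = {0, 1};
  std::map<int, int> JumpTableEntryIndices; // JT index -> slot in CombinedEntries

  int JTI = 0; // first jump table in the function
  JumpTableEntryIndices[JTI] = static_cast<int>(CombinedEntries.size());
  CombinedEntries.push_back(JTI); // the JUMPTABLE_* island lands in slot 2

  // A getCombinedIndex-style lookup: a user of JTI 0 is tracked under slot 2.
  assert(JumpTableEntryIndices[JTI] == 2);
}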
/// BBHasFallthrough - Return true if the specified basic block can fallthrough
|
||||
/// into the block immediately after it.
|
||||
bool ARMConstantIslands::BBHasFallthrough(MachineBasicBlock *MBB) {
|
||||
@@ -595,9 +674,21 @@ ARMConstantIslands::CPEntry
/// getCPELogAlign - Returns the required alignment of the constant pool entry
/// represented by CPEMI. Alignment is measured in log2(bytes) units.
unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) {
  assert(CPEMI && CPEMI->getOpcode() == ARM::CONSTPOOL_ENTRY);
  switch (CPEMI->getOpcode()) {
  case ARM::CONSTPOOL_ENTRY:
    break;
  case ARM::JUMPTABLE_TBB:
    return 0;
  case ARM::JUMPTABLE_TBH:
  case ARM::JUMPTABLE_INSTS:
    return 1;
  case ARM::JUMPTABLE_ADDRS:
    return 2;
  default:
    llvm_unreachable("unknown constpool entry kind");
  }

  unsigned CPI = CPEMI->getOperand(1).getIndex();
  unsigned CPI = getCombinedIndex(CPEMI);
  assert(CPI < MCP->getConstants().size() && "Invalid constant pool index.");
  unsigned Align = MCP->getConstants()[CPI].getAlignment();
  assert(isPowerOf2_32(Align) && "Invalid CPE alignment");
@ -706,12 +797,14 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
|
||||
if (Opc == ARM::tPUSH || Opc == ARM::tPOP_RET)
|
||||
PushPopMIs.push_back(I);
|
||||
|
||||
if (Opc == ARM::CONSTPOOL_ENTRY)
|
||||
if (Opc == ARM::CONSTPOOL_ENTRY || Opc == ARM::JUMPTABLE_ADDRS ||
|
||||
Opc == ARM::JUMPTABLE_INSTS || Opc == ARM::JUMPTABLE_TBB ||
|
||||
Opc == ARM::JUMPTABLE_TBH)
|
||||
continue;
|
||||
|
||||
// Scan the instructions for constant pool operands.
|
||||
for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
|
||||
if (I->getOperand(op).isCPI()) {
|
||||
if (I->getOperand(op).isCPI() || I->getOperand(op).isJTI()) {
|
||||
// We found one. The addressing mode tells us the max displacement
|
||||
// from the PC that this instruction permits.
|
||||
|
||||
@ -727,6 +820,7 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
|
||||
|
||||
// Taking the address of a CP entry.
|
||||
case ARM::LEApcrel:
|
||||
case ARM::LEApcrelJT:
|
||||
// This takes a SoImm, which is 8 bit immediate rotated. We'll
|
||||
// pretend the maximum offset is 255 * 4. Since each instruction
|
||||
// 4 byte wide, this is always correct. We'll check for other
|
||||
@ -737,10 +831,12 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
|
||||
IsSoImm = true;
|
||||
break;
|
||||
case ARM::t2LEApcrel:
|
||||
case ARM::t2LEApcrelJT:
|
||||
Bits = 12;
|
||||
NegOk = true;
|
||||
break;
|
||||
case ARM::tLEApcrel:
|
||||
case ARM::tLEApcrelJT:
|
||||
Bits = 8;
|
||||
Scale = 4;
|
||||
break;
|
||||
@ -768,6 +864,11 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
|
||||
|
||||
// Remember that this is a user of a CP entry.
|
||||
unsigned CPI = I->getOperand(op).getIndex();
|
||||
if (I->getOperand(op).isJTI()) {
|
||||
JumpTableUserIndices.insert(std::make_pair(CPI, CPUsers.size()));
|
||||
CPI = JumpTableEntryIndices[CPI];
|
||||
}
|
||||
|
||||
MachineInstr *CPEMI = CPEMIs[CPI];
|
||||
unsigned MaxOffs = ((1 << Bits)-1) * Scale;
|
||||
CPUsers.push_back(CPUser(I, CPEMI, MaxOffs, NegOk, IsSoImm));
|
||||
@ -1101,6 +1202,13 @@ bool ARMConstantIslands::decrementCPEReferenceCount(unsigned CPI,
|
||||
return false;
|
||||
}
|
||||
|
||||
unsigned ARMConstantIslands::getCombinedIndex(const MachineInstr *CPEMI) {
|
||||
if (CPEMI->getOperand(1).isCPI())
|
||||
return CPEMI->getOperand(1).getIndex();
|
||||
|
||||
return JumpTableEntryIndices[CPEMI->getOperand(1).getIndex()];
|
||||
}
|
||||
|
||||
/// LookForCPEntryInRange - see if the currently referenced CPE is in range;
|
||||
/// if not, see if an in-range clone of the CPE is in range, and if so,
|
||||
/// change the data structures so the user references the clone. Returns:
|
||||
@ -1120,7 +1228,7 @@ int ARMConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset)
|
||||
}
|
||||
|
||||
// No. Look for previously created clones of the CPE that are in range.
|
||||
unsigned CPI = CPEMI->getOperand(1).getIndex();
|
||||
unsigned CPI = getCombinedIndex(CPEMI);
|
||||
std::vector<CPEntry> &CPEs = CPEntries[CPI];
|
||||
for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
|
||||
// We already tried this one
|
||||
@ -1365,7 +1473,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
|
||||
CPUser &U = CPUsers[CPUserIndex];
|
||||
MachineInstr *UserMI = U.MI;
|
||||
MachineInstr *CPEMI = U.CPEMI;
|
||||
unsigned CPI = CPEMI->getOperand(1).getIndex();
|
||||
unsigned CPI = getCombinedIndex(CPEMI);
|
||||
unsigned Size = CPEMI->getOperand(2).getImm();
|
||||
// Compute this only once, it's expensive.
|
||||
unsigned UserOffset = getUserOffset(U);
|
||||
@ -1429,17 +1537,17 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
|
||||
// Update internal data structures to account for the newly inserted MBB.
|
||||
updateForInsertedWaterBlock(NewIsland);
|
||||
|
||||
// Decrement the old entry, and remove it if refcount becomes 0.
|
||||
decrementCPEReferenceCount(CPI, CPEMI);
|
||||
|
||||
// Now that we have an island to add the CPE to, clone the original CPE and
|
||||
// add it to the island.
|
||||
U.HighWaterMark = NewIsland;
|
||||
U.CPEMI = BuildMI(NewIsland, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
|
||||
.addImm(ID).addConstantPoolIndex(CPI).addImm(Size);
|
||||
U.CPEMI = BuildMI(NewIsland, DebugLoc(), CPEMI->getDesc())
|
||||
.addImm(ID).addOperand(CPEMI->getOperand(1)).addImm(Size);
|
||||
CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
|
||||
++NumCPEs;
|
||||
|
||||
// Decrement the old entry, and remove it if refcount becomes 0.
|
||||
decrementCPEReferenceCount(CPI, CPEMI);
|
||||
|
||||
// Mark the basic block as aligned as required by the const-pool entry.
|
||||
NewIsland->setAlignment(getCPELogAlign(U.CPEMI));
|
||||
|
||||
@ -1844,77 +1952,121 @@ bool ARMConstantIslands::optimizeThumb2Branches() {
|
||||
return MadeChange;
|
||||
}
|
||||
|
||||
/// If we've formed a TBB or TBH instruction, the base register is now
|
||||
/// redundant. In most cases, the instructions defining it will now be dead and
|
||||
/// can be tidied up. This function removes them if so, and returns the number
|
||||
/// of bytes saved.
|
||||
unsigned ARMConstantIslands::removeDeadDefinitions(MachineInstr *MI,
|
||||
unsigned BaseReg,
|
||||
unsigned IdxReg) {
|
||||
unsigned BytesRemoved = 0;
|
||||
MachineBasicBlock *MBB = MI->getParent();
|
||||
/// \brief
|
||||
static bool isSimpleIndexCalc(MachineInstr &I, unsigned EntryReg,
|
||||
unsigned BaseReg) {
|
||||
if (I.getOpcode() != ARM::t2ADDrs)
|
||||
return false;
|
||||
|
||||
// Scan backwards to find the instruction that defines the base
|
||||
// register. Due to post-RA scheduling, we can't count on it
|
||||
// immediately preceding the branch instruction.
|
||||
MachineBasicBlock::iterator PrevI = MI;
|
||||
MachineBasicBlock::iterator B = MBB->begin();
|
||||
while (PrevI != B && !PrevI->definesRegister(BaseReg))
|
||||
--PrevI;
|
||||
if (I.getOperand(0).getReg() != EntryReg)
|
||||
return false;
|
||||
|
||||
// If for some reason we didn't find it, we can't do anything, so
|
||||
// just skip this one.
|
||||
if (!PrevI->definesRegister(BaseReg) || PrevI->hasUnmodeledSideEffects() ||
|
||||
PrevI->mayStore())
|
||||
return BytesRemoved;
|
||||
if (I.getOperand(1).getReg() != BaseReg)
|
||||
return false;
|
||||
|
||||
MachineInstr *AddrMI = PrevI;
|
||||
unsigned NewBaseReg = BytesRemoved;
|
||||
// FIXME: what about CC and IdxReg?
|
||||
return true;
|
||||
}
|
||||
|
||||
// Examine the instruction that calculates the jumptable entry address. Make
|
||||
// sure it only defines the base register and kills any uses other than the
|
||||
// index register. We also need precisely one use to trace backwards to
|
||||
// (hopefully) the LEA.
|
||||
for (unsigned k = 0, eee = AddrMI->getNumOperands(); k != eee; ++k) {
|
||||
const MachineOperand &MO = AddrMI->getOperand(k);
|
||||
if (!MO.isReg() || !MO.getReg())
|
||||
continue;
|
||||
if (MO.isDef() && MO.getReg() != BaseReg)
|
||||
return BytesRemoved;
|
||||
/// \brief While trying to form a TBB/TBH instruction, we may (if the table
/// doesn't immediately follow the BR_JT) need access to the start of the
/// jump-table. We know one instruction that produces such a register; this
/// function works out whether that definition can be preserved to the BR_JT,
/// possibly by removing an intervening addition (which is usually needed to
/// calculate the actual entry to jump to).
bool ARMConstantIslands::preserveBaseRegister(MachineInstr *JumpMI,
                                              MachineInstr *LEAMI,
                                              unsigned &DeadSize,
                                              bool &CanDeleteLEA,
                                              bool &BaseRegKill) {
  if (JumpMI->getParent() != LEAMI->getParent())
    return false;

    if (MO.isUse() && MO.getReg() != IdxReg) {
      if (!MO.isKill() || (NewBaseReg != 0 && NewBaseReg != MO.getReg()))
        return BytesRemoved;
      NewBaseReg = MO.getReg();
  // Now we hope that we have at least these instructions in the basic block:
  //     BaseReg = t2LEA ...
  //     [...]
  //     EntryReg = t2ADDrs BaseReg, ...
  //     [...]
  //     t2BR_JT EntryReg
  //
  // We have to be very conservative about what we recognise here though. The
  // main perturbing factors to watch out for are:
  //    + Spills at any point in the chain: not direct problems but we would
  //      expect a blocking Def of the spilled register so in practice what we
  //      can do is limited.
  //    + EntryReg == BaseReg: this is the one situation we should allow a Def
  //      of BaseReg, but only if the t2ADDrs can be removed.
  //    + Some instruction other than t2ADDrs computing the entry. Not seen in
  //      the wild, but we should be careful.
  unsigned EntryReg = JumpMI->getOperand(0).getReg();
  unsigned BaseReg = LEAMI->getOperand(0).getReg();

CanDeleteLEA = true;
|
||||
BaseRegKill = false;
|
||||
MachineInstr *RemovableAdd = nullptr;
|
||||
MachineBasicBlock::iterator I(LEAMI);
|
||||
for (++I; &*I != JumpMI; ++I) {
|
||||
if (isSimpleIndexCalc(*I, EntryReg, BaseReg)) {
|
||||
RemovableAdd = &*I;
|
||||
break;
|
||||
}
|
||||
|
||||
for (unsigned K = 0, E = I->getNumOperands(); K != E; ++K) {
|
||||
const MachineOperand &MO = I->getOperand(K);
|
||||
if (!MO.isReg() || !MO.getReg())
|
||||
continue;
|
||||
if (MO.isDef() && MO.getReg() == BaseReg)
|
||||
return false;
|
||||
if (MO.isUse() && MO.getReg() == BaseReg) {
|
||||
BaseRegKill = BaseRegKill || MO.isKill();
|
||||
CanDeleteLEA = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Want to continue searching for AddrMI, but there are 2 problems: AddrMI is
|
||||
// going away soon, and even decrementing once may be invalid.
|
||||
if (PrevI != B)
|
||||
PrevI = std::prev(PrevI);
|
||||
if (!RemovableAdd)
|
||||
return true;
|
||||
|
||||
DEBUG(dbgs() << "remove addr: " << *AddrMI);
|
||||
BytesRemoved += TII->GetInstSizeInBytes(AddrMI);
|
||||
AddrMI->eraseFromParent();
|
||||
// Check the add really is removable, and that nothing else in the block
|
||||
// clobbers BaseReg.
|
||||
for (++I; &*I != JumpMI; ++I) {
|
||||
for (unsigned K = 0, E = I->getNumOperands(); K != E; ++K) {
|
||||
const MachineOperand &MO = I->getOperand(K);
|
||||
if (!MO.isReg() || !MO.getReg())
|
||||
continue;
|
||||
if (MO.isDef() && MO.getReg() == BaseReg)
|
||||
return false;
|
||||
if (MO.isUse() && MO.getReg() == EntryReg)
|
||||
RemovableAdd = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
// Now scan back again to find the tLEApcrel or t2LEApcrelJT instruction
|
||||
// that gave us the initial base register definition.
|
||||
for (; PrevI != B && !PrevI->definesRegister(NewBaseReg); --PrevI)
|
||||
;
|
||||
if (RemovableAdd) {
|
||||
RemovableAdd->eraseFromParent();
|
||||
DeadSize += 4;
|
||||
} else if (BaseReg == EntryReg) {
|
||||
// The add wasn't removable, but clobbered the base for the TBB. So we can't
|
||||
// preserve it.
|
||||
return false;
|
||||
}
|
||||
|
||||
// The instruction should be a tLEApcrel or t2LEApcrelJT; we want
|
||||
// to delete it as well.
|
||||
MachineInstr *LeaMI = PrevI;
|
||||
if ((LeaMI->getOpcode() != ARM::tLEApcrelJT &&
|
||||
LeaMI->getOpcode() != ARM::t2LEApcrelJT) ||
|
||||
LeaMI->getOperand(0).getReg() != NewBaseReg)
|
||||
return BytesRemoved;
|
||||
// We reached the end of the block without seeing another definition of
|
||||
// BaseReg (except, possibly the t2ADDrs, which was removed). BaseReg can be
|
||||
// used in the TBB/TBH if necessary.
|
||||
return true;
|
||||
}
|
||||
|
||||
DEBUG(dbgs() << "remove lea: " << *LeaMI);
|
||||
BytesRemoved += TII->GetInstSizeInBytes(LeaMI);
|
||||
LeaMI->eraseFromParent();
|
||||
return BytesRemoved;
|
||||
/// \brief Returns whether CPEMI is the first instruction in the block
|
||||
/// immediately following JTMI (assumed to be a TBB or TBH terminator). If so,
|
||||
/// we can switch the first register to PC and usually remove the address
|
||||
/// calculation that preceeded it.
|
||||
static bool jumpTableFollowsTB(MachineInstr *JTMI, MachineInstr *CPEMI) {
|
||||
MachineFunction::iterator MBB = JTMI->getParent();
|
||||
MachineFunction *MF = MBB->getParent();
|
||||
++MBB;
|
||||
|
||||
return MBB != MF->end() && MBB->begin() != MBB->end() &&
|
||||
&*MBB->begin() == CPEMI;
|
||||
}
|
||||
|
||||
/// optimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
|
||||
@ -1955,37 +2107,79 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
|
||||
break;
|
||||
}
|
||||
|
||||
if (ByteOk || HalfWordOk) {
|
||||
MachineBasicBlock *MBB = MI->getParent();
|
||||
unsigned BaseReg = MI->getOperand(0).getReg();
|
||||
bool BaseRegKill = MI->getOperand(0).isKill();
|
||||
if (!BaseRegKill)
|
||||
continue;
|
||||
unsigned IdxReg = MI->getOperand(1).getReg();
|
||||
bool IdxRegKill = MI->getOperand(1).isKill();
|
||||
if (!ByteOk && !HalfWordOk)
|
||||
continue;
|
||||
|
||||
DEBUG(dbgs() << "Shrink JT: " << *MI);
|
||||
unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
|
||||
MachineBasicBlock::iterator MI_JT = MI;
|
||||
MachineInstr *NewJTMI =
|
||||
MachineBasicBlock *MBB = MI->getParent();
|
||||
if (!MI->getOperand(0).isKill()) // FIXME: needed now?
|
||||
continue;
|
||||
unsigned IdxReg = MI->getOperand(1).getReg();
|
||||
bool IdxRegKill = MI->getOperand(1).isKill();
|
||||
|
||||
CPUser &User = CPUsers[JumpTableUserIndices[JTI]];
|
||||
unsigned DeadSize = 0;
|
||||
bool CanDeleteLEA = false;
|
||||
bool BaseRegKill = false;
|
||||
bool PreservedBaseReg =
|
||||
preserveBaseRegister(MI, User.MI, DeadSize, CanDeleteLEA, BaseRegKill);
|
||||
|
||||
if (!jumpTableFollowsTB(MI, User.CPEMI) && !PreservedBaseReg)
|
||||
continue;
|
||||
|
||||
DEBUG(dbgs() << "Shrink JT: " << *MI);
|
||||
MachineInstr *CPEMI = User.CPEMI;
|
||||
unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
|
||||
MachineBasicBlock::iterator MI_JT = MI;
|
||||
MachineInstr *NewJTMI =
|
||||
BuildMI(*MBB, MI_JT, MI->getDebugLoc(), TII->get(Opc))
|
||||
.addReg(IdxReg, getKillRegState(IdxRegKill))
|
||||
.addJumpTableIndex(JTI, JTOP.getTargetFlags());
|
||||
DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": " << *NewJTMI);
|
||||
// FIXME: Insert an "ALIGN" instruction to ensure the next instruction
|
||||
// is 2-byte aligned. For now, asm printer will fix it up.
|
||||
unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI);
|
||||
unsigned OrigSize = TII->GetInstSizeInBytes(MI);
|
||||
unsigned DeadSize = removeDeadDefinitions(MI, BaseReg, IdxReg);
|
||||
MI->eraseFromParent();
|
||||
.addReg(User.MI->getOperand(0).getReg(),
|
||||
getKillRegState(BaseRegKill))
|
||||
.addReg(IdxReg, getKillRegState(IdxRegKill))
|
||||
.addJumpTableIndex(JTI, JTOP.getTargetFlags())
|
||||
.addImm(CPEMI->getOperand(0).getImm());
|
||||
DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": " << *NewJTMI);
|
||||
|
||||
int delta = OrigSize - NewSize + DeadSize;
|
||||
BBInfo[MBB->getNumber()].Size -= delta;
|
||||
adjustBBOffsetsAfter(MBB);
|
||||
unsigned JTOpc = ByteOk ? ARM::JUMPTABLE_TBB : ARM::JUMPTABLE_TBH;
|
||||
CPEMI->setDesc(TII->get(JTOpc));
|
||||
|
||||
++NumTBs;
|
||||
MadeChange = true;
|
||||
if (jumpTableFollowsTB(MI, User.CPEMI)) {
|
||||
NewJTMI->getOperand(0).setReg(ARM::PC);
|
||||
NewJTMI->getOperand(0).setIsKill(false);
|
||||
|
||||
if (CanDeleteLEA) {
|
||||
User.MI->eraseFromParent();
|
||||
DeadSize += 4;
|
||||
|
||||
// The LEA was eliminated, the TBB instruction becomes the only new user
|
||||
// of the jump table.
|
||||
User.MI = NewJTMI;
|
||||
User.MaxDisp = 4;
|
||||
User.NegOk = false;
|
||||
User.IsSoImm = false;
|
||||
User.KnownAlignment = false;
|
||||
} else {
|
||||
// The LEA couldn't be eliminated, so we must add another CPUser to
|
||||
// record the TBB or TBH use.
|
||||
int CPEntryIdx = JumpTableEntryIndices[JTI];
|
||||
auto &CPEs = CPEntries[CPEntryIdx];
|
||||
auto Entry = std::find_if(CPEs.begin(), CPEs.end(), [&](CPEntry &E) {
|
||||
return E.CPEMI == User.CPEMI;
|
||||
});
|
||||
++Entry->RefCount;
|
||||
CPUsers.emplace_back(CPUser(NewJTMI, User.CPEMI, 4, false, false));
|
||||
}
|
||||
}
|
||||
|
||||
unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI);
|
||||
unsigned OrigSize = TII->GetInstSizeInBytes(MI);
|
||||
MI->eraseFromParent();
|
||||
|
||||
int Delta = OrigSize - NewSize + DeadSize;
|
||||
BBInfo[MBB->getNumber()].Size -= Delta;
|
||||
adjustBBOffsetsAfter(MBB);
|
||||
|
||||
++NumTBs;
|
||||
MadeChange = true;
|
||||
}
|
||||
|
||||
return MadeChange;
|
||||
|
@@ -1834,6 +1834,32 @@ def CONSTPOOL_ENTRY :
PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
                    i32imm:$size), NoItinerary, []>;

/// A jumptable consisting of direct 32-bit addresses of the destination basic
/// blocks (either absolute, or relative to the start of the jump-table in PIC
/// mode). Used mostly in ARM and Thumb-1 modes.
def JUMPTABLE_ADDRS :
PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
                    i32imm:$size), NoItinerary, []>;

/// A jumptable consisting of 32-bit jump instructions. Used for Thumb-2 tables
/// that cannot be optimised to use TBB or TBH.
def JUMPTABLE_INSTS :
PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
                    i32imm:$size), NoItinerary, []>;

/// A jumptable consisting of 8-bit unsigned integers representing offsets from
/// a TBB instruction.
def JUMPTABLE_TBB :
PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
                    i32imm:$size), NoItinerary, []>;

/// A jumptable consisting of 16-bit unsigned integers representing offsets from
/// a TBH instruction.
def JUMPTABLE_TBH :
PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
                    i32imm:$size), NoItinerary, []>;


// FIXME: Marking these as hasSideEffects is necessary to prevent machine DCE
// from removing one half of the matched pairs. That breaks PEI, which assumes
// these will always be in pairs, and asserts if it finds otherwise. Better way?
@@ -2232,7 +2258,7 @@ let isBranch = 1, isTerminator = 1 in {
             [(br bb:$target)], (Bcc br_target:$target, (ops 14, zero_reg))>,
             Sched<[WriteBr]>;

let isNotDuplicable = 1, isIndirectBranch = 1 in {
let Size = 4, isNotDuplicable = 1, isIndirectBranch = 1 in {
  def BR_JTr : ARMPseudoInst<(outs),
                             (ins GPR:$target, i32imm:$jt),
                             0, IIC_Br,

@@ -526,6 +526,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
                       0, IIC_Br,
                       [(ARMbrjt tGPR:$target, tjumptable:$jt)]>,
                       Sched<[WriteBrTbl]> {
    let Size = 2;
    list<Predicate> Predicates = [IsThumb, IsThumb1Only];
  }
}

@@ -3531,20 +3531,20 @@ def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br,
  let AsmMatchConverter = "cvtThumbBranches";
}

let isNotDuplicable = 1, isIndirectBranch = 1 in {
let Size = 4, isNotDuplicable = 1, isIndirectBranch = 1 in {
def t2BR_JT : t2PseudoInst<(outs),
        (ins GPR:$target, GPR:$index, i32imm:$jt),
        0, IIC_Br,
        [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt)]>,
        Sched<[WriteBr]>;

// FIXME: Add a non-pc based case that can be predicated.
// FIXME: Add a case that can be predicated.
def t2TBB_JT : t2PseudoInst<(outs),
        (ins GPR:$index, i32imm:$jt), 0, IIC_Br, []>,
        (ins GPR:$base, GPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>,
        Sched<[WriteBr]>;

def t2TBH_JT : t2PseudoInst<(outs),
        (ins GPR:$index, i32imm:$jt), 0, IIC_Br, []>,
        (ins GPR:$base, GPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>,
        Sched<[WriteBr]>;

def t2TBB : T2I<(outs), (ins addrmode_tbb:$addr), IIC_Br,
test/CodeGen/ARM/jump-table-islands-split.ll (new file, 52 lines)
@@ -0,0 +1,52 @@
; RUN: llc -mtriple=thumbv7s-apple-ios8.0 -o - %s | FileCheck %s

declare void @foo(double)
declare i32 @llvm.arm.space(i32, i32)

; The constpool entry used to call @foo should be directly between where we want
; the tbb and its table. Fortunately, the flow is simple enough that we can
; eliminate the entry calculation (ADD) and use the ADR as the base.
;
; I'm hoping this won't be fragile, but if it does break the most likely fix is
; adjusting the @llvm.arm.space call slightly. If this happens too many times
; the test should probably be removed.
define i32 @test_jumptable_not_adjacent(i1 %tst, i32 %sw, i32 %l) {
; CHECK-LABEL: test_jumptable_not_adjacent:
; CHECK: vldr {{d[0-9]+}}, [[DBL_CONST:LCPI[0-9]+_[0-9]+]]
; [...]
; CHECK: adr.w r[[BASE:[0-9]+]], [[JUMP_TABLE:LJTI[0-9]+_[0-9]+]]
; CHECK-NOT: r[[BASE]]

; CHECK: [[TBB_KEY:LCPI[0-9]+_[0-9]+]]:
; CHECK-NEXT: tbb [r[[BASE]], {{r[0-9]+}}]

; CHECK: [[DBL_CONST]]:
; CHECK: .long
; CHECK: .long
; CHECK: [[JUMP_TABLE]]:
; CHECK: .byte (LBB{{[0-9]+}}_{{[0-9]+}}-([[TBB_KEY]]+4)

  br label %complex

complex:
  call void @foo(double 12345.0)
  call i32 @llvm.arm.space(i32 970, i32 undef)
  switch i32 %sw, label %second [ i32 0, label %other
                                  i32 1, label %third
                                  i32 2, label %end
                                  i32 3, label %other ]

second:
  ret i32 43
third:
  ret i32 0

other:
  call void @bar()
  unreachable

end:
  ret i32 42
}

declare void @bar()
test/CodeGen/ARM/jump-table-islands.ll (new file, 40 lines)
@@ -0,0 +1,40 @@
; RUN: llc -mtriple=armv7-apple-ios8.0 -o - %s | FileCheck %s

%BigInt = type i5500

define %BigInt @test_moved_jumptable(i1 %tst, i32 %sw, %BigInt %l) {
; CHECK-LABEL: test_moved_jumptable:

; CHECK: adr {{r[0-9]+}}, [[JUMP_TABLE:LJTI[0-9]+_[0-9]+]]
; CHECK: b [[SKIP_TABLE:LBB[0-9]+_[0-9]+]]

; CHECK: [[JUMP_TABLE]]:
; CHECK: .data_region jt32
; CHECK: .long LBB{{[0-9]+_[0-9]+}}-[[JUMP_TABLE]]

; CHECK: [[SKIP_TABLE]]:
; CHECK: add pc, {{r[0-9]+}}, {{r[0-9]+}}
  br i1 %tst, label %simple, label %complex

simple:
  br label %end

complex:
  switch i32 %sw, label %simple [ i32 0, label %other
                                  i32 1, label %third
                                  i32 5, label %end
                                  i32 6, label %other ]

third:
  ret %BigInt 0

other:
  call void @bar()
  unreachable

end:
  %val = phi %BigInt [ %l, %complex ], [ -1, %simple ]
  ret %BigInt %val
}

declare void @bar()
@@ -2,8 +2,8 @@

; test that we print the label of a bb that is only used in a jump table.

; CHECK: .long LBB0_2
; CHECK: LBB0_2:
; CHECK: .long [[JUMPTABLE_DEST:LBB[0-9]+_[0-9]+]]
; CHECK: [[JUMPTABLE_DEST]]:

define i32 @calculate() {
entry:

@@ -1,7 +1,7 @@
; RUN: llc < %s -mtriple=thumbv7-linux-gnueabihf -O1 %s -o - | FileCheck %s

; CHECK-LABEL: test_jump_table:
; CHECK: b .LBB
; CHECK: b{{.*}} .LBB
; CHECK-NOT: tbh

define i32 @test_jump_table(i32 %x, float %in) {

@@ -14,9 +14,19 @@ declare void @Z_fatal(i8*) noreturn nounwind

declare noalias i8* @calloc(i32, i32) nounwind

; Jump tables are not anchored next to the TBB/TBH any more. Make sure the
; correct address is still calculated (i.e. via a PC-relative symbol *at* the
; TBB/TBH).
define i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
; CHECK-LABEL: main:
; CHECK: tbb
; CHECK-NOT: adr {{r[0-9]+}}, LJTI
; CHECK: [[PCREL_ANCHOR:LCPI[0-9]+_[0-9]+]]:
; CHECK-NEXT: tbb [pc, {{r[0-9]+}}]

; CHECK: LJTI0_0:
; CHECK-NEXT: .data_region jt8
; CHECK-NEXT: .byte (LBB{{[0-9]+_[0-9]+}}-([[PCREL_ANCHOR]]+4))/2

entry:
  br label %bb42.i