From 45d6a21e21255294ac3add0f2da25fa1187e23fa Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Fri, 14 Aug 2009 00:32:16 +0000 Subject: [PATCH] Shrink ADR and LDR from constantpool late during constantpool island pass. llvm-svn: 78970 --- lib/Target/ARM/ARMConstantIslandPass.cpp | 83 ++++++++++++++++++++---- lib/Target/ARM/ARMInstrThumb.td | 9 +-- lib/Target/ARM/ARMInstrThumb2.td | 1 - lib/Target/ARM/Thumb2SizeReduction.cpp | 2 + test/CodeGen/Thumb2/tls2.ll | 2 +- 5 files changed, 80 insertions(+), 17 deletions(-) diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index d60799e199c..ea64bab79d1 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -32,11 +32,12 @@ #include "llvm/ADT/Statistic.h" using namespace llvm; -STATISTIC(NumCPEs, "Number of constpool entries"); -STATISTIC(NumSplit, "Number of uncond branches inserted"); -STATISTIC(NumCBrFixed, "Number of cond branches fixed"); -STATISTIC(NumUBrFixed, "Number of uncond branches fixed"); -STATISTIC(NumTBs, "Number of table branches generated"); +STATISTIC(NumCPEs, "Number of constpool entries"); +STATISTIC(NumSplit, "Number of uncond branches inserted"); +STATISTIC(NumCBrFixed, "Number of cond branches fixed"); +STATISTIC(NumUBrFixed, "Number of uncond branches fixed"); +STATISTIC(NumTBs, "Number of table branches generated"); +STATISTIC(NumT2CPShrunk, "Number of Thumb2 constantpool instructions shrunk"); namespace { /// ARMConstantIslands - Due to limited PC-relative displacements, ARM @@ -179,6 +180,8 @@ namespace { bool FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br); bool FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br); bool UndoLRSpillRestore(); + bool OptimizeThumb2Instructions(MachineFunction &MF); + bool OptimizeThumb2Branches(MachineFunction &MF); bool OptimizeThumb2JumpTables(MachineFunction &MF); unsigned GetOffsetOf(MachineInstr *MI) const; @@ -292,8 +295,9 @@ bool 
ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { MadeChange = true; } - // Let's see if we can use tbb / tbh to do jump tables. - MadeChange |= OptimizeThumb2JumpTables(MF); + // Shrink 32-bit Thumb2 branch, load, and store instructions. + if (isThumb2) + MadeChange |= OptimizeThumb2Instructions(MF); // After a while, this might be made debug-only, but it is not expensive. verify(MF); @@ -1077,7 +1081,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, unsigned Size = CPEMI->getOperand(2).getImm(); MachineBasicBlock *NewMBB; // Compute this only once, it's expensive. The 4 or 8 is the value the - // hardware keeps in the PC (2 insns ahead of the reference). + // hardware keeps in the PC. unsigned UserOffset = GetOffsetOf(UserMI) + (isThumb ? 4 : 8); // See if the current entry is within range, or there is a clone of it @@ -1350,6 +1354,62 @@ bool ARMConstantIslands::UndoLRSpillRestore() { return MadeChange; } +bool ARMConstantIslands::OptimizeThumb2Instructions(MachineFunction &MF) { + bool MadeChange = false; + + // Shrink ADR and LDR from constantpool. + for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) { + CPUser &U = CPUsers[i]; + unsigned Opcode = U.MI->getOpcode(); + unsigned NewOpc = 0; + unsigned Scale = 1; + unsigned Bits = 0; + switch (Opcode) { + default: break; + case ARM::t2LEApcrel: + if (isARMLowRegister(U.MI->getOperand(0).getReg())) { + NewOpc = ARM::tLEApcrel; + Bits = 8; + Scale = 4; + } + break; + case ARM::t2LDRpci: + if (isARMLowRegister(U.MI->getOperand(0).getReg())) { + NewOpc = ARM::tLDRpci; + Bits = 8; + Scale = 4; + } + break; + } + + if (!NewOpc) + continue; + + unsigned UserOffset = GetOffsetOf(U.MI) + 4; + unsigned MaxOffs = ((1 << Bits) - 1) * Scale; + // FIXME: Check if offset is multiple of scale if scale is not 4. 
+ if (CPEIsInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) { + U.MI->setDesc(TII->get(NewOpc)); + MachineBasicBlock *MBB = U.MI->getParent(); + BBSizes[MBB->getNumber()] -= 2; + AdjustBBOffsetsAfter(MBB, -2); + ++NumT2CPShrunk; + MadeChange = true; + } + } + + MadeChange |= OptimizeThumb2JumpTables(MF); + MadeChange |= OptimizeThumb2Branches(MF); + return MadeChange; +} + +bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) { + return false; +} + + +/// OptimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller +/// jumptables when it's possible. bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { bool MadeChange = false; @@ -1417,10 +1477,11 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { if (!OptOk) continue; - // The previous instruction should be a t2LEApcrelJT, we want to delete - // it as well. + // The previous instruction should be a tLEApcrelJT or t2LEApcrelJT, we + // want to delete it as well. MachineInstr *LeaMI = --PrevI; - if (LeaMI->getOpcode() != ARM::t2LEApcrelJT || + if ((LeaMI->getOpcode() != ARM::tLEApcrelJT && + LeaMI->getOpcode() != ARM::t2LEApcrelJT) || LeaMI->getOperand(0).getReg() != BaseReg) OptOk = false; diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 212c32b5a33..3e4725f4ceb 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -627,11 +627,12 @@ def tMOVCCi : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALU, // tLEApcrel - Load a pc-relative address into a register without offending the // assembler. 
-def tLEApcrel : T1I<(outs tGPR:$dst), (ins i32imm:$label), IIC_iALU, - "adr $dst, #$label", []>; +def tLEApcrel : T1I<(outs tGPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALU, + "adr$p.n $dst, #$label", []>; -def tLEApcrelJT : T1I<(outs tGPR:$dst), (ins i32imm:$label, lane_cst:$id), - IIC_iALU, "adr $dst, #${label}_${id}", []>; +def tLEApcrelJT : T1I<(outs tGPR:$dst), + (ins i32imm:$label, lane_cst:$id, pred:$p), + IIC_iALU, "adr$p $dst, #${label}_${id}", []>; //===----------------------------------------------------------------------===// // TLS Instructions diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index c4e79a71073..840a6bb0130 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -434,7 +434,6 @@ def t2LEApcrelJT : T2XI<(outs GPR:$dst), (ins i32imm:$label, lane_cst:$id, pred:$p), IIC_iALU, "adr$p.w $dst, #${label}_${id}", []>; - // ADD r, sp, {so_imm|i12} def t2ADDrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), IIC_iALU, "add", ".w $dst, $sp, $imm", []>; diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index fd6bcf0f5ed..0a86a75ff7c 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -71,6 +71,8 @@ namespace { { ARM::t2CMPzri,ARM::tCMPzi8, 0, 8, 0, 1, 0, 2,0, 0 }, { ARM::t2CMPzrr,ARM::tCMPzhir,0, 0, 0, 0, 0, 2,0, 0 }, { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 0 }, + // FIXME: adr.n immediate offset must be multiple of 4. 
+ //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0 }, { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 0 }, { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 0 }, { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 0 }, diff --git a/test/CodeGen/Thumb2/tls2.ll b/test/CodeGen/Thumb2/tls2.ll index ca40a242ff0..3396b0ba43f 100644 --- a/test/CodeGen/Thumb2/tls2.ll +++ b/test/CodeGen/Thumb2/tls2.ll @@ -1,7 +1,7 @@ ; RUN: llvm-as < %s | llc -mtriple=thumbv7-linux-gnueabi | \ ; RUN: grep {i(gottpoff)} ; RUN: llvm-as < %s | llc -mtriple=thumbv7-linux-gnueabi | \ -; RUN: grep {ldr.w r., \[pc, r.\]} +; RUN: grep {ldr r., \[pc, r.\]} ; RUN: llvm-as < %s | llc -mtriple=thumbv7-linux-gnueabi \ ; RUN: -relocation-model=pic | grep {__tls_get_addr}