Shrinkify Thumb2 load / store multiple instructions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@78717 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2009-08-11 21:11:32 +00:00
parent 59fc42debd
commit 4b322e58b7
8 changed files with 129 additions and 60 deletions

View File

@ -1333,7 +1333,8 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
}
/// UndoLRSpillRestore - Remove Thumb push / pop instructions that only spill
/// LR / restore LR to pc. FIXME: This is done here because it's only possible
/// to do this if tBfar is not used.
bool ARMConstantIslands::UndoLRSpillRestore() {
bool MadeChange = false;
for (unsigned i = 0, e = PushPopMIs.size(); i != e; ++i) {

View File

@ -181,9 +181,9 @@ let isReturn = 1, isTerminator = 1 in {
}
// FIXME: remove when we have a way to mark an MI with these properties.
// tPOP_RET - "pop" form that is flagged as a return / terminator. The
// register list is carried as input operands that follow the predicate
// operand, and the record is marked mayLoad.
let isReturn = 1, isTerminator = 1, mayLoad = 1 in
def tPOP_RET : T1I<(outs), (ins pred:$p, reglist:$dst1, variable_ops), IIC_Br,
                   "pop${p} $dst1", []>;
let isCall = 1,
Defs = [R0, R1, R2, R3, R12, LR,
@ -347,16 +347,26 @@ def tSpill : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStore,
// Load / store multiple Instructions.
//
// TODO: A7-44: LDMIA - load multiple
// TODO: Allow these to be predicated
// These require the base address to be written back or to be one of the loaded regs.
// tLDM / tSTM - load / store multiple. Operands are the addressing mode, the
// predicate, and then the register list.
let mayLoad = 1 in
def tLDM : T1I<(outs),
               (ins addrmode4:$addr, pred:$p, reglist:$dst1, variable_ops),
               IIC_iLoad,
               "ldm${addr:submode}${p} $addr, $dst1", []>;

let mayStore = 1 in
def tSTM : T1I<(outs),
               (ins addrmode4:$addr, pred:$p, reglist:$src1, variable_ops),
               IIC_iStore,
               "stm${addr:submode}${p} $addr, $src1", []>;

// tPOP / tPUSH - predicated pop / push. SP is listed as both an implicit use
// and an implicit def; the register list follows the predicate operand.
let mayLoad = 1, Uses = [SP], Defs = [SP] in
def tPOP : T1I<(outs), (ins pred:$p, reglist:$dst1, variable_ops), IIC_Br,
               "pop${p} $dst1", []>;

let mayStore = 1, Uses = [SP], Defs = [SP] in
def tPUSH : T1I<(outs), (ins pred:$p, reglist:$src1, variable_ops), IIC_Br,
                "push${p} $src1", []>;
//===----------------------------------------------------------------------===//
// Arithmetic Instructions.

View File

@ -1106,7 +1106,7 @@ let Defs =
// t2LDM_RET - load multiple that is flagged as a return / terminator. It is
// given the IIC_Br itinerary rather than the plain load itinerary.
let isReturn = 1, isTerminator = 1, mayLoad = 1 in
def t2LDM_RET : T2XI<(outs),
                     (ins addrmode4:$addr, pred:$p, reglist:$dst1, variable_ops),
                     IIC_Br, "ldm${addr:submode}${p}${addr:wide} $addr, $dst1",
                     []>;
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {

View File

@ -885,8 +885,10 @@ void ARMAsmPrinter::printPCLabel(const MachineInstr *MI, int OpNum) {
/// printRegisterList - Print the operands of MI from index OpNum to the end as
/// a brace-enclosed, comma-separated list. Implicit operands are not printed.
void ARMAsmPrinter::printRegisterList(const MachineInstr *MI, int OpNum) {
  O << "{";
  for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) {
    if (MI->getOperand(i).isImplicit())
      continue;
    // The separator goes in front of every element except the one at OpNum.
    // A trailing "not the last operand" test must not be used as well: it
    // would double the commas, and it would leave a dangling ", " whenever
    // the final operands are implicit and therefore skipped.
    if ((int)i != OpNum) O << ", ";
    printOperand(MI, i);
  }
  O << "}";
}

View File

@ -149,6 +149,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
if (MI != MBB.end()) DL = MI->getDebugLoc();
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, get(ARM::tPUSH));
AddDefaultPred(MIB);
for (unsigned i = CSI.size(); i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
// Add the callee-saved register as live-in. It's killed at the spill.
@ -168,7 +169,11 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
return false;
bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
MachineInstr *PopMI = MF.CreateMachineInstr(get(ARM::tPOP),MI->getDebugLoc());
DebugLoc DL = MI->getDebugLoc();
MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::tPOP));
AddDefaultPred(MIB);
bool NumRegs = 0;
for (unsigned i = CSI.size(); i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
if (Reg == ARM::LR) {
@ -176,15 +181,16 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
if (isVarArg)
continue;
Reg = ARM::PC;
PopMI->setDesc(get(ARM::tPOP_RET));
(*MIB).setDesc(get(ARM::tPOP_RET));
MI = MBB.erase(MI);
}
PopMI->addOperand(MachineOperand::CreateReg(Reg, true));
MIB.addReg(Reg, getDefRegState(true));
++NumRegs;
}
// It's illegal to emit pop instruction without operands.
if (PopMI->getNumOperands() > 0)
MBB.insert(MI, PopMI);
if (NumRegs)
MBB.insert(MI, &*MIB);
return true;
}

View File

@ -834,7 +834,7 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF,
if (VARegSaveSize) {
// Epilogue for vararg functions: pop LR to R3 and branch off it.
// FIXME: Verify this is still ok when R3 is no longer being reserved.
BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)).addReg(ARM::R3);
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))).addReg(ARM::R3);
emitSPUpdate(MBB, MBBI, TII, dl, *this, VARegSaveSize);

View File

@ -9,6 +9,7 @@
#define DEBUG_TYPE "t2-reduce-size"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMBaseInstrInfo.h"
#include "Thumb2InstrInfo.h"
@ -26,7 +27,12 @@ STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones");
STATISTIC(Num2Addrs, "Number of 32-bit instrs reduced to 2addr 16-bit ones");
STATISTIC(NumLdSts, "Number of 32-bit load / store reduced to 16-bit ones");
// Hidden command-line limits, one per kind of reduction. Each defaults to -1;
// the checks that read them only apply a limit when the value is not -1.
//   t2-reduce-limit   compared against NumNarrows
//   t2-reduce-limit2  compared against Num2Addrs
//   t2-reduce-limit3  compared against NumLdSts
static cl::opt<int> ReduceLimit("t2-reduce-limit",
                                cl::init(-1), cl::Hidden);
static cl::opt<int> ReduceLimit2Addr("t2-reduce-limit2",
                                     cl::init(-1), cl::Hidden);
static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3",
                                    cl::init(-1), cl::Hidden);
namespace {
/// ReduceTable - A static table with information on mapping from wide
@ -101,7 +107,11 @@ namespace {
{ ARM::t2STRBi12,ARM::tSTRB, 0, 5, 0, 1, 0, 0,0, 1 },
{ ARM::t2STRBs, ARM::tSTRB, 0, 0, 0, 1, 0, 0,0, 1 },
{ ARM::t2STRHi12,ARM::tSTRH, 0, 5, 0, 1, 0, 0,0, 1 },
{ ARM::t2STRHs, ARM::tSTRH, 0, 0, 0, 1, 0, 0,0, 1 }
{ ARM::t2STRHs, ARM::tSTRH, 0, 0, 0, 1, 0, 0,0, 1 },
{ ARM::t2LDM_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 1 },
{ ARM::t2LDM, ARM::tLDM, ARM::tPOP, 0, 0, 1, 1, 1,1, 1 },
{ ARM::t2STM, ARM::tSTM, ARM::tPUSH, 0, 0, 1, 1, 1,1, 1 },
};
class VISIBILITY_HIDDEN Thumb2SizeReduce : public MachineFunctionPass {
@ -185,12 +195,43 @@ static bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
return true;
}
/// VerifyLowRegs - Walk every explicit register operand of MI and return false
/// as soon as one is found that isARMLowRegister() rejects and that is not
/// exempt. Register 0 and CPSR are always exempt. PC is exempt for t2LDM_RET
/// and t2LDM, LR is exempt for t2STM, and SP is exempt for all three of those
/// opcodes. Returns true when no offending operand exists.
static bool VerifyLowRegs(MachineInstr *MI) {
  const unsigned Opcode = MI->getOpcode();
  const bool AllowPC = Opcode == ARM::t2LDM_RET || Opcode == ARM::t2LDM;
  const bool AllowLR = Opcode == ARM::t2STM;
  const bool AllowSP = AllowPC || AllowLR;

  const unsigned NumOps = MI->getNumOperands();
  for (unsigned Idx = 0; Idx < NumOps; ++Idx) {
    const MachineOperand &Op = MI->getOperand(Idx);
    // Only explicit register operands are subject to the check.
    if (Op.isReg() && !Op.isImplicit()) {
      const unsigned R = Op.getReg();
      const bool Exempt = R == 0 || R == ARM::CPSR ||
                          (AllowPC && R == ARM::PC) ||
                          (AllowLR && R == ARM::LR) ||
                          (AllowSP && R == ARM::SP);
      if (!Exempt && !isARMLowRegister(R))
        return false;
    }
  }
  return true;
}
bool
Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry) {
if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt))
return false;
unsigned Scale = 1;
bool HasImmOffset = false;
bool HasShift = false;
bool isLdStMul = false;
bool isPopPush = false;
unsigned Opc = Entry.NarrowOpc1;
unsigned OpNum = 3; // First 'rest' of operands.
switch (Entry.WideOpc) {
default:
llvm_unreachable("Unexpected Thumb2 load / store opcode!");
@ -217,7 +258,27 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
case ARM::t2STRBs:
case ARM::t2STRHs:
HasShift = true;
OpNum = 4;
break;
case ARM::t2LDM_RET:
case ARM::t2LDM:
case ARM::t2STM: {
OpNum = 0;
unsigned BaseReg = MI->getOperand(0).getReg();
unsigned Mode = MI->getOperand(1).getImm();
if (BaseReg == ARM::SP && ARM_AM::getAM4WBFlag(Mode)) {
Opc = Entry.NarrowOpc2;
isPopPush = true;
OpNum = 2;
} else if (Entry.WideOpc == ARM::t2LDM_RET ||
!isARMLowRegister(BaseReg) ||
!ARM_AM::getAM4WBFlag(Mode) ||
ARM_AM::getAM4SubMode(Mode) != ARM_AM::ia) {
return false;
}
isLdStMul = true;
break;
}
}
unsigned OffsetReg = 0;
@ -242,22 +303,21 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
// Add the 16-bit load / store instruction.
// FIXME: Thumb1 addressing mode encode both immediate and register offset.
DebugLoc dl = MI->getDebugLoc();
MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Entry.NarrowOpc1))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1));
if (Entry.NarrowOpc1 != ARM::tLDRSB && Entry.NarrowOpc1 != ARM::tLDRSH) {
// tLDRSB and tLDRSH do not have an immediate offset field. On the other
// hand, it must have an offset register.
// FIXME: Remove this special case.
MIB.addImm(OffsetImm/Scale);
MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Opc));
if (!isLdStMul) {
MIB.addOperand(MI->getOperand(0)).addOperand(MI->getOperand(1));
if (Entry.NarrowOpc1 != ARM::tLDRSB && Entry.NarrowOpc1 != ARM::tLDRSH) {
// tLDRSB and tLDRSH do not have an immediate offset field. On the other
// hand, it must have an offset register.
// FIXME: Remove this special case.
MIB.addImm(OffsetImm/Scale);
}
assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");
MIB.addReg(OffsetReg, getKillRegState(OffsetKill));
}
assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");
MIB.addReg(OffsetReg, getKillRegState(OffsetKill));
// Transfer the rest of operands.
unsigned OpNum = HasShift ? 4 : 3;
for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum)
MIB.addOperand(MI->getOperand(OpNum));
@ -268,28 +328,14 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
return true;
}
/// VerifyLowRegs - Inspect the first TID.getNumOperands() operands of MI.
/// Returns false as soon as one of them is a register, other than register 0
/// or CPSR, that isARMLowRegister() rejects; returns true otherwise.
static bool VerifyLowRegs(MachineInstr *MI, const TargetInstrDesc &TID) {
  const unsigned NumDescOps = TID.getNumOperands();
  for (unsigned Idx = 0; Idx < NumDescOps; ++Idx) {
    const MachineOperand &Op = MI->getOperand(Idx);
    if (Op.isReg()) {
      const unsigned R = Op.getReg();
      const bool Ignored = (R == 0) || (R == ARM::CPSR);
      if (!Ignored && !isARMLowRegister(R))
        return false;
    }
  }
  return true;
}
bool
Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry,
bool LiveCPSR) {
const TargetInstrDesc &TID = MI->getDesc();
if (Entry.LowRegs1 && !VerifyLowRegs(MI, TID))
if (Entry.LowRegs1 && !VerifyLowRegs(MI))
return false;
const TargetInstrDesc &TID = MI->getDesc();
if (TID.mayLoad() || TID.mayStore())
return ReduceLoadStore(MBB, MI, Entry);
return false;
@ -299,6 +345,10 @@ bool
Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry,
bool LiveCPSR) {
if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
return false;
const TargetInstrDesc &TID = MI->getDesc();
unsigned Reg0 = MI->getOperand(0).getReg();
unsigned Reg1 = MI->getOperand(1).getReg();
@ -369,6 +419,9 @@ bool
Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry,
bool LiveCPSR) {
if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
return false;
unsigned Limit = ~0U;
if (Entry.Imm1Limit)
Limit = (1 << Entry.Imm1Limit) - 1;
@ -511,9 +564,6 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
ProcessNext:
LiveCPSR = UpdateCPSRLiveness(*MI, LiveCPSR);
if (ReduceLimit != -1 && ((int)(NumNarrows + Num2Addrs) > ReduceLimit))
break;
}
return Modified;

View File

@ -4,8 +4,8 @@
define i32 @t1() {
; CHECK: t1:
; CHECK: stmfd sp!, {r7, lr}
; CHECK: ldmfd.w sp!, {r7, pc}
; CHECK: push {r7, lr}
; CHECK: pop {r7, pc}
%tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0) ; <i32> [#uses=1]
%tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
%tmp4 = tail call i32 @f1( i32 %tmp, i32 %tmp3 ) ; <i32> [#uses=1]
@ -14,9 +14,9 @@ define i32 @t1() {
define i32 @t2() {
; CHECK: t2:
; CHECK: stmfd sp!, {r7, lr}
; CHECK: ldmia.w
; CHECK: ldmfd.w sp!, {r7, pc}
; CHECK: push {r7, lr}
; CHECK: ldmia
; CHECK: pop {r7, pc}
%tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
%tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
%tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 4) ; <i32> [#uses=1]
@ -26,8 +26,8 @@ define i32 @t2() {
define i32 @t3() {
; CHECK: t3:
; CHECK: stmfd sp!, {r7, lr}
; CHECK: ldmfd.w sp!, {r7, pc}
; CHECK: push {r7, lr}
; CHECK: pop {r7, pc}
%tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
%tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
%tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]