Switch ARM target to register masks.

I'll let the buildbots determine the compile time improvements from this
change, but 464.h264ref has 5% faster codegen at -O2.

This patch does cause some assembly changes.  Branch folding can make
different decisions about calls with dead return values.
CriticalAntiDepBreaker may choose different registers because its
liveness tracking is affected.  MachineCopyPropagation may sometimes
leave a dead copy behind.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@151331 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Jakob Stoklund Olesen 2012-02-24 01:19:29 +00:00
parent d1b220a33b
commit c54f634886
5 changed files with 25 additions and 35 deletions

View File

@ -2151,6 +2151,10 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
MIB.addReg(RegArgs[i]);
// Add a register mask with the call-preserved registers.
// Proper defs for return values will be added by setPhysRegsDeadExcept().
MIB.addRegMask(TRI.getCallPreservedMask(CC));
// Finish off the call including any return values.
SmallVector<unsigned, 4> UsedRegs;
if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;
@ -2283,6 +2287,10 @@ bool ARMFastISel::SelectCall(const Instruction *I,
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
MIB.addReg(RegArgs[i]);
// Add a register mask with the call-preserved registers.
// Proper defs for return values will be added by setPhysRegsDeadExcept().
MIB.addRegMask(TRI.getCallPreservedMask(CC));
// Finish off the call including any return values.
SmallVector<unsigned, 4> UsedRegs;
if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;

View File

@ -1600,6 +1600,12 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
if (InFlag.getNode())
Ops.push_back(InFlag);

View File

@ -1898,18 +1898,13 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
}
}
// All calls clobber the non-callee saved registers. SP is marked as
// a use to prevent stack-pointer assignments that appear immediately
// before calls from potentially appearing dead.
// SP is marked as a use to prevent stack-pointer assignments that appear
// immediately before calls from potentially appearing dead.
let isCall = 1,
// On non-IOS platforms R9 is callee-saved.
// FIXME: Do we really need a non-predicated version? If so, it should
// at least be a pseudo instruction expanding to the predicated version
// at MC lowering time.
Defs = [R0, R1, R2, R3, R12, LR,
Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15,
CPSR, FPSCR],
Uses = [SP] in {
Defs = [LR], Uses = [SP] in {
def BL : ABXI<0b1011, (outs), (ins bl_target:$func, variable_ops),
IIC_Br, "bl\t$func",
[(ARMcall tglobaladdr:$func)]>,
@ -1964,10 +1959,7 @@ let isCall = 1,
// On IOS R9 is call-clobbered.
// R7 is marked as a use to prevent frame-pointer assignments from being
// moved above / below calls.
Defs = [R0, R1, R2, R3, R9, R12, LR,
Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15,
CPSR, FPSCR],
Uses = [R7, SP] in {
Defs = [LR], Uses = [R7, SP] in {
def BLr9 : ARMPseudoExpand<(outs), (ins bl_target:$func, variable_ops),
4, IIC_Br,
[(ARMcall tglobaladdr:$func)], (BL bl_target:$func)>,
@ -2071,9 +2063,7 @@ def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func",
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
// IOS versions.
let Defs = [R0, R1, R2, R3, R9, R12,
Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, PC],
Uses = [SP] in {
let Uses = [SP] in {
def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
IIC_Br, []>, Requires<[IsIOS]>;
@ -2093,9 +2083,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
}
// Non-IOS versions (the difference is R9).
let Defs = [R0, R1, R2, R3, R12,
Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, PC],
Uses = [SP] in {
let Uses = [SP] in {
def TCRETURNdiND : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
IIC_Br, []>, Requires<[IsNotIOS]>;

View File

@ -406,10 +406,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
// potentially appearing dead.
let isCall = 1,
// On non-IOS platforms R9 is callee-saved.
Defs = [R0, R1, R2, R3, R12, LR,
Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15,
CPSR, FPSCR],
Uses = [SP] in {
Defs = [LR], Uses = [SP] in {
// Also used for Thumb2
def tBL : TIx2<0b11110, 0b11, 1,
(outs), (ins pred:$p, t_bltarget:$func, variable_ops), IIC_Br,
@ -460,10 +457,7 @@ let isCall = 1,
// On IOS R9 is call-clobbered.
// R7 is marked as a use to prevent frame-pointer assignments from being
// moved above / below calls.
Defs = [R0, R1, R2, R3, R9, R12, LR,
Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15,
CPSR, FPSCR],
Uses = [R7, SP] in {
Defs = [LR], Uses = [R7, SP] in {
// Also used for Thumb2
def tBLr9 : tPseudoExpand<(outs), (ins pred:$p, t_bltarget:$func, variable_ops),
4, IIC_Br, [(ARMtcall tglobaladdr:$func)],
@ -529,9 +523,7 @@ let isBranch = 1, isTerminator = 1 in
// Tail calls
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
// IOS versions.
let Defs = [R0, R1, R2, R3, R9, R12,
Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, PC],
Uses = [SP] in {
let Uses = [SP] in {
// tTAILJMPd: IOS version uses a Thumb2 branch (no Thumb1 tail calls
// on IOS), so it's in ARMInstrThumb2.td.
def tTAILJMPr : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
@ -540,9 +532,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
Requires<[IsThumb, IsIOS]>;
}
// Non-IOS versions (the difference is R9).
let Defs = [R0, R1, R2, R3, R12,
Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, PC],
Uses = [SP] in {
let Uses = [SP] in {
def tTAILJMPdND : tPseudoExpand<(outs),
(ins t_brtarget:$dst, pred:$p, variable_ops),
4, IIC_Br, [],

View File

@ -3260,9 +3260,7 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br,
// it goes here.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
// IOS version.
let Defs = [R0, R1, R2, R3, R9, R12, PC,
Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15],
Uses = [SP] in
let Uses = [SP] in
def tTAILJMPd: tPseudoExpand<(outs),
(ins uncondbrtarget:$dst, pred:$p, variable_ops),
4, IIC_Br, [],