mirror of
https://github.com/RPCS3/llvm.git
synced 2024-11-26 05:00:39 +00:00
Switch ARM target to register masks.
I'll let the buildbots determine the compile time improvements from this change, but 464.h264ref has 5% faster codegen at -O2.

This patch does cause some assembly changes. Branch folding can make different decisions about calls with dead return values. CriticalAntiDepBreaker may choose different registers because its liveness tracking is affected. MachineCopyPropagation may sometimes leave a dead copy behind.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@151331 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
d1b220a33b
commit
c54f634886
@ -2151,6 +2151,10 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
|
||||
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
|
||||
MIB.addReg(RegArgs[i]);
|
||||
|
||||
// Add a register mask with the call-preserved registers.
|
||||
// Proper defs for return values will be added by setPhysRegsDeadExcept().
|
||||
MIB.addRegMask(TRI.getCallPreservedMask(CC));
|
||||
|
||||
// Finish off the call including any return values.
|
||||
SmallVector<unsigned, 4> UsedRegs;
|
||||
if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;
|
||||
@ -2283,6 +2287,10 @@ bool ARMFastISel::SelectCall(const Instruction *I,
|
||||
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
|
||||
MIB.addReg(RegArgs[i]);
|
||||
|
||||
// Add a register mask with the call-preserved registers.
|
||||
// Proper defs for return values will be added by setPhysRegsDeadExcept().
|
||||
MIB.addRegMask(TRI.getCallPreservedMask(CC));
|
||||
|
||||
// Finish off the call including any return values.
|
||||
SmallVector<unsigned, 4> UsedRegs;
|
||||
if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;
|
||||
|
@ -1600,6 +1600,12 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
|
||||
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
|
||||
RegsToPass[i].second.getValueType()));
|
||||
|
||||
// Add a register mask operand representing the call-preserved registers.
|
||||
const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
|
||||
const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
|
||||
assert(Mask && "Missing call preserved mask for calling convention");
|
||||
Ops.push_back(DAG.getRegisterMask(Mask));
|
||||
|
||||
if (InFlag.getNode())
|
||||
Ops.push_back(InFlag);
|
||||
|
||||
|
@ -1898,18 +1898,13 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
|
||||
}
|
||||
}
|
||||
|
||||
// All calls clobber the non-callee saved registers. SP is marked as
|
||||
// a use to prevent stack-pointer assignments that appear immediately
|
||||
// before calls from potentially appearing dead.
|
||||
// SP is marked as a use to prevent stack-pointer assignments that appear
|
||||
// immediately before calls from potentially appearing dead.
|
||||
let isCall = 1,
|
||||
// On non-IOS platforms R9 is callee-saved.
|
||||
// FIXME: Do we really need a non-predicated version? If so, it should
|
||||
// at least be a pseudo instruction expanding to the predicated version
|
||||
// at MC lowering time.
|
||||
Defs = [R0, R1, R2, R3, R12, LR,
|
||||
Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15,
|
||||
CPSR, FPSCR],
|
||||
Uses = [SP] in {
|
||||
Defs = [LR], Uses = [SP] in {
|
||||
def BL : ABXI<0b1011, (outs), (ins bl_target:$func, variable_ops),
|
||||
IIC_Br, "bl\t$func",
|
||||
[(ARMcall tglobaladdr:$func)]>,
|
||||
@ -1964,10 +1959,7 @@ let isCall = 1,
|
||||
// On IOS R9 is call-clobbered.
|
||||
// R7 is marked as a use to prevent frame-pointer assignments from being
|
||||
// moved above / below calls.
|
||||
Defs = [R0, R1, R2, R3, R9, R12, LR,
|
||||
Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15,
|
||||
CPSR, FPSCR],
|
||||
Uses = [R7, SP] in {
|
||||
Defs = [LR], Uses = [R7, SP] in {
|
||||
def BLr9 : ARMPseudoExpand<(outs), (ins bl_target:$func, variable_ops),
|
||||
4, IIC_Br,
|
||||
[(ARMcall tglobaladdr:$func)], (BL bl_target:$func)>,
|
||||
@ -2071,9 +2063,7 @@ def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func",
|
||||
|
||||
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
|
||||
// IOS versions.
|
||||
let Defs = [R0, R1, R2, R3, R9, R12,
|
||||
Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, PC],
|
||||
Uses = [SP] in {
|
||||
let Uses = [SP] in {
|
||||
def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
|
||||
IIC_Br, []>, Requires<[IsIOS]>;
|
||||
|
||||
@ -2093,9 +2083,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
|
||||
}
|
||||
|
||||
// Non-IOS versions (the difference is R9).
|
||||
let Defs = [R0, R1, R2, R3, R12,
|
||||
Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, PC],
|
||||
Uses = [SP] in {
|
||||
let Uses = [SP] in {
|
||||
def TCRETURNdiND : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
|
||||
IIC_Br, []>, Requires<[IsNotIOS]>;
|
||||
|
||||
|
@ -406,10 +406,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
|
||||
// potentially appearing dead.
|
||||
let isCall = 1,
|
||||
// On non-IOS platforms R9 is callee-saved.
|
||||
Defs = [R0, R1, R2, R3, R12, LR,
|
||||
Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15,
|
||||
CPSR, FPSCR],
|
||||
Uses = [SP] in {
|
||||
Defs = [LR], Uses = [SP] in {
|
||||
// Also used for Thumb2
|
||||
def tBL : TIx2<0b11110, 0b11, 1,
|
||||
(outs), (ins pred:$p, t_bltarget:$func, variable_ops), IIC_Br,
|
||||
@ -460,10 +457,7 @@ let isCall = 1,
|
||||
// On IOS R9 is call-clobbered.
|
||||
// R7 is marked as a use to prevent frame-pointer assignments from being
|
||||
// moved above / below calls.
|
||||
Defs = [R0, R1, R2, R3, R9, R12, LR,
|
||||
Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15,
|
||||
CPSR, FPSCR],
|
||||
Uses = [R7, SP] in {
|
||||
Defs = [LR], Uses = [R7, SP] in {
|
||||
// Also used for Thumb2
|
||||
def tBLr9 : tPseudoExpand<(outs), (ins pred:$p, t_bltarget:$func, variable_ops),
|
||||
4, IIC_Br, [(ARMtcall tglobaladdr:$func)],
|
||||
@ -529,9 +523,7 @@ let isBranch = 1, isTerminator = 1 in
|
||||
// Tail calls
|
||||
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
|
||||
// IOS versions.
|
||||
let Defs = [R0, R1, R2, R3, R9, R12,
|
||||
Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, PC],
|
||||
Uses = [SP] in {
|
||||
let Uses = [SP] in {
|
||||
// tTAILJMPd: IOS version uses a Thumb2 branch (no Thumb1 tail calls
|
||||
// on IOS), so it's in ARMInstrThumb2.td.
|
||||
def tTAILJMPr : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
|
||||
@ -540,9 +532,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
|
||||
Requires<[IsThumb, IsIOS]>;
|
||||
}
|
||||
// Non-IOS versions (the difference is R9).
|
||||
let Defs = [R0, R1, R2, R3, R12,
|
||||
Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, PC],
|
||||
Uses = [SP] in {
|
||||
let Uses = [SP] in {
|
||||
def tTAILJMPdND : tPseudoExpand<(outs),
|
||||
(ins t_brtarget:$dst, pred:$p, variable_ops),
|
||||
4, IIC_Br, [],
|
||||
|
@ -3260,9 +3260,7 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br,
|
||||
// it goes here.
|
||||
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
|
||||
// IOS version.
|
||||
let Defs = [R0, R1, R2, R3, R9, R12, PC,
|
||||
Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15],
|
||||
Uses = [SP] in
|
||||
let Uses = [SP] in
|
||||
def tTAILJMPd: tPseudoExpand<(outs),
|
||||
(ins uncondbrtarget:$dst, pred:$p, variable_ops),
|
||||
4, IIC_Br, [],
|
||||
|
Loading…
Reference in New Issue
Block a user