[AArch64] Break the dependency between FP and SP when possible.

When the SP is not changed because of realignment, VLAs, etc., we restore the SP
by using the previous value of SP and not the FP. Breaking the dependency will
help in cases when the epilog of a callee is close to the epilog of the caller,
because in that case "sub sp, fp, #" would depend on the load restoring the FP
in the epilog of the callee.

http://reviews.llvm.org/D18060
Patch by Aditya Kumar and Evandro Menezes.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@263458 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chad Rosier 2016-03-14 18:17:41 +00:00
parent b7448a08dd
commit 4005bceb12
6 changed files with 25 additions and 14 deletions

View File

@ -396,6 +396,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
.addReg(scratchSPReg, RegState::Kill)
.addImm(andMaskEncoded);
AFI->setStackRealigned(true);
}
// If we need a base pointer, set it up here. It's whatever the value of the
@ -607,10 +608,13 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// FIXME: Rather than doing the math here, we should instead just use
// non-post-indexed loads for the restores if we aren't actually going to
// be able to save any instructions.
if (NumBytes || MFI->hasVarSizedObjects())
if (MFI->hasVarSizedObjects() || AFI->isStackRealigned())
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
-AFI->getCalleeSavedStackSize() + 16, TII,
MachineInstr::FrameDestroy);
else if (NumBytes)
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII,
MachineInstr::FrameDestroy);
// This must be placed after the callee-save restore code because that code
// assumes the SP is at the same location as it was after the callee-save save

View File

@ -79,18 +79,22 @@ class AArch64FunctionInfo : public MachineFunctionInfo {
/// copies.
bool IsSplitCSR;
/// True when the stack gets realigned dynamically because the size of the
/// stack frame is unknown at compile time, e.g., in the case of VLAs.
bool StackRealigned;
public:
AArch64FunctionInfo()
: BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false),
NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0),
VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0),
IsSplitCSR(false) {}
IsSplitCSR(false), StackRealigned(false) {}
explicit AArch64FunctionInfo(MachineFunction &MF)
: BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false),
NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0),
VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0),
IsSplitCSR(false) {
IsSplitCSR(false), StackRealigned(false) {
(void)MF;
}
@ -105,6 +109,9 @@ public:
bool hasStackFrame() const { return HasStackFrame; }
void setHasStackFrame(bool s) { HasStackFrame = s; }
bool isStackRealigned() const { return StackRealigned; }
void setStackRealigned(bool s) { StackRealigned = s; }
bool isSplitCSR() const { return IsSplitCSR; }
void setIsSplitCSR(bool s) { IsSplitCSR = s; }

View File

@ -9,7 +9,7 @@
; CHECK: adrp x0, L_.str@PAGE
; CHECK: add x0, x0, L_.str@PAGEOFF
; CHECK-NEXT: bl _puts
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ldp x29, x30, [sp], #16
; CHECK-NEXT: ret

View File

@ -29,7 +29,7 @@ entry:
; CHECK: mov x29, sp
; CHECK-NEXT: sub sp, sp, #32
; CHECK: Ltmp
; CHECK: mov sp, x29
; CHECK: add sp, sp, #32
; CHECK: ret
define void @caller_meta_leaf() {

View File

@ -33,8 +33,8 @@ target triple = "arm64-apple-ios"
; Without shrink-wrapping, epilogue is in the exit block.
; DISABLE: [[EXIT_LABEL]]:
; Epilogue code.
; CHECK-NEXT: mov sp, [[SAVE_SP]]
; CHECK-NEXT: ldp [[SAVE_SP]], [[CSR]], [sp], #16
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ldp x{{[0-9]+}}, [[CSR]], [sp], #16
;
; With shrink-wrapping, exit block is a simple return.
; ENABLE: [[EXIT_LABEL]]:
@ -473,7 +473,7 @@ if.end: ; preds = %for.body, %if.else
; DISABLE: [[IFEND_LABEL]]: ; %if.end
;
; Epilogue code.
; CHECK: mov sp, [[NEW_SP]]
; CHECK: add sp, sp, #48
; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #16
; CHECK-NEXT: ret
;

View File

@ -42,12 +42,12 @@ define fastcc void @func_stack0() {
; CHECK-TAIL-NOT: sub sp, sp
ret void
; CHECK: mov sp, x29
; CHECK: add sp, sp, #32
; CHECK-NEXT: ldp x29, x30, [sp], #16
; CHECK-NEXT: ret
; CHECK-TAIL: mov sp, x29
; CHECK-TAIL: add sp, sp, #32
; CHECK-TAIL-NEXT: ldp x29, x30, [sp], #16
; CHECK-TAIL-NEXT: ret
}
@ -91,12 +91,12 @@ define fastcc void @func_stack8([8 x i32], i32 %stacked) {
; CHECK-TAIL-NOT: sub sp, sp
ret void
; CHECK: mov sp, x29
; CHECK: add sp, sp, #32
; CHECK-NEXT: ldp x29, x30, [sp], #16
; CHECK-NEXT: ret
; CHECK-TAIL: mov sp, x29
; CHECK-TAIL: add sp, sp, #32
; CHECK-TAIL-NEXT: ldp x29, x30, [sp], #16
; CHECK-TAIL-NEXT: add sp, sp, #16
; CHECK-TAIL-NEXT: ret
@ -136,11 +136,11 @@ define fastcc void @func_stack32([8 x i32], i128 %stacked0, i128 %stacked1) {
; CHECK-TAIL-NOT: sub sp, sp
ret void
; CHECK: mov sp, x29
; CHECK: add sp, sp, #32
; CHECK-NEXT: ldp x29, x30, [sp], #16
; CHECK-NEXT: ret
; CHECK-TAIL: mov sp, x29
; CHECK-TAIL: add sp, sp, #32
; CHECK-TAIL-NEXT: ldp x29, x30, [sp], #16
; CHECK-TAIL-NEXT: add sp, sp, #32
; CHECK-TAIL-NEXT: ret