From a04e9e4a0af16f15ace258e81448b7eeca5ff599 Mon Sep 17 00:00:00 2001 From: Oliver Stannard Date: Tue, 23 Aug 2016 09:19:22 +0000 Subject: [PATCH] [ARM] Generate consistent frame records for Thumb2 There is not an official documented ABI for frame pointers in Thumb2, but we should try to emit something which is useful. We use r7 as the frame pointer for Thumb code, which currently means that if a function needs to save a high register (r8-r11), it will get pushed to the stack between the frame pointer (r7) and link register (r14). This means that while a stack unwinder can follow the chain of frame pointers up the stack, it cannot know the offset to lr, so does not know which functions correspond to the stack frames. To fix this, we need to push the callee-saved registers in two batches, with the first push saving the low registers, fp and lr, and the second push saving the high registers. This is already implemented, but previously only used for iOS. This patch turns it on for all Thumb2 targets when frame pointers are required by the ABI, and the frame pointer is r7 (Windows uses r11, so this isn't a problem there). If frame pointer elimination is enabled we still emit a single push/pop even if we need a frame pointer for other reasons, to avoid increasing code size. We must also ensure that lr is pushed to the stack when using a frame pointer, so that we end up with a complete frame record. Situations in which lr was not pushed were rare, because we already push lr in most situations so that we can return using the pop instruction. 
Differential Revision: https://reviews.llvm.org/D23516 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@279506 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMBaseRegisterInfo.cpp | 9 +- lib/Target/ARM/ARMFrameLowering.cpp | 51 +++--- lib/Target/ARM/ARMSubtarget.h | 8 +- lib/Target/ARM/Thumb1FrameLowering.cpp | 4 +- test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll | 2 +- test/CodeGen/ARM/2010-11-29-PrologueBug.ll | 2 +- test/CodeGen/ARM/2010-12-07-PEIBug.ll | 2 +- test/CodeGen/ARM/2011-08-25-ldmia_ret.ll | 2 +- test/CodeGen/ARM/arm-shrink-wrapping.ll | 24 +-- test/CodeGen/ARM/call-tc.ll | 20 +-- test/CodeGen/ARM/cxx-tlscc.ll | 12 +- test/CodeGen/ARM/debug-frame-large-stack.ll | 28 ++-- test/CodeGen/ARM/debug-info-arg.ll | 2 +- test/CodeGen/ARM/dwarf-unwind.ll | 12 +- test/CodeGen/ARM/fast-isel-frameaddr.ll | 24 +-- test/CodeGen/ARM/hello.ll | 7 +- test/CodeGen/ARM/ifcvt-iter-indbr.ll | 17 +- test/CodeGen/ARM/ifcvt10.ll | 2 +- test/CodeGen/ARM/ifcvt5.ll | 4 +- test/CodeGen/ARM/insn-sched1.ll | 2 +- test/CodeGen/ARM/ldrd.ll | 18 +-- test/CodeGen/ARM/lsr-unfolded-offset.ll | 2 +- test/CodeGen/ARM/memfunc.ll | 58 +++---- test/CodeGen/ARM/noreturn.ll | 57 ++++++- test/CodeGen/ARM/swiftself.ll | 12 +- test/CodeGen/ARM/v7k-abi-align.ll | 22 +-- test/CodeGen/ARM/warn-stack.ll | 4 +- test/CodeGen/Thumb/large-stack.ll | 67 ++++++-- test/CodeGen/Thumb/push.ll | 2 +- test/CodeGen/Thumb2/2009-07-21-ISelBug.ll | 2 +- test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll | 2 +- test/CodeGen/Thumb2/aligned-spill.ll | 6 +- test/CodeGen/Thumb2/frame-pointer.ll | 152 ++++++++++++++++++ test/CodeGen/Thumb2/thumb2-ldm.ll | 8 +- test/DebugInfo/ARM/PR16736.ll | 2 +- 35 files changed, 452 insertions(+), 196 deletions(-) create mode 100644 test/CodeGen/Thumb2/frame-pointer.ll diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index d4ff475cbab..2f8b56032cc 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ 
b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -49,18 +49,13 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo() : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), BasePtr(ARM::R6) {} static unsigned getFramePointerReg(const ARMSubtarget &STI) { - if (STI.isTargetMachO()) - return ARM::R7; - else if (STI.isTargetWindows()) - return ARM::R11; - else // ARM EABI - return STI.isThumb() ? ARM::R7 : ARM::R11; + return STI.useR7AsFramePointer() ? ARM::R7 : ARM::R11; } const MCPhysReg* ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const ARMSubtarget &STI = MF->getSubtarget(); - bool UseSplitPush = STI.splitFramePushPop(); + bool UseSplitPush = STI.splitFramePushPop(*MF); const MCPhysReg *RegList = STI.isTargetDarwin() ? CSR_iOS_SaveList diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 4c8646c2d73..61be18c2b54 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -57,16 +57,14 @@ bool ARMFrameLowering::noFramePointerElim(const MachineFunction &MF) const { /// or if frame pointer elimination is disabled. bool ARMFrameLowering::hasFP(const MachineFunction &MF) const { const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); - // iOS requires FP not to be clobbered for backtracing purpose. - if (STI.isTargetIOS() || STI.isTargetWatchOS()) + // ABI-required frame pointer. + if (MF.getTarget().Options.DisableFramePointerElim(MF)) return true; - const MachineFrameInfo &MFI = MF.getFrameInfo(); - // Always eliminate non-leaf frame pointers. - return ((MF.getTarget().Options.DisableFramePointerElim(MF) && - MFI.hasCalls()) || - RegInfo->needsStackRealignment(MF) || + // Frame pointer required for use within this function. 
+ return (RegInfo->needsStackRealignment(MF) || MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken()); } @@ -352,7 +350,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, case ARM::R10: case ARM::R11: case ARM::R12: - if (STI.splitFramePushPop()) { + if (STI.splitFramePushPop(MF)) { GPRCS2Size += 4; break; } @@ -557,7 +555,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, case ARM::R10: case ARM::R11: case ARM::R12: - if (STI.splitFramePushPop()) + if (STI.splitFramePushPop(MF)) break; LLVM_FALLTHROUGH; case ARM::R0: @@ -590,7 +588,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, case ARM::R10: case ARM::R11: case ARM::R12: - if (STI.splitFramePushPop()) { + if (STI.splitFramePushPop(MF)) { unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); unsigned Offset = MFI.getObjectOffset(FI); unsigned CFIIndex = MMI.addFrameInst( @@ -902,7 +900,7 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, unsigned LastReg = 0; for (; i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); - if (!(Func)(Reg, STI.splitFramePushPop())) continue; + if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue; // D-registers in the aligned area DPRCS2 are NOT spilled here. if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) @@ -983,7 +981,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, bool DeleteRet = false; for (; i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); - if (!(Func)(Reg, STI.splitFramePushPop())) continue; + if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue; // The aligned reloads from area DPRCS2 are not inserted here. 
if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) @@ -1547,7 +1545,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, if (Spilled) { NumGPRSpills++; - if (!STI.splitFramePushPop()) { + if (!STI.splitFramePushPop(MF)) { if (Reg == ARM::LR) LRSpilled = true; CS1Spilled = true; @@ -1569,7 +1567,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, break; } } else { - if (!STI.splitFramePushPop()) { + if (!STI.splitFramePushPop(MF)) { UnspilledCS1GPRs.push_back(Reg); continue; } @@ -1634,6 +1632,23 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) { AFI->setHasStackFrame(true); + if (hasFP(MF)) { + SavedRegs.set(FramePtr); + // If the frame pointer is required by the ABI, also spill LR so that we + // emit a complete frame record. + if (MF.getTarget().Options.DisableFramePointerElim(MF) && !LRSpilled) { + SavedRegs.set(ARM::LR); + LRSpilled = true; + NumGPRSpills++; + } + auto FPPos = find(UnspilledCS1GPRs, FramePtr); + if (FPPos != UnspilledCS1GPRs.end()) + UnspilledCS1GPRs.erase(FPPos); + NumGPRSpills++; + if (FramePtr == ARM::R7) + CS1Spilled = true; + } + // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled. // Spill LR as well so we can fold BX_RET to the registers restore (LDM). if (!LRSpilled && CS1Spilled) { @@ -1648,14 +1663,6 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, ExtraCSSpill = true; } - if (hasFP(MF)) { - SavedRegs.set(FramePtr); - auto FPPos = find(UnspilledCS1GPRs, FramePtr); - if (FPPos != UnspilledCS1GPRs.end()) - UnspilledCS1GPRs.erase(FPPos); - NumGPRSpills++; - } - // If stack and double are 8-byte aligned and we are spilling an odd number // of GPRs, spill one extra callee save GPR so we won't have to pad between // the integer and double callee save areas. 
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 97bce6fa8ff..2c14eb094b5 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -560,11 +560,15 @@ public: return isTargetMachO() ? (ReserveR9 || !HasV6Ops) : ReserveR9; } + bool useR7AsFramePointer() const { + return isTargetDarwin() || (!isTargetWindows() && isThumb()); + } /// Returns true if the frame setup is split into two separate pushes (first /// r0-r7,lr then r8-r11), principally so that the frame pointer is adjacent /// to lr. - bool splitFramePushPop() const { - return isTargetMachO(); + bool splitFramePushPop(const MachineFunction &MF) const { + return useR7AsFramePointer() && + MF.getTarget().Options.DisableFramePointerElim(MF); } bool useStride4VFPs(const MachineFunction &MF) const; diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index f2760e6d751..b3f26165a87 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -150,7 +150,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, case ARM::R9: case ARM::R10: case ARM::R11: - if (STI.splitFramePushPop()) { + if (STI.splitFramePushPop(MF)) { GPRCS2Size += 4; break; } @@ -212,7 +212,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, case ARM::R10: case ARM::R11: case ARM::R12: - if (STI.splitFramePushPop()) + if (STI.splitFramePushPop(MF)) break; // fallthough case ARM::R0: diff --git a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll index 4ba81e01ad3..991051caea7 100644 --- a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll +++ b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll @@ -9,7 +9,7 @@ @A = external global [4 x [4 x i32]] ; <[4 x [4 x i32]]*> [#uses=1] ; CHECK-LABEL: dct_luma_sp: -define fastcc i32 @dct_luma_sp(i32 %block_x, i32 %block_y, i32* %coeff_cost) { +define fastcc i32 @dct_luma_sp(i32 %block_x, i32 %block_y, i32* %coeff_cost) 
"no-frame-pointer-elim"="true" { entry: ; Make sure to use base-updating stores for saving callee-saved registers. ; CHECK: push diff --git a/test/CodeGen/ARM/2010-11-29-PrologueBug.ll b/test/CodeGen/ARM/2010-11-29-PrologueBug.ll index 15e17b4fd0f..79315ab59d4 100644 --- a/test/CodeGen/ARM/2010-11-29-PrologueBug.ll +++ b/test/CodeGen/ARM/2010-11-29-PrologueBug.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s ; rdar://8690640 -define i32* @t(i32* %x) nounwind { +define i32* @t(i32* %x) nounwind "no-frame-pointer-elim"="true" { entry: ; CHECK-LABEL: t: ; CHECK: push diff --git a/test/CodeGen/ARM/2010-12-07-PEIBug.ll b/test/CodeGen/ARM/2010-12-07-PEIBug.ll index 4baee64962c..340e3f8a80e 100644 --- a/test/CodeGen/ARM/2010-12-07-PEIBug.ll +++ b/test/CodeGen/ARM/2010-12-07-PEIBug.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a9 | FileCheck %s ; rdar://8728956 -define hidden void @foo() nounwind ssp { +define hidden void @foo() nounwind ssp "no-frame-pointer-elim"="true" { entry: ; CHECK-LABEL: foo: ; CHECK: mov r7, sp diff --git a/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll b/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll index e70f973d6a7..f4d1b4de7c5 100644 --- a/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll +++ b/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll @@ -14,7 +14,7 @@ declare i1 @getbool() declare void @foo(i32) declare i32 @bar(i32) -define i32 @test(i32 %in1, i32 %in2) nounwind { +define i32 @test(i32 %in1, i32 %in2) nounwind "no-frame-pointer-elim"="true" { entry: %call = tail call zeroext i1 @getbool() nounwind br i1 %call, label %sw.bb18, label %sw.bb2 diff --git a/test/CodeGen/ARM/arm-shrink-wrapping.ll b/test/CodeGen/ARM/arm-shrink-wrapping.ll index 93572b1e63d..4ab090f22b7 100644 --- a/test/CodeGen/ARM/arm-shrink-wrapping.ll +++ b/test/CodeGen/ARM/arm-shrink-wrapping.ll @@ -59,7 +59,7 @@ ; DISABLE-NEXT: pop {r7, pc} ; ; ENABLE-NEXT: bx lr -define i32 @foo(i32 %a, i32 %b) { +define i32 @foo(i32 %a, i32 %b) 
"no-frame-pointer-elim"="true" { %tmp = alloca i32, align 4 %tmp2 = icmp slt i32 %a, %b br i1 %tmp2, label %true, label %false @@ -124,7 +124,7 @@ declare i32 @doSomething(i32, i32*) ; DISABLE-NEXT: pop {r4, r7, pc} ; ; ENABLE-NEXT: bx lr -define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) { +define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) "no-frame-pointer-elim"="true" { entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %for.preheader @@ -178,7 +178,7 @@ declare i32 @something(...) ; CHECK: @ %for.exit ; CHECK: nop ; CHECK: pop {r4 -define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) { +define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) "no-frame-pointer-elim"="true" { entry: br label %for.preheader @@ -248,7 +248,7 @@ for.end: ; preds = %for.body ; DISABLE-NEXT: pop {r4, r7, pc} ; ; ENABLE-NEXT: bx lr -define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) { +define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) "no-frame-pointer-elim"="true" { entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %for.preheader @@ -327,7 +327,7 @@ declare void @somethingElse(...) 
; DISABLE-NEXT: pop {r4, r7, pc} ; ; ENABLE-NEXT: bx lr -define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) #0 { +define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) "no-frame-pointer-elim"="true" #0 { entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %if.then @@ -405,7 +405,7 @@ entry: ; DISABLE-NEXT: pop {r4, r7, pc} ; ; ENABLE-NEXT: bx lr -define i32 @inlineAsm(i32 %cond, i32 %N) { +define i32 @inlineAsm(i32 %cond, i32 %N) "no-frame-pointer-elim"="true" { entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %for.preheader @@ -474,7 +474,7 @@ if.end: ; preds = %for.body, %if.else ; ARM-DISABLE-NEXT: mov sp, r7 ; THUMB-DISABLE-NEXT: add sp, #12 ; DISABLE-NEXT: pop {r7, pc} -define i32 @callVariadicFunc(i32 %cond, i32 %N) { +define i32 @callVariadicFunc(i32 %cond, i32 %N) "no-frame-pointer-elim"="true" { entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %if.then @@ -521,7 +521,7 @@ declare i32 @someVariadicFunc(i32, ...) ; ; CHECK: bl{{x?}} _abort ; ENABLE-NOT: pop -define i32 @noreturn(i8 signext %bad_thing) { +define i32 @noreturn(i8 signext %bad_thing) "no-frame-pointer-elim"="true" { entry: %tobool = icmp eq i8 %bad_thing, 0 br i1 %tobool, label %if.end, label %if.abort @@ -548,7 +548,7 @@ attributes #0 = { noreturn nounwind } ; The only condition for this test is the compilation finishes correctly. ; CHECK-LABEL: infiniteloop ; CHECK: pop -define void @infiniteloop() { +define void @infiniteloop() "no-frame-pointer-elim"="true" { entry: br i1 undef, label %if.then, label %if.end @@ -570,7 +570,7 @@ if.end: ; Another infinite loop test this time with a body bigger than just one block. ; CHECK-LABEL: infiniteloop2 ; CHECK: pop -define void @infiniteloop2() { +define void @infiniteloop2() "no-frame-pointer-elim"="true" { entry: br i1 undef, label %if.then, label %if.end @@ -600,7 +600,7 @@ if.end: ; Another infinite loop test this time with two nested infinite loop. 
; CHECK-LABEL: infiniteloop3 ; CHECK: bx lr -define void @infiniteloop3() { +define void @infiniteloop3() "no-frame-pointer-elim"="true" { entry: br i1 undef, label %loop2a, label %body @@ -657,7 +657,7 @@ declare double @llvm.pow.f64(double, double) ; DISABLE: pop ; ; CHECK: bl -define float @debug_info(float %gamma, float %slopeLimit, i1 %or.cond, double %tmp) { +define float @debug_info(float %gamma, float %slopeLimit, i1 %or.cond, double %tmp) "no-frame-pointer-elim"="true" { bb: br i1 %or.cond, label %bb3, label %bb13 diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll index 53fa8920ec0..2277a585336 100644 --- a/test/CodeGen/ARM/call-tc.ll +++ b/test/CodeGen/ARM/call-tc.ll @@ -10,14 +10,14 @@ declare void @g(i32, i32, i32, i32) -define void @t1() { +define void @t1() "no-frame-pointer-elim"="true" { ; CHECKELF-LABEL: t1: ; CHECKELF: bl g call void @g( i32 1, i32 2, i32 3, i32 4 ) ret void } -define void @t2() { +define void @t2() "no-frame-pointer-elim"="true" { ; CHECKV6-LABEL: t2: ; CHECKV6: bx r0 ; CHECKT2D-LABEL: t2: @@ -29,7 +29,7 @@ define void @t2() { ret void } -define void @t3() { +define void @t3() "no-frame-pointer-elim"="true" { ; CHECKV6-LABEL: t3: ; CHECKV6: b _t2 ; CHECKELF-LABEL: t3: @@ -42,7 +42,7 @@ define void @t3() { } ; Sibcall optimization of expanded libcalls. 
rdar://8707777 -define double @t4(double %a) nounwind readonly ssp { +define double @t4(double %a) nounwind readonly ssp "no-frame-pointer-elim"="true" { entry: ; CHECKV6-LABEL: t4: ; CHECKV6: b _sin @@ -52,7 +52,7 @@ entry: ret double %0 } -define float @t5(float %a) nounwind readonly ssp { +define float @t5(float %a) nounwind readonly ssp "no-frame-pointer-elim"="true" { entry: ; CHECKV6-LABEL: t5: ; CHECKV6: b _sinf @@ -66,7 +66,7 @@ declare float @sinf(float) nounwind readonly declare double @sin(double) nounwind readonly -define i32 @t6(i32 %a, i32 %b) nounwind readnone { +define i32 @t6(i32 %a, i32 %b) nounwind readnone "no-frame-pointer-elim"="true" { entry: ; CHECKV6-LABEL: t6: ; CHECKV6: b ___divsi3 @@ -80,7 +80,7 @@ entry: ; rdar://8309338 declare void @foo() nounwind -define void @t7() nounwind { +define void @t7() nounwind "no-frame-pointer-elim"="true" { entry: ; CHECKT2D-LABEL: t7: ; CHECKT2D: it ne @@ -101,7 +101,7 @@ bb: ; Make sure codegenprep is duplicating ret instructions to enable tail calls. ; rdar://11140249 -define i32 @t8(i32 %x) nounwind ssp { +define i32 @t8(i32 %x) nounwind ssp "no-frame-pointer-elim"="true" { entry: ; CHECKT2D-LABEL: t8: ; CHECKT2D-NOT: push @@ -148,7 +148,7 @@ declare i32 @c(i32) @x = external global i32, align 4 -define i32 @t9() nounwind { +define i32 @t9() nounwind "no-frame-pointer-elim"="true" { ; CHECKT2D-LABEL: t9: ; CHECKT2D: bl __ZN9MutexLockC1Ev ; CHECKT2D: bl __ZN9MutexLockD1Ev @@ -168,7 +168,7 @@ declare %class.MutexLock* @_ZN9MutexLockD1Ev(%class.MutexLock*) unnamed_addr nou ; rdar://13827621 ; Correctly preserve the input chain for the tailcall node in the bitcast case, ; otherwise the call to floorf is lost. 
-define float @libcall_tc_test2(float* nocapture %a, float %b) { +define float @libcall_tc_test2(float* nocapture %a, float %b) "no-frame-pointer-elim"="true" { ; CHECKT2D-LABEL: libcall_tc_test2: ; CHECKT2D: bl _floorf ; CHECKT2D: b.w _truncf diff --git a/test/CodeGen/ARM/cxx-tlscc.ll b/test/CodeGen/ARM/cxx-tlscc.ll index 5d017bbeebc..6a5aa12ac5a 100644 --- a/test/CodeGen/ARM/cxx-tlscc.ll +++ b/test/CodeGen/ARM/cxx-tlscc.ll @@ -33,7 +33,7 @@ declare i32 @_tlv_atexit(void (i8*)*, i8*, i8*) ; THUMB: blx ; THUMB: r4 ; THUMB: pop {{.*}}r4 -define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() nounwind { +define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() nounwind "no-frame-pointer-elim"="true" { %.b.i = load i1, i1* @__tls_guard, align 1 br i1 %.b.i, label %__tls_init.exit, label %init.i @@ -95,7 +95,7 @@ __tls_init.exit: ; CHECK-O0-NOT: vpop ; CHECK-O0-NOT: vldr ; CHECK-O0: pop -define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind { +define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind "no-frame-pointer-elim"="true" { ret i32* @sum1 } @@ -109,7 +109,7 @@ define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind { ; CHECK-O0-NOT: vldr ; CHECK-O0: pop declare cxx_fast_tlscc void @tls_helper() -define cxx_fast_tlscc %class.C* @tls_test2() #1 { +define cxx_fast_tlscc %class.C* @tls_test2() #1 "no-frame-pointer-elim"="true" { call cxx_fast_tlscc void @tls_helper() ret %class.C* @tC } @@ -119,7 +119,7 @@ define cxx_fast_tlscc %class.C* @tls_test2() #1 { declare %class.C* @_ZN1CD1Ev(%class.C* readnone returned %this) ; CHECK-LABEL: tls_test ; CHECK: bl __tlv_atexit -define cxx_fast_tlscc void @__tls_test() { +define cxx_fast_tlscc void @__tls_test() "no-frame-pointer-elim"="true" { entry: store i32 0, i32* getelementptr inbounds (%class.C, %class.C* @tC, i64 0, i32 0), align 4 %0 = tail call i32 @_tlv_atexit(void (i8*)* bitcast (%class.C* (%class.C*)* @_ZN1CD1Ev to void (i8*)*), i8* bitcast (%class.C* @tC to i8*), i8* nonnull @__dso_handle) #1 @@ -127,7 +127,7 @@ 
entry: } declare void @somefunc() -define cxx_fast_tlscc void @test_ccmismatch_notail() { +define cxx_fast_tlscc void @test_ccmismatch_notail() "no-frame-pointer-elim"="true" { ; A tail call is not possible here because somefunc does not preserve enough ; registers. ; CHECK-LABEL: test_ccmismatch_notail: @@ -138,7 +138,7 @@ define cxx_fast_tlscc void @test_ccmismatch_notail() { } declare cxx_fast_tlscc void @some_fast_tls_func() -define void @test_ccmismatch_tail() { +define void @test_ccmismatch_tail() "no-frame-pointer-elim"="true" { ; We can perform a tail call here because some_fast_tls_func preserves all ; necessary registers (and more). ; CHECK-LABEL: test_ccmismatch_tail: diff --git a/test/CodeGen/ARM/debug-frame-large-stack.ll b/test/CodeGen/ARM/debug-frame-large-stack.ll index 1f814e70d54..22869c21193 100644 --- a/test/CodeGen/ARM/debug-frame-large-stack.ll +++ b/test/CodeGen/ARM/debug-frame-large-stack.ll @@ -23,13 +23,16 @@ define void @test2() { ; CHECK-ARM-LABEL: test2: ; CHECK-ARM: .cfi_startproc -; CHECK-ARM: push {r4, r5} -; CHECK-ARM: .cfi_def_cfa_offset 8 -; CHECK-ARM: .cfi_offset r5, -4 -; CHECK-ARM: .cfi_offset r4, -8 +; CHECK-ARM: push {r4, r5, r11, lr} +; CHECK-ARM: .cfi_def_cfa_offset 16 +; CHECK-ARM: .cfi_offset lr, -4 +; CHECK-ARM: .cfi_offset r11, -8 +; CHECK-ARM: .cfi_offset r5, -12 +; CHECK-ARM: .cfi_offset r4, -16 +; CHECK-ARM: add r11, sp, #8 +; CHECK-ARM: .cfi_def_cfa r11, 8 ; CHECK-ARM: sub sp, sp, #72 ; CHECK-ARM: sub sp, sp, #4096 -; CHECK-ARM: .cfi_def_cfa_offset 4176 ; CHECK-ARM: .cfi_endproc ; CHECK-ARM-FP_ELIM-LABEL: test2: @@ -54,14 +57,15 @@ define i32 @test3() { ; CHECK-ARM-LABEL: test3: ; CHECK-ARM: .cfi_startproc -; CHECK-ARM: push {r4, r5, r11} -; CHECK-ARM: .cfi_def_cfa_offset 12 -; CHECK-ARM: .cfi_offset r11, -4 -; CHECK-ARM: .cfi_offset r5, -8 -; CHECK-ARM: .cfi_offset r4, -12 +; CHECK-ARM: push {r4, r5, r11, lr} +; CHECK-ARM: .cfi_def_cfa_offset 16 +; CHECK-ARM: .cfi_offset lr, -4 +; CHECK-ARM: .cfi_offset r11, -8 +; 
CHECK-ARM: .cfi_offset r5, -12 +; CHECK-ARM: .cfi_offset r4, -16 ; CHECK-ARM: add r11, sp, #8 -; CHECK-ARM: .cfi_def_cfa r11, 4 -; CHECK-ARM: sub sp, sp, #20 +; CHECK-ARM: .cfi_def_cfa r11, 8 +; CHECK-ARM: sub sp, sp, #16 ; CHECK-ARM: sub sp, sp, #805306368 ; CHECK-ARM: bic sp, sp, #15 ; CHECK-ARM: .cfi_endproc diff --git a/test/CodeGen/ARM/debug-info-arg.ll b/test/CodeGen/ARM/debug-info-arg.ll index 9dd820134dd..d80788dad73 100644 --- a/test/CodeGen/ARM/debug-info-arg.ll +++ b/test/CodeGen/ARM/debug-info-arg.ll @@ -6,7 +6,7 @@ target triple = "thumbv7-apple-ios" %struct.tag_s = type { i32, i32, i32 } -define void @foo(%struct.tag_s* nocapture %this, %struct.tag_s* %c, i64 %x, i64 %y, %struct.tag_s* nocapture %ptr1, %struct.tag_s* nocapture %ptr2) nounwind ssp !dbg !1 { +define void @foo(%struct.tag_s* nocapture %this, %struct.tag_s* %c, i64 %x, i64 %y, %struct.tag_s* nocapture %ptr1, %struct.tag_s* nocapture %ptr2) nounwind ssp "no-frame-pointer-elim"="true" !dbg !1 { tail call void @llvm.dbg.value(metadata %struct.tag_s* %this, i64 0, metadata !5, metadata !DIExpression()), !dbg !20 tail call void @llvm.dbg.value(metadata %struct.tag_s* %c, i64 0, metadata !13, metadata !DIExpression()), !dbg !21 tail call void @llvm.dbg.value(metadata i64 %x, i64 0, metadata !14, metadata !DIExpression()), !dbg !22 diff --git a/test/CodeGen/ARM/dwarf-unwind.ll b/test/CodeGen/ARM/dwarf-unwind.ll index 5256db86344..58a116bdeb0 100644 --- a/test/CodeGen/ARM/dwarf-unwind.ll +++ b/test/CodeGen/ARM/dwarf-unwind.ll @@ -71,12 +71,14 @@ define void @test_nodpr_noalign(i8 %l, i8 %r) { define void @test_frame_pointer_offset() minsize "no-frame-pointer-elim"="true" { ; CHECK-LABEL: test_frame_pointer_offset: -; CHECK: push.w {r3, r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK: .cfi_def_cfa_offset 40 -; CHECK: add r7, sp, #16 -; CHECK: .cfi_def_cfa r7, 24 +; CHECK: push {r4, r5, r6, r7, lr} +; CHECK: .cfi_def_cfa_offset 20 +; CHECK: add r7, sp, #12 +; CHECK: .cfi_def_cfa r7, 8 +; CHECK-NOT: 
.cfi_def_cfa_offset +; CHECK: push.w {r7, r8, r9, r10, r11} ; CHECK-NOT: .cfi_def_cfa_offset call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{d8}"() call void @bar() ret void -} \ No newline at end of file +} diff --git a/test/CodeGen/ARM/fast-isel-frameaddr.ll b/test/CodeGen/ARM/fast-isel-frameaddr.ll index 75d582f4ee3..ff00cd887fb 100644 --- a/test/CodeGen/ARM/fast-isel-frameaddr.ll +++ b/test/CodeGen/ARM/fast-isel-frameaddr.ll @@ -6,22 +6,22 @@ define i8* @frameaddr_index0() nounwind { entry: ; DARWIN-ARM-LABEL: frameaddr_index0: -; DARWIN-ARM: push {r7} +; DARWIN-ARM: push {r7, lr} ; DARWIN-ARM: mov r7, sp ; DARWIN-ARM: mov r0, r7 ; DARWIN-THUMB2-LABEL: frameaddr_index0: -; DARWIN-THUMB2: str r7, [sp, #-4]! +; DARWIN-THUMB2: push {r7, lr} ; DARWIN-THUMB2: mov r7, sp ; DARWIN-THUMB2: mov r0, r7 ; LINUX-ARM-LABEL: frameaddr_index0: -; LINUX-ARM: push {r11} +; LINUX-ARM: push {r11, lr} ; LINUX-ARM: mov r11, sp ; LINUX-ARM: mov r0, r11 ; LINUX-THUMB2-LABEL: frameaddr_index0: -; LINUX-THUMB2: str r7, [sp, #-4]! +; LINUX-THUMB2: push {r7, lr} ; LINUX-THUMB2: mov r7, sp ; LINUX-THUMB2: mov r0, r7 @@ -32,22 +32,22 @@ entry: define i8* @frameaddr_index1() nounwind { entry: ; DARWIN-ARM-LABEL: frameaddr_index1: -; DARWIN-ARM: push {r7} +; DARWIN-ARM: push {r7, lr} ; DARWIN-ARM: mov r7, sp ; DARWIN-ARM: ldr r0, [r7] ; DARWIN-THUMB2-LABEL: frameaddr_index1: -; DARWIN-THUMB2: str r7, [sp, #-4]! +; DARWIN-THUMB2: push {r7, lr} ; DARWIN-THUMB2: mov r7, sp ; DARWIN-THUMB2: ldr r0, [r7] ; LINUX-ARM-LABEL: frameaddr_index1: -; LINUX-ARM: push {r11} +; LINUX-ARM: push {r11, lr} ; LINUX-ARM: mov r11, sp ; LINUX-ARM: ldr r0, [r11] ; LINUX-THUMB2-LABEL: frameaddr_index1: -; LINUX-THUMB2: str r7, [sp, #-4]! 
+; LINUX-THUMB2: push {r7, lr} ; LINUX-THUMB2: mov r7, sp ; LINUX-THUMB2: mov r0, r7 ; LINUX-THUMB2: ldr r0, [r0] @@ -59,28 +59,28 @@ entry: define i8* @frameaddr_index3() nounwind { entry: ; DARWIN-ARM-LABEL: frameaddr_index3: -; DARWIN-ARM: push {r7} +; DARWIN-ARM: push {r7, lr} ; DARWIN-ARM: mov r7, sp ; DARWIN-ARM: ldr r0, [r7] ; DARWIN-ARM: ldr r0, [r0] ; DARWIN-ARM: ldr r0, [r0] ; DARWIN-THUMB2-LABEL: frameaddr_index3: -; DARWIN-THUMB2: str r7, [sp, #-4]! +; DARWIN-THUMB2: push {r7, lr} ; DARWIN-THUMB2: mov r7, sp ; DARWIN-THUMB2: ldr r0, [r7] ; DARWIN-THUMB2: ldr r0, [r0] ; DARWIN-THUMB2: ldr r0, [r0] ; LINUX-ARM-LABEL: frameaddr_index3: -; LINUX-ARM: push {r11} +; LINUX-ARM: push {r11, lr} ; LINUX-ARM: mov r11, sp ; LINUX-ARM: ldr r0, [r11] ; LINUX-ARM: ldr r0, [r0] ; LINUX-ARM: ldr r0, [r0] ; LINUX-THUMB2-LABEL: frameaddr_index3: -; LINUX-THUMB2: str r7, [sp, #-4]! +; LINUX-THUMB2: push {r7, lr} ; LINUX-THUMB2: mov r7, sp ; LINUX-THUMB2: mov r0, r7 ; LINUX-THUMB2: ldr r0, [r0] diff --git a/test/CodeGen/ARM/hello.ll b/test/CodeGen/ARM/hello.ll index b03a60ab60c..26410599493 100644 --- a/test/CodeGen/ARM/hello.ll +++ b/test/CodeGen/ARM/hello.ll @@ -9,7 +9,7 @@ @str = internal constant [12 x i8] c"Hello World\00" -define i32 @main() { +define i32 @main() "no-frame-pointer-elim"="true" { %tmp = call i32 @puts( i8* getelementptr ([12 x i8], [12 x i8]* @str, i32 0, i64 0) ) ; [#uses=0] ret i32 0 } @@ -17,7 +17,10 @@ define i32 @main() { declare i32 @puts(i8*) ; CHECK-LABEL: main -; CHECK: mov +; CHECK-NOT: mov +; CHECK: mov r11, sp +; CHECK-NOT: mov +; CHECK: mov r0, #0 ; CHECK-NOT: mov ; CHECK-FP-ELIM-LABEL: main diff --git a/test/CodeGen/ARM/ifcvt-iter-indbr.ll b/test/CodeGen/ARM/ifcvt-iter-indbr.ll index 967d6ebce27..73496257306 100644 --- a/test/CodeGen/ARM/ifcvt-iter-indbr.ll +++ b/test/CodeGen/ARM/ifcvt-iter-indbr.ll @@ -16,11 +16,12 @@ declare i8* @bar(i32, i8*, i8*) ; CHECK-NEXT: moveq pc ; CHECK-NEXT: LBB{{[0-9_]+}}: ; CHECK-NEXT: cmp {{.*}}, #42 -; 
CHECK-NEXT: itt ne -; CHECK-NEXT: strne.w -; CHECK-NEXT: movne pc +; CHECK-NEXT: beq [[CALL_FOO_1234:LBB[0-9_]+]] +; CHECK-NEXT: ldr {{.*}}[sp +; CHECK-NEXT: str +; CHECK-NEXT: mov pc ; CHECK-NEXT: Ltmp -; CHECK-NEXT: LBB0_2: +; CHECK-NEXT: [[CALL_FOO_1234]]: ; CHECK-NEXT: movw r0, #1234 ; CHECK-NEXT: b [[FOOCALL:LBB[0-9_]+]] ; CHECK-NEXT: Ltmp @@ -30,11 +31,11 @@ declare i8* @bar(i32, i8*, i8*) ; CHECK-NEXT: bl _foo ; ; CHECK-PROB: BB#0: -; CHECK-PROB: Successors according to CFG: BB#1({{[0-9a-fx/= ]+}}50.00%) BB#2({{[0-9a-fx/= ]+}}25.00%) BB#4({{[0-9a-fx/= ]+}}25.00%) -; CHECK-PROB: BB#1: -; CHECK-PROB: Successors according to CFG: BB#2({{[0-9a-fx/= ]+}}75.00%) BB#4({{[0-9a-fx/= ]+}}25.00%) +; CHECK-PROB: Successors according to CFG: BB#1({{[0-9a-fx/= ]+}}50.00%) BB#3({{[0-9a-fx/= ]+}}25.00%) BB#5({{[0-9a-fx/= ]+}}25.00%) +; CHECK-PROB: BB#2: +; CHECK-PROB: Successors according to CFG: BB#3({{[0-9a-fx/= ]+}}50.00%) BB#5({{[0-9a-fx/= ]+}}50.00%) -define i32 @test(i32 %a, i32 %a2, i32* %p, i32* %p2) { +define i32 @test(i32 %a, i32 %a2, i32* %p, i32* %p2) "no-frame-pointer-elim"="true" { entry: %dst1 = call i8* @bar(i32 1, i8* blockaddress(@test, %bb1), i8* blockaddress(@test, %bb2)) %dst2 = call i8* @bar(i32 2, i8* blockaddress(@test, %bb1), i8* blockaddress(@test, %bb2)) diff --git a/test/CodeGen/ARM/ifcvt10.ll b/test/CodeGen/ARM/ifcvt10.ll index 509c182fc97..5725a404c32 100644 --- a/test/CodeGen/ARM/ifcvt10.ll +++ b/test/CodeGen/ARM/ifcvt10.ll @@ -4,7 +4,7 @@ ; micro-coded and would have long issue latency even if predicated on ; false predicate. 
-define void @t(double %a, double %b, double %c, double %d, i32* nocapture %solutions, double* nocapture %x) nounwind { +define void @t(double %a, double %b, double %c, double %d, i32* nocapture %solutions, double* nocapture %x) nounwind "no-frame-pointer-elim"="true" { entry: ; CHECK-LABEL: t: ; CHECK: vpop {d8} diff --git a/test/CodeGen/ARM/ifcvt5.ll b/test/CodeGen/ARM/ifcvt5.ll index 9fb8abde613..3819bc218a9 100644 --- a/test/CodeGen/ARM/ifcvt5.ll +++ b/test/CodeGen/ARM/ifcvt5.ll @@ -4,14 +4,14 @@ @x = external global i32* ; [#uses=1] -define void @foo(i32 %a) { +define void @foo(i32 %a) "no-frame-pointer-elim"="true" { entry: %tmp = load i32*, i32** @x ; [#uses=1] store i32 %a, i32* %tmp ret void } -define i32 @t1(i32 %a, i32 %b) { +define i32 @t1(i32 %a, i32 %b) "no-frame-pointer-elim"="true" { ; A8-LABEL: t1: ; A8: bxlt lr diff --git a/test/CodeGen/ARM/insn-sched1.ll b/test/CodeGen/ARM/insn-sched1.ll index 2749a8e7cd2..120252d96d8 100644 --- a/test/CodeGen/ARM/insn-sched1.ll +++ b/test/CodeGen/ARM/insn-sched1.ll @@ -1,7 +1,7 @@ ; RUN: llc -mtriple=arm-eabi -mattr=+v6 %s -o /dev/null ; RUN: llc -mtriple=arm-apple-ios -mattr=+v6 %s -o - | FileCheck %s -define i32 @test(i32 %x) { +define i32 @test(i32 %x) "no-frame-pointer-elim"="true" { %tmp = trunc i32 %x to i16 ; [#uses=1] %tmp2 = call i32 @f( i32 1, i16 %tmp ) ; [#uses=1] ret i32 %tmp2 diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll index dd97fbfd640..6a9e63f649c 100644 --- a/test/CodeGen/ARM/ldrd.ll +++ b/test/CodeGen/ARM/ldrd.ll @@ -15,7 +15,7 @@ declare i64* @get_ptr() declare void @use_i64(i64 %v) -define void @test_ldrd(i64 %a) nounwind readonly { +define void @test_ldrd(i64 %a) nounwind readonly "no-frame-pointer-elim"="true" { ; CHECK-LABEL: test_ldrd: ; NORMAL: bl{{x?}} _get_ptr ; A8: ldrd r0, r1, [r0] @@ -49,7 +49,7 @@ define void @test_ldrd(i64 %a) nounwind readonly { ; GREEDY: %bb ; GREEDY: ldrd ; GREEDY: str -define void @f(i32* nocapture %a, i32* nocapture %b, i32 %n) nounwind { 
+define void @f(i32* nocapture %a, i32* nocapture %b, i32 %n) nounwind "no-frame-pointer-elim"="true" { entry: %0 = add nsw i32 %n, -1 ; [#uses=2] %1 = icmp sgt i32 %0, 0 ; [#uses=1] @@ -79,7 +79,7 @@ return: ; preds = %bb, %entry @TestVar = external global %struct.Test ; CHECK-LABEL: Func1: -define void @Func1() nounwind ssp { +define void @Func1() nounwind ssp "no-frame-pointer-elim"="true" { entry: ; A8: movw [[BASE:r[0-9]+]], :lower16:{{.*}}TestVar{{.*}} ; A8: movt [[BASE]], :upper16:{{.*}}TestVar{{.*}} @@ -104,7 +104,7 @@ declare void @extfunc(i32, i32, i32, i32) ; A8: ldrd ; CHECK: bl{{x?}} _extfunc ; A8: pop -define void @Func2(i32* %p) { +define void @Func2(i32* %p) "no-frame-pointer-elim"="true" { entry: %addr0 = getelementptr i32, i32* %p, i32 0 %addr1 = getelementptr i32, i32* %p, i32 1 @@ -129,7 +129,7 @@ entry: ; GREEDY: ldrd r1, r2, [sp] ; CONSERVATIVE: ldrd r1, r2, [sp] ; CHECK: bl{{x?}} _extfunc -define void @strd_spill_ldrd_reload(i32 %v0, i32 %v1) { +define void @strd_spill_ldrd_reload(i32 %v0, i32 %v1) "no-frame-pointer-elim"="true" { ; force %v0 and %v1 to be spilled call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{lr}"() ; force the reloaded %v0, %v1 into different registers @@ -143,7 +143,7 @@ declare void @extfunc2(i32*, i32, i32) ; NORMAL: ldrd r1, r2, [r0], #-8 ; CONSERVATIVE-NOT: ldrd ; CHECK: bl{{x?}} _extfunc -define void @ldrd_postupdate_dec(i32* %p0) { +define void @ldrd_postupdate_dec(i32* %p0) "no-frame-pointer-elim"="true" { %p0.1 = getelementptr i32, i32* %p0, i32 1 %v0 = load i32, i32* %p0 %v1 = load i32, i32* %p0.1 @@ -156,7 +156,7 @@ define void @ldrd_postupdate_dec(i32* %p0) { ; NORMAL: ldrd r1, r2, [r0], #8 ; CONSERVATIVE-NOT: ldrd ; CHECK: bl{{x?}} _extfunc -define void @ldrd_postupdate_inc(i32* %p0) { +define void @ldrd_postupdate_inc(i32* %p0) "no-frame-pointer-elim"="true" { %p0.1 = getelementptr i32, i32* %p0, i32 1 %v0 = load i32, i32* %p0 %v1 = load i32, 
i32* %p0.1 @@ -169,7 +169,7 @@ define void @ldrd_postupdate_inc(i32* %p0) { ; NORMAL: strd r1, r2, [r0], #-8 ; CONSERVATIVE-NOT: strd ; CHECK: bx lr -define i32* @strd_postupdate_dec(i32* %p0, i32 %v0, i32 %v1) { +define i32* @strd_postupdate_dec(i32* %p0, i32 %v0, i32 %v1) "no-frame-pointer-elim"="true" { %p0.1 = getelementptr i32, i32* %p0, i32 1 store i32 %v0, i32* %p0 store i32 %v1, i32* %p0.1 @@ -181,7 +181,7 @@ define i32* @strd_postupdate_dec(i32* %p0, i32 %v0, i32 %v1) { ; NORMAL: strd r1, r2, [r0], #8 ; CONSERVATIVE-NOT: strd ; CHECK: bx lr -define i32* @strd_postupdate_inc(i32* %p0, i32 %v0, i32 %v1) { +define i32* @strd_postupdate_inc(i32* %p0, i32 %v0, i32 %v1) "no-frame-pointer-elim"="true" { %p0.1 = getelementptr i32, i32* %p0, i32 1 store i32 %v0, i32* %p0 store i32 %v1, i32* %p0.1 diff --git a/test/CodeGen/ARM/lsr-unfolded-offset.ll b/test/CodeGen/ARM/lsr-unfolded-offset.ll index 17292cfe289..c4fe8dc6487 100644 --- a/test/CodeGen/ARM/lsr-unfolded-offset.ll +++ b/test/CodeGen/ARM/lsr-unfolded-offset.ll @@ -15,7 +15,7 @@ target triple = "thumbv7-apple-ios" %struct.partition_entry = type { i32, i32, i64, i64 } -define i32 @partition_overlap_check(%struct.partition_entry* nocapture %part, i32 %num_entries) nounwind readonly optsize ssp { +define i32 @partition_overlap_check(%struct.partition_entry* nocapture %part, i32 %num_entries) nounwind readonly optsize ssp "no-frame-pointer-elim"="true" { entry: %cmp79 = icmp sgt i32 %num_entries, 0 br i1 %cmp79, label %outer.loop, label %for.end72 diff --git a/test/CodeGen/ARM/memfunc.ll b/test/CodeGen/ARM/memfunc.ll index bc60d8e4f0e..ed6746290b7 100644 --- a/test/CodeGen/ARM/memfunc.ll +++ b/test/CodeGen/ARM/memfunc.ll @@ -8,7 +8,7 @@ ; RUN: llc < %s -mtriple=arm-none-musleabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-GNUEABI --check-prefix=CHECK ; RUN: llc < %s -mtriple=arm-none-musleabihf -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-GNUEABI --check-prefix=CHECK -define void 
@f1(i8* %dest, i8* %src) { +define void @f1(i8* %dest, i8* %src) "no-frame-pointer-elim"="true" { entry: ; CHECK-LABEL: f1 @@ -98,7 +98,7 @@ entry: } ; Check that alloca arguments to memory intrinsics are automatically aligned if at least 8 bytes in size -define void @f2(i8* %dest, i32 %n) { +define void @f2(i8* %dest, i32 %n) "no-frame-pointer-elim"="true" { entry: ; CHECK-LABEL: f2 @@ -107,9 +107,9 @@ entry: ; CHECK-IOS: bl _memmove ; CHECK-DARWIN: add r1, sp, #28 ; CHECK-DARWIN: bl _memmove - ; CHECK-EABI: add r1, sp, #28 + ; CHECK-EABI: {{add r1, sp, #28|sub r1, r(7|11), #20}} ; CHECK-EABI: bl __aeabi_memmove - ; CHECK-GNUEABI: add r1, sp, #28 + ; CHECK-GNUEABI: {{add r1, sp, #28|sub r1, r(7|11), #20}} ; CHECK-GNUEABI: bl memmove %arr0 = alloca [9 x i8], align 1 %0 = bitcast [9 x i8]* %arr0 to i8* @@ -144,11 +144,11 @@ entry: } ; Check that alloca arguments are not aligned if less than 8 bytes in size -define void @f3(i8* %dest, i32 %n) { +define void @f3(i8* %dest, i32 %n) "no-frame-pointer-elim"="true" { entry: ; CHECK-LABEL: f3 - ; CHECK: {{add(.w)? r1, sp, #17|sub(.w)? r1, r7, #15}} + ; CHECK: {{add(.w)? r1, sp, #17|sub(.w)? r1, r(7|11), #15}} ; CHECK-IOS: bl _memmove ; CHECK-DARWIN: bl _memmove ; CHECK-EABI: bl __aeabi_memmove @@ -157,7 +157,7 @@ entry: %0 = bitcast [7 x i8]* %arr0 to i8* call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false) - ; CHECK: {{add(.w)? r1, sp, #10}} + ; CHECK: {{add(.w)? r1, sp, #10|sub(.w)? r1, r(7|11), #22}} ; CHECK-IOS: bl _memcpy ; CHECK-DARWIN: bl _memcpy ; CHECK-EABI: bl __aeabi_memcpy @@ -166,7 +166,7 @@ entry: %1 = bitcast [7 x i8]* %arr1 to i8* call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false) - ; CHECK: {{add(.w)? r0, sp, #3}} + ; CHECK: {{add(.w)? r0, sp, #3|sub(.w)? 
r0, r(7|11), #29}} ; CHECK-IOS: mov r1, #1 ; CHECK-IOS: bl _memset ; CHECK-DARWIN: movs r1, #1 @@ -183,11 +183,11 @@ entry: } ; Check that alloca arguments are not aligned if size+offset is less than 8 bytes -define void @f4(i8* %dest, i32 %n) { +define void @f4(i8* %dest, i32 %n) "no-frame-pointer-elim"="true" { entry: ; CHECK-LABEL: f4 - ; CHECK: {{add(.w)? r., sp, #23|sub(.w)? r., r7, #17}} + ; CHECK: {{add(.w)? r., sp, #23|sub(.w)? r., r(7|11), #17}} ; CHECK-IOS: bl _memmove ; CHECK-DARWIN: bl _memmove ; CHECK-EABI: bl __aeabi_memmove @@ -196,7 +196,7 @@ entry: %0 = getelementptr inbounds [9 x i8], [9 x i8]* %arr0, i32 0, i32 4 call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false) - ; CHECK: {{add(.w)? r., sp, #(10|14)}} + ; CHECK: {{add(.w)? r., sp, #(10|14)|sub(.w) r., r(7|11), #26}} ; CHECK-IOS: bl _memcpy ; CHECK-DARWIN: bl _memcpy ; CHECK-EABI: bl __aeabi_memcpy @@ -205,7 +205,7 @@ entry: %1 = getelementptr inbounds [9 x i8], [9 x i8]* %arr1, i32 0, i32 4 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false) - ; CHECK: {{add(.w)? r., sp, #(1|5)}} + ; CHECK: {{add(.w)? r., sp, #(1|5)|sub(.w) r., r(7|11), #35}} ; CHECK-IOS: mov r1, #1 ; CHECK-IOS: bl _memset ; CHECK-DARWIN: movs r1, #1 @@ -222,11 +222,11 @@ entry: } ; Check that alloca arguments are not aligned if the offset is not a multiple of 4 -define void @f5(i8* %dest, i32 %n) { +define void @f5(i8* %dest, i32 %n) "no-frame-pointer-elim"="true" { entry: ; CHECK-LABEL: f5 - ; CHECK: {{add(.w)? r., sp, #27|sub(.w)? r., r7, #21}} + ; CHECK: {{add(.w)? r., sp, #27|sub(.w)? r., r(7|11), #21}} ; CHECK-IOS: bl _memmove ; CHECK-DARWIN: bl _memmove ; CHECK-EABI: bl __aeabi_memmove @@ -235,7 +235,7 @@ entry: %0 = getelementptr inbounds [13 x i8], [13 x i8]* %arr0, i32 0, i32 1 call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false) - ; CHECK: {{add(.w)? r., sp, #(10|14)}} + ; CHECK: {{add(.w)? r., sp, #(10|14)|sub(.w)? 
r., r(7|11), #34}} ; CHECK-IOS: bl _memcpy ; CHECK-DARWIN: bl _memcpy ; CHECK-EABI: bl __aeabi_memcpy @@ -244,7 +244,7 @@ entry: %1 = getelementptr inbounds [13 x i8], [13 x i8]* %arr1, i32 0, i32 1 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false) - ; CHECK: {{add(.w)? r., sp, #(1|5)}} + ; CHECK: {{add(.w)? r., sp, #(1|5)|sub(.w)? r., r(7|11), #47}} ; CHECK-IOS: mov r1, #1 ; CHECK-IOS: bl _memset ; CHECK-DARWIN: movs r1, #1 @@ -261,11 +261,11 @@ entry: } ; Check that alloca arguments are not aligned if the offset is unknown -define void @f6(i8* %dest, i32 %n, i32 %i) { +define void @f6(i8* %dest, i32 %n, i32 %i) "no-frame-pointer-elim"="true" { entry: ; CHECK-LABEL: f6 - ; CHECK: {{add(.w)? r., sp, #27|sub(.w)? r., r7, #25}} + ; CHECK: {{add(.w)? r., sp, #27|sub(.w)? r., r(7|11), #(25|29)}} ; CHECK-IOS: bl _memmove ; CHECK-DARWIN: bl _memmove ; CHECK-EABI: bl __aeabi_memmove @@ -274,7 +274,7 @@ entry: %0 = getelementptr inbounds [13 x i8], [13 x i8]* %arr0, i32 0, i32 %i call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false) - ; CHECK: {{add(.w)? r., sp, #(10|14)}} + ; CHECK: {{add(.w)? r., sp, #(10|14)|sub(.w)? r., r(7|11), #42}} ; CHECK-IOS: bl _memcpy ; CHECK-DARWIN: bl _memcpy ; CHECK-EABI: bl __aeabi_memcpy @@ -283,7 +283,7 @@ entry: %1 = getelementptr inbounds [13 x i8], [13 x i8]* %arr1, i32 0, i32 %i call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false) - ; CHECK: {{add(.w)? r., sp, #(1|5)}} + ; CHECK: {{add(.w)? r., sp, #(1|5)|sub(.w)? r., r(7|11), #55}} ; CHECK-IOS: mov r1, #1 ; CHECK-IOS: bl _memset ; CHECK-DARWIN: movs r1, #1 @@ -300,11 +300,11 @@ entry: } ; Check that alloca arguments are not aligned if the GEP is not inbounds -define void @f7(i8* %dest, i32 %n) { +define void @f7(i8* %dest, i32 %n) "no-frame-pointer-elim"="true" { entry: ; CHECK-LABEL: f7 - ; CHECK: {{add(.w)? r., sp, #27|sub(.w)? r., r7, #21}} + ; CHECK: {{add(.w)? r., sp, #27|sub(.w)? 
r., r(7|11), #21}} ; CHECK-IOS: bl _memmove ; CHECK-DARWIN: bl _memmove ; CHECK-EABI: bl __aeabi_memmove @@ -313,7 +313,7 @@ entry: %0 = getelementptr [13 x i8], [13 x i8]* %arr0, i32 0, i32 4 call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false) - ; CHECK: {{add(.w)? r., sp, #(10|14)}} + ; CHECK: {{add(.w)? r., sp, #(10|14)|sub(.w)? r., r(7|11), #34}} ; CHECK-IOS: bl _memcpy ; CHECK-DARWIN: bl _memcpy ; CHECK-EABI: bl __aeabi_memcpy @@ -322,7 +322,7 @@ entry: %1 = getelementptr [13 x i8], [13 x i8]* %arr1, i32 0, i32 4 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false) - ; CHECK: {{add(.w)? r., sp, #(1|5)}} + ; CHECK: {{add(.w)? r., sp, #(1|5)|sub(.w)? r., r(7|11), #47}} ; CHECK-IOS: mov r1, #1 ; CHECK-IOS: bl _memset ; CHECK-DARWIN: movs r1, #1 @@ -339,11 +339,11 @@ entry: } ; Check that alloca arguments are not aligned when the offset is past the end of the allocation -define void @f8(i8* %dest, i32 %n) { +define void @f8(i8* %dest, i32 %n) "no-frame-pointer-elim"="true" { entry: ; CHECK-LABEL: f8 - ; CHECK: {{add(.w)? r., sp, #27|sub(.w)? r., r7, #21}} + ; CHECK: {{add(.w)? r., sp, #27|sub(.w)? r., r(7|11), #21}} ; CHECK-IOS: bl _memmove ; CHECK-DARWIN: bl _memmove ; CHECK-EABI: bl __aeabi_memmove @@ -352,7 +352,7 @@ entry: %0 = getelementptr inbounds [13 x i8], [13 x i8]* %arr0, i32 0, i32 16 call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false) - ; CHECK: {{add(.w)? r., sp, #(10|14)}} + ; CHECK: {{add(.w)? r., sp, #(10|14)|sub(.w)? r., r(7|11), #34}} ; CHECK-IOS: bl _memcpy ; CHECK-DARWIN: bl _memcpy ; CHECK-EABI: bl __aeabi_memcpy @@ -361,7 +361,7 @@ entry: %1 = getelementptr inbounds [13 x i8], [13 x i8]* %arr1, i32 0, i32 16 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false) - ; CHECK: {{add(.w)? r., sp, #(1|5)}} + ; CHECK: {{add(.w)? r., sp, #(1|5)|sub(.w)? 
r., r(7|11), #47}} ; CHECK-IOS: mov r1, #1 ; CHECK-IOS: bl _memset ; CHECK-DARWIN: movs r1, #1 @@ -388,7 +388,7 @@ entry: @arr7 = external global [7 x i8], align 1 @arr8 = internal global [128 x i8] undef @arr9 = weak_odr global [128 x i8] undef -define void @f9(i8* %dest, i32 %n) { +define void @f9(i8* %dest, i32 %n) "no-frame-pointer-elim"="true" { entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr1, i32 0, i32 0), i32 %n, i32 1, i1 false) call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @arr2, i32 0, i32 0), i32 %n, i32 1, i1 false) diff --git a/test/CodeGen/ARM/noreturn.ll b/test/CodeGen/ARM/noreturn.ll index edc3333455d..f242afb9953 100644 --- a/test/CodeGen/ARM/noreturn.ll +++ b/test/CodeGen/ARM/noreturn.ll @@ -1,6 +1,6 @@ ; RUN: llc -O3 -o - %s | FileCheck %s ; Test case from PR16882. -target triple = "thumbv7s-apple-ios" +target triple = "thumbv7a-none-eabi" define i32 @test1() { ; CHECK-LABEL: @test1 @@ -60,6 +60,61 @@ entry: unreachable } + +define i32 @test1_nofpelim() "no-frame-pointer-elim"="true" { +; CHECK-LABEL: @test1_nofpelim +; CHECK: push +entry: + tail call void @overflow() #0 + unreachable +} + +define i32 @test2_nofpelim(i32 %x, i32 %y) "no-frame-pointer-elim"="true" { +; CHECK-LABEL: @test2_nofpelim +; CHECK: push +entry: + %conv = sext i32 %x to i64 + %conv1 = sext i32 %y to i64 + %mul = mul nsw i64 %conv1, %conv + %conv2 = trunc i64 %mul to i32 + %conv3 = sext i32 %conv2 to i64 + %cmp = icmp eq i64 %mul, %conv3 + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + tail call void @overflow() #0 + unreachable + +if.end: ; preds = %entry + ret i32 %conv2 +} + +; Test case for PR17825. 
+define i32 @test3_nofpelim() "no-frame-pointer-elim"="true" { +; CHECK-LABEL: @test3_nofpelim +; CHECK: push +entry: + tail call void @overflow_with_unwind() #1 + unreachable +} + +; Test case for uwtable +define i32 @test4_nofpelim() uwtable "no-frame-pointer-elim"="true" { +; CHECK-LABEL: @test4_nofpelim +; CHECK: push +entry: + tail call void @overflow() #0 + unreachable +} + +define i32 @test5_nofpelim() uwtable "no-frame-pointer-elim"="true" { +; CHECK-LABEL: @test5_nofpelim +; CHECK: push +entry: + tail call void @overflow_with_unwind() #1 + unreachable +} + ; Function Attrs: noreturn declare void @overflow_with_unwind() #1 diff --git a/test/CodeGen/ARM/swiftself.ll b/test/CodeGen/ARM/swiftself.ll index 6826b123472..b7a04ca4060 100644 --- a/test/CodeGen/ARM/swiftself.ll +++ b/test/CodeGen/ARM/swiftself.ll @@ -7,7 +7,7 @@ ; Parameter with swiftself should be allocated to r10. ; CHECK-LABEL: swiftself_param: ; CHECK: mov r0, r10 -define i8 *@swiftself_param(i8* swiftself %addr0) { +define i8 *@swiftself_param(i8* swiftself %addr0) "no-frame-pointer-elim"="true" { ret i8 *%addr0 } @@ -15,7 +15,7 @@ define i8 *@swiftself_param(i8* swiftself %addr0) { ; CHECK-LABEL: call_swiftself: ; CHECK: mov r10, r0 ; CHECK: bl {{_?}}swiftself_param -define i8 *@call_swiftself(i8* %arg) { +define i8 *@call_swiftself(i8* %arg) "no-frame-pointer-elim"="true" { %res = call i8 *@swiftself_param(i8* swiftself %arg) ret i8 *%res } @@ -25,7 +25,7 @@ define i8 *@call_swiftself(i8* %arg) { ; CHECK: push {r10} ; ... 
; CHECK: pop {r10} -define i8 *@swiftself_clobber(i8* swiftself %addr0) { +define i8 *@swiftself_clobber(i8* swiftself %addr0) "no-frame-pointer-elim"="true" { call void asm sideeffect "", "~{r10}"() ret i8 *%addr0 } @@ -37,7 +37,7 @@ define i8 *@swiftself_clobber(i8* swiftself %addr0) { ; OPT: bl {{_?}}swiftself_param ; OPT-NOT: mov{{.*}}r10 ; OPT-NEXT: bl {{_?}}swiftself_param -define void @swiftself_passthrough(i8* swiftself %addr0) { +define void @swiftself_passthrough(i8* swiftself %addr0) "no-frame-pointer-elim"="true" { call i8 *@swiftself_param(i8* swiftself %addr0) call i8 *@swiftself_param(i8* swiftself %addr0) ret void @@ -47,7 +47,7 @@ define void @swiftself_passthrough(i8* swiftself %addr0) { ; CHECK-LABEL: swiftself_tail: ; TAILCALL: b {{_?}}swiftself_param ; TAILCALL-NOT: pop -define i8* @swiftself_tail(i8* swiftself %addr0) { +define i8* @swiftself_tail(i8* swiftself %addr0) "no-frame-pointer-elim"="true" { call void asm sideeffect "", "~{r10}"() %res = tail call i8* @swiftself_param(i8* swiftself %addr0) ret i8* %res @@ -59,7 +59,7 @@ define i8* @swiftself_tail(i8* swiftself %addr0) { ; CHECK: mov r10, r0 ; CHECK: bl {{_?}}swiftself_param ; CHECK: pop -define i8* @swiftself_notail(i8* swiftself %addr0, i8* %addr1) nounwind { +define i8* @swiftself_notail(i8* swiftself %addr0, i8* %addr1) nounwind "no-frame-pointer-elim"="true" { %res = tail call i8* @swiftself_param(i8* swiftself %addr1) ret i8* %res } diff --git a/test/CodeGen/ARM/v7k-abi-align.ll b/test/CodeGen/ARM/v7k-abi-align.ll index e9b67f22edf..a5cdb8f8982 100644 --- a/test/CodeGen/ARM/v7k-abi-align.ll +++ b/test/CodeGen/ARM/v7k-abi-align.ll @@ -2,25 +2,25 @@ %struct = type { i8, i64, i8, double, i8, <2 x float>, i8, <4 x float> } -define i32 @test_i64_align() { +define i32 @test_i64_align() "no-frame-pointer-elim"="true" { ; CHECK-LABEL: test_i64_align: ; CHECL: movs r0, #8 ret i32 ptrtoint(i64* getelementptr(%struct, %struct* null, i32 0, i32 1) to i32) } -define i32 @test_f64_align() { 
+define i32 @test_f64_align() "no-frame-pointer-elim"="true" { ; CHECK-LABEL: test_f64_align: ; CHECL: movs r0, #24 ret i32 ptrtoint(double* getelementptr(%struct, %struct* null, i32 0, i32 3) to i32) } -define i32 @test_v2f32_align() { +define i32 @test_v2f32_align() "no-frame-pointer-elim"="true" { ; CHECK-LABEL: test_v2f32_align: ; CHECL: movs r0, #40 ret i32 ptrtoint(<2 x float>* getelementptr(%struct, %struct* null, i32 0, i32 5) to i32) } -define i32 @test_v4f32_align() { +define i32 @test_v4f32_align() "no-frame-pointer-elim"="true" { ; CHECK-LABEL: test_v4f32_align: ; CHECL: movs r0, #64 ret i32 ptrtoint(<4 x float>* getelementptr(%struct, %struct* null, i32 0, i32 7) to i32) @@ -28,7 +28,7 @@ define i32 @test_v4f32_align() { ; Key point here is than an extra register has to be saved so that the DPRs end ; up in an aligned location (as prologue/epilogue inserter had calculated). -define void @test_dpr_unwind_align() { +define void @test_dpr_unwind_align() "no-frame-pointer-elim"="true" { ; CHECK-LABEL: test_dpr_unwind_align: ; CHECK: push {r5, r6, r7, lr} ; CHECK-NOT: sub sp @@ -51,7 +51,7 @@ define void @test_dpr_unwind_align() { ; This time, there's no viable way to tack CS-registers onto the list: a real SP ; adjustment needs to be performed to put d8 and d9 where they should be. 
-define void @test_dpr_unwind_align_manually() { +define void @test_dpr_unwind_align_manually() "no-frame-pointer-elim"="true" { ; CHECK-LABEL: test_dpr_unwind_align_manually: ; CHECK: push {r4, r5, r6, r7, lr} ; CHECK-NOT: sub sp @@ -76,7 +76,7 @@ define void @test_dpr_unwind_align_manually() { } ; If there's only a CS1 area, the sub should be in the right place: -define void @test_dpr_unwind_align_just_cs1() { +define void @test_dpr_unwind_align_just_cs1() "no-frame-pointer-elim"="true" { ; CHECK-LABEL: test_dpr_unwind_align_just_cs1: ; CHECK: push {r4, r5, r6, r7, lr} ; CHECK: sub sp, #4 @@ -99,7 +99,7 @@ define void @test_dpr_unwind_align_just_cs1() { } ; If there are no DPRs, we shouldn't try to align the stack in stages anyway -define void @test_dpr_unwind_align_no_dprs() { +define void @test_dpr_unwind_align_no_dprs() "no-frame-pointer-elim"="true" { ; CHECK-LABEL: test_dpr_unwind_align_no_dprs: ; CHECK: push {r4, r5, r6, r7, lr} ; CHECK: sub sp, #12 @@ -117,7 +117,7 @@ define void @test_dpr_unwind_align_no_dprs() { ; 128-bit vectors should use 128-bit (i.e. correctly aligned) slots on ; the stack. -define <4 x float> @test_v128_stack_pass([8 x double], float, <4 x float> %in) { +define <4 x float> @test_v128_stack_pass([8 x double], float, <4 x float> %in) "no-frame-pointer-elim"="true" { ; CHECK-LABEL: test_v128_stack_pass: ; CHECK: add r[[ADDR:[0-9]+]], sp, #16 ; CHECK: vld1.64 {d0, d1}, [r[[ADDR]]:128] @@ -129,7 +129,7 @@ declare void @varargs(i32, ...) ; When varargs are enabled, we go down a different route. Still want 128-bit ; alignment though. 
-define void @test_v128_stack_pass_varargs(<4 x float> %in) { +define void @test_v128_stack_pass_varargs(<4 x float> %in) "no-frame-pointer-elim"="true" { ; CHECK-LABEL: test_v128_stack_pass_varargs: ; CHECK: add r[[ADDR:[0-9]+]], sp, #16 ; CHECK: vst1.64 {d0, d1}, [r[[ADDR]]:128] @@ -140,7 +140,7 @@ define void @test_v128_stack_pass_varargs(<4 x float> %in) { ; To be compatible with AAPCS's va_start model (store r0-r3 at incoming SP, give ; a single pointer), 64-bit quantities must be pass -define i64 @test_64bit_gpr_align(i32, i64 %r2_r3, i32 %sp) { +define i64 @test_64bit_gpr_align(i32, i64 %r2_r3, i32 %sp) "no-frame-pointer-elim"="true" { ; CHECK-LABEL: test_64bit_gpr_align: ; CHECK: ldr [[RHS:r[0-9]+]], [sp] ; CHECK: adds r0, [[RHS]], r2 diff --git a/test/CodeGen/ARM/warn-stack.ll b/test/CodeGen/ARM/warn-stack.ll index 6e819e40432..f07cb64cddf 100644 --- a/test/CodeGen/ARM/warn-stack.ll +++ b/test/CodeGen/ARM/warn-stack.ll @@ -4,7 +4,7 @@ ; ; CHECK-NOT: nowarn -define void @nowarn() nounwind ssp { +define void @nowarn() nounwind ssp "no-frame-pointer-elim"="true" { entry: %buffer = alloca [12 x i8], align 1 %arraydecay = getelementptr inbounds [12 x i8], [12 x i8]* %buffer, i64 0, i64 0 @@ -13,7 +13,7 @@ entry: } ; CHECK: warning: stack size limit exceeded (92) in warn -define void @warn() nounwind ssp { +define void @warn() nounwind ssp "no-frame-pointer-elim"="true" { entry: %buffer = alloca [80 x i8], align 1 %arraydecay = getelementptr inbounds [80 x i8], [80 x i8]* %buffer, i64 0, i64 0 diff --git a/test/CodeGen/Thumb/large-stack.ll b/test/CodeGen/Thumb/large-stack.ll index c5d1044e9d6..66d0f4b8b18 100644 --- a/test/CodeGen/Thumb/large-stack.ll +++ b/test/CodeGen/Thumb/large-stack.ll @@ -1,9 +1,9 @@ -; RUN: llc < %s -mtriple=thumb-apple-ios | FileCheck %s --check-prefix=CHECK --check-prefix=IOS -; RUN: llc < %s -mtriple=thumb-none-eabi | FileCheck %s --check-prefix=CHECK --check-prefix=EABI +; RUN: llc < %s -mtriple=thumb-apple-ios | FileCheck %s +; RUN: 
llc < %s -mtriple=thumb-none-eabi | FileCheck %s ; RUN: llc < %s -o %t -filetype=obj -mtriple=thumbv6-apple-ios -; RUN: llvm-objdump -triple=thumbv6-apple-ios -d %t | FileCheck %s --check-prefix=CHECK --check-prefix=IOS +; RUN: llvm-objdump -triple=thumbv6-apple-ios -d %t | FileCheck %s ; RUN: llc < %s -o %t -filetype=obj -mtriple=thumbv6-none-eabi -; RUN: llvm-objdump -triple=thumbv6-none-eabi -d %t | FileCheck %s --check-prefix=CHECK --check-prefix=EABI +; RUN: llvm-objdump -triple=thumbv6-none-eabi -d %t | FileCheck %s ; Largest stack for which a single tADDspi/tSUBspi is enough define void @test1() { @@ -20,11 +20,21 @@ define void @test100() { ; CHECK: sub sp, #508 ; CHECK: sub sp, #508 ; CHECK: sub sp, #508 -; EABI: add sp, #508 -; EABI: add sp, #508 -; EABI: add sp, #508 -; IOS: subs r4, r7, #4 -; IOS: mov sp, r4 +; CHECK: add sp, #508 +; CHECK: add sp, #508 +; CHECK: add sp, #508 + %tmp = alloca [ 1524 x i8 ] , align 4 + ret void +} + +; Largest stack for which three tADDspi/tSUBspis are enough +define void @test100_nofpelim() "no-frame-pointer-elim"="true" { +; CHECK-LABEL: test100_nofpelim: +; CHECK: sub sp, #508 +; CHECK: sub sp, #508 +; CHECK: sub sp, #508 +; CHECK: subs r4, r7, #4 +; CHECK: mov sp, r4 %tmp = alloca [ 1524 x i8 ] , align 4 ret void } @@ -34,10 +44,19 @@ define void @test2() { ; CHECK-LABEL: test2: ; CHECK: ldr [[TEMP:r[0-7]]], ; CHECK: add sp, [[TEMP]] -; EABI: ldr [[TEMP:r[0-7]]], -; EABI: add sp, [[TEMP]] -; IOS: subs r4, r7, #4 -; IOS: mov sp, r4 +; CHECK: ldr [[TEMP:r[0-7]]], +; CHECK: add sp, [[TEMP]] + %tmp = alloca [ 1528 x i8 ] , align 4 + ret void +} + +; Smallest stack for which we use a constant pool +define void @test2_nofpelim() "no-frame-pointer-elim"="true" { +; CHECK-LABEL: test2_nofpelim: +; CHECK: ldr [[TEMP:r[0-7]]], +; CHECK: add sp, [[TEMP]] +; CHECK: subs r4, r7, #4 +; CHECK: mov sp, r4 %tmp = alloca [ 1528 x i8 ] , align 4 ret void } @@ -48,10 +67,24 @@ define i32 @test3() { ; CHECK: add sp, [[TEMP]] ; CHECK: ldr 
[[TEMP]], ; CHECK: add [[TEMP]], sp -; EABI: ldr [[TEMP:r[0-7]]], -; EABI: add sp, [[TEMP]] -; IOS: subs r4, r7, #4 -; IOS: mov sp, r4 +; CHECK: ldr [[TEMP:r[0-7]]], +; CHECK: add sp, [[TEMP]] + %retval = alloca i32, align 4 + %tmp = alloca i32, align 4 + %a = alloca [805306369 x i8], align 16 + store i32 0, i32* %tmp + %tmp1 = load i32, i32* %tmp + ret i32 %tmp1 +} + +define i32 @test3_nofpelim() "no-frame-pointer-elim"="true" { +; CHECK-LABEL: test3_nofpelim: +; CHECK: ldr [[TEMP:r[0-7]]], +; CHECK: add sp, [[TEMP]] +; CHECK: ldr [[TEMP]], +; CHECK: add [[TEMP]], sp +; CHECK: subs r4, r7, +; CHECK: mov sp, r4 %retval = alloca i32, align 4 %tmp = alloca i32, align 4 %a = alloca [805306369 x i8], align 16 diff --git a/test/CodeGen/Thumb/push.ll b/test/CodeGen/Thumb/push.ll index 62229c6dade..4f4ffed7bbd 100644 --- a/test/CodeGen/Thumb/push.ll +++ b/test/CodeGen/Thumb/push.ll @@ -3,7 +3,7 @@ define void @t() nounwind { ; CHECK-LABEL: t: -; CHECK: push {r7} +; CHECK: push {r7, lr} entry: call void asm sideeffect alignstack ".long 0xe7ffdefe", ""() nounwind ret void diff --git a/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll b/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll index e363a343f0b..1b3cb9920ef 100644 --- a/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll +++ b/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll @@ -3,7 +3,7 @@ @"\01LC" = external constant [36 x i8], align 1 ; <[36 x i8]*> [#uses=1] -define i32 @t(i32, ...) nounwind { +define i32 @t(i32, ...) 
nounwind "no-frame-pointer-elim"="true" { entry: ; CHECK-LABEL: t: ; CHECK: add r7, sp, #12 diff --git a/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll b/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll index 3d89390d04c..382f5dc281d 100644 --- a/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll +++ b/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll @@ -5,7 +5,7 @@ declare void @bar() nounwind optsize -define void @foo() nounwind optsize { +define void @foo() nounwind optsize "no-frame-pointer-elim"="true" { ; CHECK-LABEL: foo: ; CHECK: push ; CHECK: mov r7, sp diff --git a/test/CodeGen/Thumb2/aligned-spill.ll b/test/CodeGen/Thumb2/aligned-spill.ll index 59f546b8e8e..e3db73236c8 100644 --- a/test/CodeGen/Thumb2/aligned-spill.ll +++ b/test/CodeGen/Thumb2/aligned-spill.ll @@ -11,7 +11,7 @@ target triple = "thumbv7-apple-ios" ; CHECK: push {r4, r7, lr} ; CHECK: bfc r4, #0, #3 ; CHECK: mov sp, r4 -define void @f(double* nocapture %p) nounwind ssp { +define void @f(double* nocapture %p) nounwind ssp "no-frame-pointer-elim"="true" { entry: %0 = load double, double* %p, align 4 tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15}"() nounwind @@ -45,7 +45,7 @@ entry: declare void @g() ; Spill 7 d-registers. -define void @f7(double* nocapture %p) nounwind ssp { +define void @f7(double* nocapture %p) nounwind ssp "no-frame-pointer-elim"="true" { entry: tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14}"() nounwind ret void @@ -69,7 +69,7 @@ entry: ; NEON: pop ; Spill 7 d-registers, leave a hole. 
+define void @f3plus4(double* nocapture %p) nounwind ssp "no-frame-pointer-elim"="true" { entry: tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d12},~{d13},~{d14},~{d15}"() nounwind ret void diff --git a/test/CodeGen/Thumb2/frame-pointer.ll b/test/CodeGen/Thumb2/frame-pointer.ll new file mode 100644 index 00000000000..f6e18603b5f --- /dev/null +++ b/test/CodeGen/Thumb2/frame-pointer.ll @@ -0,0 +1,152 @@ +; RUN: llc -mtriple=thumbv7m-none-eabi -o - %s | FileCheck %s + +declare void @foo() + +; Leaf function, no frame so no need for a frame pointer. +define void @leaf() { +; CHECK-LABEL: leaf: +; CHECK-NOT: push +; CHECK-NOT: sp +; CHECK-NOT: pop +; CHECK: bx lr + ret void +} + +; Leaf function, frame pointer is requested but we don't need any stack frame, +; so don't create a frame pointer. +define void @leaf_nofpelim() "no-frame-pointer-elim"="true" { +; CHECK-LABEL: leaf_nofpelim: +; CHECK-NOT: push +; CHECK-NOT: sp +; CHECK-NOT: pop +; CHECK: bx lr + ret void +} + +; Leaf function, frame pointer is requested and we need a stack frame, so we +; need to use a frame pointer. +define void @leaf_lowreg_nofpelim() "no-frame-pointer-elim"="true" { +; CHECK-LABEL: leaf_lowreg_nofpelim: +; CHECK: push {r4, r7, lr} +; CHECK: add r7, sp, #4 +; CHECK: pop {r4, r7, pc} + call void asm sideeffect "", "~{r4}" () + ret void +} + +; Leaf function, frame pointer is requested and we need a stack frame, so we +; need to use a frame pointer. A high register is pushed to the stack, so we +; must use two push/pop instructions to ensure that fp and lr are adjacent on +; the stack. +define void @leaf_highreg_nofpelim() "no-frame-pointer-elim"="true" { +; CHECK-LABEL: leaf_highreg_nofpelim: +; CHECK: push {r7, lr} +; CHECK: mov r7, sp +; CHECK: str r8, [sp, #-4]! 
+; CHECK: ldr r8, [sp], #4 +; CHECK: pop {r7, pc} + call void asm sideeffect "", "~{r8}" () + ret void +} + +; Leaf function, frame pointer requested for non-leaf functions only, so no +; need for a stack frame. +define void @leaf_nononleaffpelim() "no-frame-pointer-elim-non-leaf" { +; CHECK-LABEL: leaf_nononleaffpelim: +; CHECK-NOT: push +; CHECK-NOT: sp +; CHECK-NOT: pop +; CHECK: bx lr + ret void +} + +; Has a call, but still no need for a frame pointer. +define void @call() { +; CHECK-LABEL: call: +; CHECK: push {[[DUMMYREG:r[0-9]+]], lr} +; CHECK-NOT: sp +; CHECK: bl foo +; CHECK: pop {[[DUMMYREG]], pc} + call void @foo() + ret void +} + +; Has a call, and frame pointer requested. +define void @call_nofpelim() "no-frame-pointer-elim"="true" { +; CHECK-LABEL: call_nofpelim: +; CHECK: push {r7, lr} +; CHECK: mov r7, sp +; CHECK: bl foo +; CHECK: pop {r7, pc} + call void @foo() + ret void +} + +; Has a call, and frame pointer requested for non-leaf function. +define void @call_nononleaffpelim() "no-frame-pointer-elim-non-leaf" { +; CHECK-LABEL: call_nononleaffpelim: +; CHECK: push {r7, lr} +; CHECK: mov r7, sp +; CHECK: bl foo +; CHECK: pop {r7, pc} + call void @foo() + ret void +} + +; Has a high register clobbered, no need for a frame pointer. +define void @highreg() { +; CHECK-LABEL: highreg: +; CHECK: push.w {r8, lr} +; CHECK-NOT: sp +; CHECK: bl foo +; CHECK: pop.w {r8, pc} + call void asm sideeffect "", "~{r8}" () + call void @foo() + ret void +} + +; Has a high register clobbered, frame pointer requested. We need to split the +; push into two, to ensure that r7 and lr are adjacent on the stack. +define void @highreg_nofpelim() "no-frame-pointer-elim"="true" { +; CHECK-LABEL: highreg_nofpelim: +; CHECK: push {[[DUMMYREG:r[0-9]+]], r7, lr} +; CHECK: add r7, sp, #4 +; CHECK: str r8, [sp, #-4]! 
+; CHECK: bl foo +; CHECK: ldr r8, [sp], #4 +; CHECK: pop {[[DUMMYREG]], r7, pc} + call void asm sideeffect "", "~{r8}" () + call void @foo() + ret void +} + +; Has a high register clobbered, frame required due to variable-sized alloca. +; We need a frame pointer to correctly restore the stack, but don't need to +; split the push/pop here, because the frame pointer is not required by the ABI. +define void @highreg_alloca(i32 %a) { +; CHECK-LABEL: highreg_alloca: +; CHECK: push.w {[[SOMEREGS:.*]], r7, r8, lr} +; CHECK: add r7, sp, #{{[0-9]+}} +; CHECK: bl foo +; CHECK: pop.w {[[SOMEREGS]], r7, r8, pc} + %alloca = alloca i32, i32 %a, align 4 + call void @foo() + call void asm sideeffect "", "~{r8}" () + ret void +} + +; Has a high register clobbered, frame required due to both variable-sized +; alloca and ABI. We do need to split the push/pop here. +define void @highreg_alloca_nofpelim(i32 %a) "no-frame-pointer-elim"="true" { +; CHECK-LABEL: highreg_alloca_nofpelim: +; CHECK: push {[[SOMEREGS:.*]], r7, lr} +; CHECK: add r7, sp, #{{[0-9]+}} +; CHECK: str r8, [sp, #-4]! 
+; CHECK: bl foo +; CHECK: ldr r8, [sp], #4 +; CHECK: pop {[[SOMEREGS]], r7, pc} + %alloca = alloca i32, i32 %a, align 4 + call void @foo() + call void asm sideeffect "", "~{r8}" () + ret void +} diff --git a/test/CodeGen/Thumb2/thumb2-ldm.ll b/test/CodeGen/Thumb2/thumb2-ldm.ll index e733d5c9926..abda4bac57d 100644 --- a/test/CodeGen/Thumb2/thumb2-ldm.ll +++ b/test/CodeGen/Thumb2/thumb2-ldm.ll @@ -3,7 +3,7 @@ @X = external global [0 x i32] ; <[0 x i32]*> [#uses=5] -define i32 @t1() { +define i32 @t1() "no-frame-pointer-elim"="true" { ; ALL-LABEL: t1: ; ALL: push {r7, lr} ; CHECK: ldrd @@ -16,7 +16,7 @@ define i32 @t1() { ret i32 %tmp4 } -define i32 @t2() { +define i32 @t2() "no-frame-pointer-elim"="true" { ; ALL-LABEL: t2: ; ALL: push {r7, lr} ; CHECK: ldm @@ -30,7 +30,7 @@ define i32 @t2() { ret i32 %tmp6 } -define i32 @t3() { +define i32 @t3() "no-frame-pointer-elim"="true" { ; ALL-LABEL: t3: ; ALL: push {r7, lr} ; CHECK: ldm @@ -46,7 +46,7 @@ define i32 @t3() { @g = common global i32* null -define void @t4(i32 %a0, i32 %a1, i32 %a2) { +define void @t4(i32 %a0, i32 %a1, i32 %a2) "no-frame-pointer-elim"="true" { ; ALL-LABEL: t4: ; ALL: stm.w sp, {r0, r1, r2} ; ALL: bl _ext diff --git a/test/DebugInfo/ARM/PR16736.ll b/test/DebugInfo/ARM/PR16736.ll index 19e317be69d..9ff9ed529ac 100644 --- a/test/DebugInfo/ARM/PR16736.ll +++ b/test/DebugInfo/ARM/PR16736.ll @@ -13,7 +13,7 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:64:64- target triple = "thumbv7-apple-ios" ; Function Attrs: nounwind -define arm_aapcscc void @_Z1hiiiif(i32, i32, i32, i32, float %x) #0 !dbg !4 { +define arm_aapcscc void @_Z1hiiiif(i32, i32, i32, i32, float %x) #0 "no-frame-pointer-elim"="true" !dbg !4 { entry: tail call void @llvm.dbg.value(metadata i32 %0, i64 0, metadata !12, metadata !DIExpression()), !dbg !18 tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !13, metadata !DIExpression()), !dbg !18