mirror of
https://github.com/RPCSX/llvm.git
synced 2024-12-14 23:48:49 +00:00
Fix ARMv4T (Thumb1) epilogue generation
Summary: Before ARMv5T, Thumb1 code could not pop PC, as described in D14357 and D14986, so the epilogue needs the special fix-up. Reviewers: jroelofs, qcolombet Subscribers: aemerson, llvm-commits, rengolin Differential Revision: http://reviews.llvm.org/D15126 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255047 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
917e9a38ca
commit
d10549743a
@ -406,9 +406,6 @@ bool Thumb1FrameLowering::needPopSpecialFixUp(const MachineFunction &MF) const {
|
||||
if (AFI->getArgRegsSaveSize())
|
||||
return true;
|
||||
|
||||
// FIXME: this doesn't make sense, and the following patch will remove it.
|
||||
if (!STI.hasV4TOps()) return false;
|
||||
|
||||
// LR cannot be encoded in a Thumb1 pop instruction, i.e., restoring it requires a special fix-up.
|
||||
for (const CalleeSavedInfo &CSI : MF.getFrameInfo()->getCalleeSavedInfo())
|
||||
if (CSI.getReg() == ARM::LR)
|
||||
@ -532,10 +529,32 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
|
||||
.addReg(PopReg, RegState::Kill));
|
||||
}
|
||||
|
||||
bool AddBx = false;
|
||||
if (MBBI == MBB.end()) {
|
||||
MachineInstr& Pop = MBB.back();
|
||||
assert(Pop.getOpcode() == ARM::tPOP);
|
||||
Pop.RemoveOperand(Pop.findRegisterDefOperandIdx(ARM::LR));
|
||||
} else if (MBBI->getOpcode() == ARM::tPOP_RET) {
|
||||
// We couldn't use the direct restoration above, so
|
||||
// perform the opposite conversion: tPOP_RET to tPOP.
|
||||
MachineInstrBuilder MIB =
|
||||
AddDefaultPred(
|
||||
BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP)));
|
||||
unsigned Popped = 0;
|
||||
for (auto MO: MBBI->operands())
|
||||
if (MO.isReg() && (MO.isImplicit() || MO.isDef()) &&
|
||||
MO.getReg() != ARM::PC) {
|
||||
MIB.addOperand(MO);
|
||||
if (!MO.isImplicit())
|
||||
Popped++;
|
||||
}
|
||||
// Is there anything left to pop?
|
||||
if (!Popped)
|
||||
MBB.erase(MIB.getInstr());
|
||||
// Erase the old instruction.
|
||||
MBB.erase(MBBI);
|
||||
MBBI = MBB.end();
|
||||
AddBx = true;
|
||||
}
|
||||
|
||||
assert(PopReg && "Do not know how to get LR");
|
||||
@ -554,14 +573,20 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
|
||||
return true;
|
||||
}
|
||||
|
||||
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
|
||||
.addReg(ARM::LR, RegState::Define)
|
||||
.addReg(PopReg, RegState::Kill));
|
||||
|
||||
if (AddBx && !TemporaryReg) {
|
||||
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX))
|
||||
.addReg(PopReg, RegState::Kill));
|
||||
} else {
|
||||
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
|
||||
.addReg(ARM::LR, RegState::Define)
|
||||
.addReg(PopReg, RegState::Kill));
|
||||
}
|
||||
if (TemporaryReg) {
|
||||
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
|
||||
.addReg(PopReg, RegState::Define)
|
||||
.addReg(TemporaryReg, RegState::Kill));
|
||||
if (AddBx)
|
||||
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET)));
|
||||
}
|
||||
|
||||
return true;
|
||||
@ -628,7 +653,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
|
||||
if (isVarArg)
|
||||
continue;
|
||||
// ARMv4T requires BX, see emitEpilogue
|
||||
if (STI.hasV4TOps() && !STI.hasV5TOps())
|
||||
if (!STI.hasV5TOps())
|
||||
continue;
|
||||
Reg = ARM::PC;
|
||||
(*MIB).setDesc(TII.get(ARM::tPOP_RET));
|
||||
|
@ -30,11 +30,11 @@
|
||||
; RUN: -filetype=asm -o - %s \
|
||||
; RUN: | FileCheck %s --check-prefix=CHECK-V7-FP-ELIM
|
||||
|
||||
; RUN: llc -mtriple thumb-unknown-linux-gnueabi \
|
||||
; RUN: llc -mtriple thumbv5-unknown-linux-gnueabi \
|
||||
; RUN: -disable-fp-elim -filetype=asm -o - %s \
|
||||
; RUN: | FileCheck %s --check-prefix=CHECK-THUMB-FP
|
||||
|
||||
; RUN: llc -mtriple thumb-unknown-linux-gnueabi \
|
||||
; RUN: llc -mtriple thumbv5-unknown-linux-gnueabi \
|
||||
; RUN: -filetype=asm -o - %s \
|
||||
; RUN: | FileCheck %s --check-prefix=CHECK-THUMB-FP-ELIM
|
||||
|
||||
|
@ -32,10 +32,10 @@ define void @test100() {
|
||||
; Smallest stack for which we use a constant pool
|
||||
define void @test2() {
|
||||
; CHECK-LABEL: test2:
|
||||
; CHECK: ldr r0,
|
||||
; CHECK: add sp, r0
|
||||
; EABI: ldr r0,
|
||||
; EABI: add sp, r0
|
||||
; CHECK: ldr [[TEMP:r[0-7]]],
|
||||
; CHECK: add sp, [[TEMP]]
|
||||
; EABI: ldr [[TEMP:r[0-7]]],
|
||||
; EABI: add sp, [[TEMP]]
|
||||
; IOS: subs r4, r7, #4
|
||||
; IOS: mov sp, r4
|
||||
%tmp = alloca [ 1528 x i8 ] , align 4
|
||||
@ -44,12 +44,12 @@ define void @test2() {
|
||||
|
||||
define i32 @test3() {
|
||||
; CHECK-LABEL: test3:
|
||||
; CHECK: ldr r1,
|
||||
; CHECK: add sp, r1
|
||||
; CHECK: ldr r1,
|
||||
; CHECK: add r1, sp
|
||||
; EABI: ldr r1,
|
||||
; EABI: add sp, r1
|
||||
; CHECK: ldr [[TEMP:r[0-7]]],
|
||||
; CHECK: add sp, [[TEMP]]
|
||||
; CHECK: ldr [[TEMP]],
|
||||
; CHECK: add [[TEMP]], sp
|
||||
; EABI: ldr [[TEMP:r[0-7]]],
|
||||
; EABI: add sp, [[TEMP]]
|
||||
; IOS: subs r4, r7, #4
|
||||
; IOS: mov sp, r4
|
||||
%retval = alloca i32, align 4
|
||||
|
@ -1,60 +0,0 @@
|
||||
; RUN: llc %s -enable-shrink-wrap=true -o - | FileCheck %s
|
||||
|
||||
target triple = "thumbv6m-none-none-eabi"
|
||||
|
||||
@retval = global i32 0, align 4
|
||||
|
||||
define i32 @test(i32 %i, i32 %argc, i8** nocapture readonly %argv) {
|
||||
%1 = icmp sgt i32 %argc, %i
|
||||
br i1 %1, label %2, label %19
|
||||
|
||||
%3 = getelementptr inbounds i8*, i8** %argv, i32 %i
|
||||
%4 = load i8*, i8** %3, align 4
|
||||
%5 = load i8, i8* %4, align 1
|
||||
%6 = icmp eq i8 %5, 45
|
||||
%7 = getelementptr inbounds i8, i8* %4, i32 1
|
||||
%. = select i1 %6, i8* %7, i8* %4
|
||||
%.1 = select i1 %6, i32 -1, i32 1
|
||||
%8 = load i8, i8* %., align 1
|
||||
%.off2 = add i8 %8, -48
|
||||
%9 = icmp ult i8 %.off2, 10
|
||||
%.pre = load i32, i32* @retval, align 4
|
||||
br i1 %9, label %.lr.ph.preheader, label %.critedge
|
||||
|
||||
.lr.ph.preheader: ; preds = %2
|
||||
br label %.lr.ph
|
||||
|
||||
.lr.ph: ; preds = %.lr.ph.preheader, %.lr.ph
|
||||
%10 = phi i32 [ %14, %.lr.ph ], [ %.pre, %.lr.ph.preheader ]
|
||||
%11 = phi i8 [ %15, %.lr.ph ], [ %8, %.lr.ph.preheader ]
|
||||
%valstring.03 = phi i8* [ %13, %.lr.ph ], [ %., %.lr.ph.preheader ]
|
||||
%12 = zext i8 %11 to i32
|
||||
%13 = getelementptr inbounds i8, i8* %valstring.03, i32 1
|
||||
%14 = add nsw i32 %10, %12
|
||||
store i32 %14, i32* @retval, align 4
|
||||
%15 = load i8, i8* %13, align 1
|
||||
%.off = add i8 %15, -48
|
||||
%16 = icmp ult i8 %.off, 10
|
||||
br i1 %16, label %.lr.ph, label %.critedge.loopexit
|
||||
|
||||
.critedge.loopexit: ; preds = %.lr.ph
|
||||
%.lcssa = phi i32 [ %14, %.lr.ph ]
|
||||
br label %.critedge
|
||||
|
||||
.critedge: ; preds = %.critedge.loopexit, %2
|
||||
%17 = phi i32 [ %.pre, %2 ], [ %.lcssa, %.critedge.loopexit ]
|
||||
%18 = mul nsw i32 %17, %.1
|
||||
store i32 %18, i32* @retval, align 4
|
||||
br label %19
|
||||
|
||||
; <label>:19 ; preds = %.critedge, %0
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
; CHECK: push {r4, r5, r7, lr}
|
||||
; CHECK: pop {r4, r5, r7}
|
||||
; CHECK: pop {r0}
|
||||
; CHECK: mov lr, r0
|
||||
; CHECK: movs r0, #0
|
||||
; CHECK: bx lr
|
||||
|
@ -1,7 +1,11 @@
|
||||
; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumb-macho \
|
||||
; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE
|
||||
; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE --check-prefix=ENABLE-V4T
|
||||
; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumbv5-macho \
|
||||
; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE --check-prefix=ENABLE-V5T
|
||||
; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumb-macho \
|
||||
; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE
|
||||
; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE --check-prefix=DISABLE-V4T
|
||||
; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumbv5-macho \
|
||||
; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE --check-prefix=DISABLE-V5T
|
||||
;
|
||||
; Note: Lots of tests use inline asm instead of regular calls.
|
||||
; This allows better control over what the allocation will do.
|
||||
@ -39,14 +43,20 @@
|
||||
;
|
||||
; With shrink-wrapping, epilogue is just after the call.
|
||||
; ENABLE-NEXT: add sp, #8
|
||||
; ENABLE-NEXT: pop {r7, lr}
|
||||
; ENABLE-V5T-NEXT: pop {r7, pc}
|
||||
; ENABLE-V4T-NEXT: pop {r7}
|
||||
; ENABLE-V4T-NEXT: pop {r1}
|
||||
; ENABLE-V4T-NEXT: mov lr, r1
|
||||
;
|
||||
; CHECK: [[EXIT_LABEL]]:
|
||||
;
|
||||
; Without shrink-wrapping, epilogue is in the exit block.
|
||||
; Epilogue code. (What we pop does not matter.)
|
||||
; DISABLE: add sp, #8
|
||||
; DISABLE-NEXT: pop {r7, pc}
|
||||
; DISABLE-V5T-NEXT: pop {r7, pc}
|
||||
; DISABLE-V4T-NEXT: pop {r7}
|
||||
; DISABLE-V4T-NEXT: pop {r1}
|
||||
; DISABLE-V4T-NEXT: bx r1
|
||||
;
|
||||
; ENABLE-NEXT: bx lr
|
||||
define i32 @foo(i32 %a, i32 %b) {
|
||||
@ -64,6 +74,42 @@ false:
|
||||
ret i32 %tmp.0
|
||||
}
|
||||
|
||||
|
||||
; Same, but the final BB is non-trivial, so we don't duplicate the return inst.
|
||||
; CHECK-LABEL: bar:
|
||||
;
|
||||
; With shrink-wrapping, epilogue is just after the call.
|
||||
; CHECK: bl
|
||||
; ENABLE-NEXT: add sp, #8
|
||||
; ENABLE-NEXT: pop {r7}
|
||||
; ENABLE-NEXT: pop {r0}
|
||||
; ENABLE-NEXT: mov lr, r0
|
||||
;
|
||||
; CHECK: movs r0, #42
|
||||
;
|
||||
; Without shrink-wrapping, epilogue is in the exit block.
|
||||
; Epilogue code. (What we pop does not matter.)
|
||||
; DISABLE: add sp, #8
|
||||
; DISABLE-V5T-NEXT: pop {r7, pc}
|
||||
; DISABLE-V4T-NEXT: pop {r7}
|
||||
; DISABLE-V4T-NEXT: pop {r1}
|
||||
; DISABLE-V4T-NEXT: bx r1
|
||||
;
|
||||
; ENABLE-NEXT: bx lr
|
||||
define i32 @bar(i32 %a, i32 %b) {
|
||||
%tmp = alloca i32, align 4
|
||||
%tmp2 = icmp slt i32 %a, %b
|
||||
br i1 %tmp2, label %true, label %false
|
||||
|
||||
true:
|
||||
store i32 %a, i32* %tmp, align 4
|
||||
%tmp4 = call i32 @doSomething(i32 0, i32* %tmp)
|
||||
br label %false
|
||||
|
||||
false:
|
||||
ret i32 42
|
||||
}
|
||||
|
||||
; Function Attrs: optsize
|
||||
declare i32 @doSomething(i32, i32*)
|
||||
|
||||
@ -101,12 +147,17 @@ declare i32 @doSomething(i32, i32*)
|
||||
; CHECK: lsls [[SUM]], [[SUM]], #3
|
||||
;
|
||||
; Duplicated epilogue.
|
||||
; DISABLE: pop {r4, pc}
|
||||
; DISABLE-V5T: pop {r4, pc}
|
||||
; DISABLE-V4T: b [[END_LABEL:LBB[0-9_]+]]
|
||||
;
|
||||
; CHECK: [[ELSE_LABEL]]: @ %if.else
|
||||
; Shift second argument by one and store into returned register.
|
||||
; CHECK: lsls r0, r1, #1
|
||||
; DISABLE-NEXT: pop {r4, pc}
|
||||
; DISABLE-V5T-NEXT: pop {r4, pc}
|
||||
; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end
|
||||
; DISABLE-V4T-NEXT: pop {r4}
|
||||
; DISABLE-V4T-NEXT: pop {r1}
|
||||
; DISABLE-V4T-NEXT: bx r1
|
||||
;
|
||||
; ENABLE-NEXT: bx lr
|
||||
define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) {
|
||||
@ -222,12 +273,17 @@ for.end: ; preds = %for.body
|
||||
; ENABLE-NEXT: pop {r4, lr}
|
||||
;
|
||||
; Duplicated epilogue.
|
||||
; DISABLE: pop {r4, pc}
|
||||
; DISABLE-V5T: pop {r4, pc}
|
||||
; DISABLE-V4T: b [[END_LABEL:LBB[0-9_]+]]
|
||||
;
|
||||
; CHECK: [[ELSE_LABEL]]: @ %if.else
|
||||
; Shift second argument by one and store into returned register.
|
||||
; CHECK: lsls r0, r1, #1
|
||||
; DISABLE-NEXT: pop {r4, pc}
|
||||
; DISABLE-V5T-NEXT: pop {r4, pc}
|
||||
; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end
|
||||
; DISABLE-V4T-NEXT: pop {r4}
|
||||
; DISABLE-V4T-NEXT: pop {r1}
|
||||
; DISABLE-V4T-NEXT: bx r1
|
||||
;
|
||||
; ENABLE-NEXT: bx lr
|
||||
define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) {
|
||||
@ -297,12 +353,17 @@ declare void @somethingElse(...)
|
||||
; ENABLE: pop {r4, lr}
|
||||
;
|
||||
; Duplicated epilogue.
|
||||
; DISABLE: pop {r4, pc}
|
||||
; DISABLE-V5T: pop {r4, pc}
|
||||
; DISABLE-V4T: b [[END_LABEL:LBB[0-9_]+]]
|
||||
;
|
||||
; CHECK: [[ELSE_LABEL]]: @ %if.else
|
||||
; Shift second argument by one and store into returned register.
|
||||
; CHECK: lsls r0, r1, #1
|
||||
; DISABLE-NEXT: pop {r4, pc}
|
||||
; DISABLE-V5T-NEXT: pop {r4, pc}
|
||||
; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end
|
||||
; DISABLE-V4T-NEXT: pop {r4}
|
||||
; DISABLE-V4T-NEXT: pop {r1}
|
||||
; DISABLE-V4T-NEXT: bx r1
|
||||
;
|
||||
; ENABLE-NEXT: bx lr
|
||||
define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) #0 {
|
||||
@ -373,12 +434,17 @@ entry:
|
||||
; ENABLE-NEXT: pop {r4, lr}
|
||||
;
|
||||
; Duplicated epilogue.
|
||||
; DISABLE-NEXT: pop {r4, pc}
|
||||
; DISABLE-V5T-NEXT: pop {r4, pc}
|
||||
; DISABLE-V4T-NEXT: b [[END_LABEL:LBB[0-9_]+]]
|
||||
;
|
||||
; CHECK: [[ELSE_LABEL]]: @ %if.else
|
||||
; Shift second argument by one and store into returned register.
|
||||
; CHECK: lsls r0, r1, #1
|
||||
; DISABLE-NEXT: pop {r4, pc}
|
||||
; DISABLE-V5T-NEXT: pop {r4, pc}
|
||||
; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end
|
||||
; DISABLE-V4T-NEXT: pop {r4}
|
||||
; DISABLE-V4T-NEXT: pop {r1}
|
||||
; DISABLE-V4T-NEXT: bx r1
|
||||
;
|
||||
; ENABLE-NEXT: bx lr
|
||||
define i32 @inlineAsm(i32 %cond, i32 %N) {
|
||||
@ -438,12 +504,14 @@ if.end: ; preds = %for.body, %if.else
|
||||
; CHECK-NEXT: pop {r3}
|
||||
; CHECK-NEXT: bl
|
||||
; CHECK-NEXT: lsls r0, r0, #3
|
||||
; CHECK-NEXT: add sp, #16
|
||||
;
|
||||
; ENABLE-NEXT: add sp, #16
|
||||
; ENABLE-NEXT: pop {[[TMP]], lr}
|
||||
;
|
||||
; Duplicated epilogue.
|
||||
; DISABLE-NEXT: pop {[[TMP]], pc}
|
||||
; DISABLE-V5T-NEXT: add sp, #16
|
||||
; DISABLE-V5T-NEXT: pop {[[TMP]], pc}
|
||||
; DISABLE-V4T-NEXT: b [[END_LABEL:LBB[0-9_]+]]
|
||||
;
|
||||
; CHECK: [[ELSE_LABEL]]: @ %if.else
|
||||
; Shift second argument by one and store into returned register.
|
||||
@ -452,8 +520,12 @@ if.end: ; preds = %for.body, %if.else
|
||||
; Epilogue code.
|
||||
; ENABLE-NEXT: bx lr
|
||||
;
|
||||
; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end
|
||||
; DISABLE-NEXT: add sp, #16
|
||||
; DISABLE-NEXT: pop {[[TMP]], pc}
|
||||
; DISABLE-V5T-NEXT: pop {[[TMP]], pc}
|
||||
; DISABLE-V4T-NEXT: pop {[[TMP]]}
|
||||
; DISABLE-V4T-NEXT: pop {r1}
|
||||
; DISABLE-V4T-NEXT: bx r1
|
||||
define i32 @callVariadicFunc(i32 %cond, i32 %N) {
|
||||
entry:
|
||||
%tobool = icmp eq i32 %cond, 0
|
||||
|
Loading…
Reference in New Issue
Block a user