[mips] Restrict tail call optimization

The tail call optimization was being used without proper consideration of
ABI requirements for saving and restoring the GP. This patch restricts tail
call optimization to functions within the same translation unit.

Reviewers: vkalintiris

Differential Revision: https://reviews.llvm.org/D24763

llvm-svn: 287505
This commit is contained in:
Simon Dardis 2016-11-20 21:23:08 +00:00
parent a97617aadc
commit 29c3915faa
8 changed files with 151 additions and 166 deletions

View File

@ -2650,11 +2650,20 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Get a count of how many bytes are to be pushed on the stack.
unsigned NextStackOffset = CCInfo.getNextStackOffset();
// Check if it's really possible to do a tail call.
if (IsTailCall)
// Check if it's really possible to do a tail call. Restrict it to functions
// that are part of this compilation unit.
bool InternalLinkage = false;
if (IsTailCall) {
IsTailCall = isEligibleForTailCallOptimization(
CCInfo, NextStackOffset, *MF.getInfo<MipsFunctionInfo>());
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
InternalLinkage = G->getGlobal()->hasInternalLinkage();
IsTailCall &= (InternalLinkage || G->getGlobal()->hasLocalLinkage() ||
G->getGlobal()->hasPrivateLinkage() ||
G->getGlobal()->hasHiddenVisibility() ||
G->getGlobal()->hasProtectedVisibility());
}
}
if (!IsTailCall && CLI.CS && CLI.CS->isMustTailCall())
report_fatal_error("failed to perform tail call elimination on a call "
"site marked musttail");
@ -2789,9 +2798,9 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// node so that legalize doesn't hack it.
bool IsPICCall = (ABI.IsN64() || IsPIC); // true if calls are translated to
// jalr $25
bool GlobalOrExternal = false, InternalLinkage = false, IsCallReloc = false;
SDValue CalleeLo;
EVT Ty = Callee.getValueType();
bool GlobalOrExternal = false, IsCallReloc = false;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
if (IsPICCall) {

View File

@ -290,7 +290,7 @@ entry:
; NOODDSPREG-DAG: lwc1 $f18, 36($[[R0]])
; NOODDSPREG-DAG: lwc1 $[[F0:f[0-9]*[02468]]], 40($[[R0]])
; NOODDSPREG-DAG: swc1 $[[F0]], 8($sp)
; NOODDSPREG-DAG: swc1 $[[F0]], 0($sp)
%0 = load float, float* getelementptr ([11 x float], [11 x float]* @fa, i32 0, i32 0), align 4
%1 = load float, float* getelementptr ([11 x float], [11 x float]* @fa, i32 0, i32 1), align 4

View File

@ -2,8 +2,8 @@
define void @f1(i64 %ll1, float %f, i64 %ll, i32 %i, float %f2) nounwind {
entry:
; CHECK-DAG: lw $[[R2:[0-9]+]], 64($sp)
; CHECK-DAG: lw $[[R3:[0-9]+]], 68($sp)
; CHECK-DAG: lw $[[R2:[0-9]+]], 80($sp)
; CHECK-DAG: lw $[[R3:[0-9]+]], 84($sp)
; CHECK-DAG: move $[[R1:[0-9]+]], $5
; CHECK-DAG: move $[[R0:[0-9]+]], $4
; CHECK-DAG: ori $6, ${{[0-9]+}}, 3855
@ -18,11 +18,11 @@ entry:
tail call void @ff2(i64 %ll, double 3.000000e+00) nounwind
%sub = add nsw i32 %i, -1
; CHECK-DAG: lw $25, %call16(ff3)
; CHECK-DAG: sw $[[R1]], 76($sp)
; CHECK-DAG: sw $[[R0]], 72($sp)
; CHECK-DAG: sw $[[R1]], 28($sp)
; CHECK-DAG: sw $[[R0]], 24($sp)
; CHECK-DAG: move $6, $[[R2]]
; CHECK-DAG: move $7, $[[R3]]
; CHECK: jr $25
; CHECK: jalr $25
tail call void @ff3(i32 %i, i64 %ll, i32 %sub, i64 %ll1) nounwind
ret void
}

View File

@ -30,7 +30,7 @@ declare void @foo2()
; CHECK: lw $25, %call16(foo2)(${{[0-9]+}})
; CHECK: jalr $25
; CHECK: lw $25, %call16(foo2)(${{[0-9]+}})
; CHECK: jr $25
; CHECK: jalr $25
define void @foo1() {
entry:

View File

@ -80,48 +80,6 @@ define float @call_float_void() {
ret float %2
}
define void @musttail_call_void_void() {
; ALL-LABEL: musttail_call_void_void:
; O32: lw $[[TGT:[0-9]+]], %call16(extern_void_void)($gp)
; N64: ld $[[TGT:[0-9]+]], %call16(extern_void_void)($gp)
; NOT-R6C: jr $[[TGT]]
; R6C: jrc $[[TGT]]
musttail call void @extern_void_void()
ret void
}
define i32 @musttail_call_i32_void() {
; ALL-LABEL: musttail_call_i32_void:
; O32: lw $[[TGT:[0-9]+]], %call16(extern_i32_void)($gp)
; N64: ld $[[TGT:[0-9]+]], %call16(extern_i32_void)($gp)
; NOT-R6C: jr $[[TGT]]
; R6C: jrc $[[TGT]]
%1 = musttail call i32 @extern_i32_void()
ret i32 %1
}
define float @musttail_call_float_void() {
; ALL-LABEL: musttail_call_float_void:
; O32: lw $[[TGT:[0-9]+]], %call16(extern_float_void)($gp)
; N64: ld $[[TGT:[0-9]+]], %call16(extern_float_void)($gp)
; NOT-R6C: jr $[[TGT]]
; R6C: jrc $[[TGT]]
%1 = musttail call float @extern_float_void()
ret float %1
}
define i32 @indirect_call_void_void(void ()* %addr) {
; ALL-LABEL: indirect_call_void_void:

View File

@ -18,7 +18,14 @@
; O32 case: The last two arguments should appear at 16(sp), 20(sp). The order
; of the loads doesn't matter, but they have to come before the
; stores
declare i32 @func2(i32, i32, i32, i32, i32, i32)
; Callee used by the tail call in @func1. It is defined in this module with
; internal linkage and returns the sum of its six i32 arguments.
define internal i32 @func2(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) {
%1 = add i32 %a, %b
%2 = add i32 %1, %c
%3 = add i32 %2, %d
%4 = add i32 %3, %e
%5 = add i32 %4, %f
ret i32 %5
}
define i32 @func1(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f){
; MIPS32-LABEL: func1:
@ -27,7 +34,7 @@ define i32 @func1(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f){
; MIPS32-NEXT: lw ${{[0-9]+}}, {{[0-9]+}}($sp)
; MIPS32-NEXT: sw ${{[0-9]+}}, {{[0-9]+}}($sp)
; MIPS32-NEXT: sw ${{[0-9]+}}, {{[0-9]+}}($sp)
%retval = tail call i32 @func1(i32 %a, i32 %f, i32 %c, i32 %d, i32 %e, i32 %b)
%retval = tail call i32 @func2(i32 %a, i32 %f, i32 %c, i32 %d, i32 %e, i32 %b)
ret i32 %retval
}
@ -36,8 +43,19 @@ define i32 @func1(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f){
; of the loads doesn't matter, but they have to come before the
; stores
declare i64 @func4(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64)
; Internal-linkage callee taking ten i64 arguments. The value returned, %5,
; is the sum of %a through %f only.
define internal i64 @func4(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e,
i64 %f, i64 %g, i64 %h, i64 %i, i64 %j) {
%1 = add i64 %a, %b
%2 = add i64 %1, %c
%3 = add i64 %2, %d
%4 = add i64 %3, %e
%5 = add i64 %4, %f
; %6 through %9 read %g, %h, %i and %j, but none of them feeds the return
; value below.
%6 = add i64 %1, %g
%7 = add i64 %2, %h
%8 = add i64 %3, %i
%9 = add i64 %4, %j
ret i64 %5
}
define i64 @func3(i64 %a, i64 %b, i64 %c, i64 %d,
i64 %e, i64 %f, i64 %g, i64 %h,
i64 %i, i64 %j){
@ -51,5 +69,3 @@ define i64 @func3(i64 %a, i64 %b, i64 %c, i64 %d,
ret i64 %retval
}

View File

@ -21,23 +21,26 @@
; RUN: llc -filetype=obj -march=mips64el -mcpu=mips64r6 -mips-tail-calls=1 < %s -o - \
; RUN: | llvm-objdump -d - | FileCheck %s -check-prefix=N64R6
declare i8 @f2(i8)
; Internal-linkage callee defined in this module. It ignores its unnamed i8
; argument and always returns the constant 4.
define internal i8 @f2(i8) {
ret i8 4
}
; Caller under test. It forwards %i to @f2 through a call marked 'tail' and
; returns that call's result unchanged.
define i8 @f1(i8 signext %i) nounwind {
%a = tail call i8 @f2(i8 %i)
ret i8 %a
}
; PIC32: {{[0-9]}}: 08 00 20 03 jr $25
; STATIC32: {{[0-9]}}: 00 00 00 08 j 0
; ALL: f1:
; PIC32: {{[0-9a-z]}}: 08 00 20 03 jr $25
; STATIC32: {{[0-9a-z]}}: 00 00 00 08 j 0
; N64: {{[0-9a-z]+}}: 08 00 20 03 jr $25
; PIC32MM: {{[0-9a-z]+}}: b9 45 jrc $25
; STATIC32MM: {{[0-9]}}: 00 d4 00 00 j 0
; STATIC32MM: {{[0-9a-z]}}: 00 d4 00 00 j 0
; PIC32R6: {{[0-9]}}: 00 00 19 d8 jrc $25
; STATIC32R6: {{[0-9]}}: 00 00 00 08 j 0
; PIC32R6: {{[0-9a-z]}}: 00 00 19 d8 jrc $25
; STATIC32R6: {{[0-9a-z]}}: 00 00 00 08 j 0
; N64R6: {{[0-9a-z]+}}: 09 00 20 03 jr $25
; N64R6: {{[0-9a-z]+}}: 00 00 19 d8 jrc $25

View File

@ -1,31 +1,31 @@
; RUN: llc -march=mipsel -relocation-model=pic \
; RUN: -verify-machineinstrs -mips-tail-calls=1 < %s | FileCheck %s -check-prefix=PIC32
; RUN: -verify-machineinstrs -mips-tail-calls=1 < %s | FileCheck %s -check-prefixes=ALL,PIC32
; RUN: llc -march=mipsel -relocation-model=static \
; RUN: -verify-machineinstrs -mips-tail-calls=1 < %s | FileCheck %s -check-prefix=STATIC32
; RUN: -verify-machineinstrs -mips-tail-calls=1 < %s | FileCheck %s -check-prefixes=ALL,STATIC32
; RUN: llc -march=mips64el -mcpu=mips64r2 \
; RUN: -verify-machineinstrs -mips-tail-calls=1 < %s | FileCheck %s -check-prefix=N64
; RUN: -verify-machineinstrs -mips-tail-calls=1 < %s | FileCheck %s -check-prefixes=ALL,N64
; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic \
; RUN: -verify-machineinstrs -mips-tail-calls=1 < %s | \
; RUN: FileCheck %s -check-prefix=PIC16
; RUN: FileCheck %s -check-prefixes=ALL,PIC16
; RUN: llc -march=mipsel -relocation-model=pic -mattr=+micromips -mips-tail-calls=1 < %s | \
; RUN: FileCheck %s -check-prefix=PIC32
; RUN: FileCheck %s -check-prefixes=ALL,PIC32MM
; RUN: llc -march=mipsel -relocation-model=static -mattr=+micromips \
; RUN: -mips-tail-calls=1 < %s | FileCheck %s -check-prefix=STATIC32
; RUN: -mips-tail-calls=1 < %s | FileCheck %s -check-prefixes=ALL,STATIC32
; RUN: llc -march=mipsel -relocation-model=pic -mcpu=mips32r6 -mips-tail-calls=1 < %s | \
; RUN: FileCheck %s -check-prefix=PIC32
; RUN: FileCheck %s -check-prefixes=ALL,PIC32R6
; RUN: llc -march=mipsel -relocation-model=static -mcpu=mips32r6 \
; RUN: -mips-tail-calls=1 < %s | FileCheck %s -check-prefix=STATIC32
; RUN: -mips-tail-calls=1 < %s | FileCheck %s -check-prefixes=ALL,STATIC32
; RUN: llc -march=mips64el -mcpu=mips64r6 \
; RUN: -mips-tail-calls=1 < %s | FileCheck %s -check-prefix=N64
; RUN: -mips-tail-calls=1 < %s | FileCheck %s -check-prefixes=ALL,N64R6
; RUN: llc -march=mipsel -relocation-model=pic -mcpu=mips32r6 -mattr=+micromips \
; RUN: -mips-tail-calls=1 < %s | FileCheck %s -check-prefix=PIC32
; RUN: -mips-tail-calls=1 < %s | FileCheck %s -check-prefixes=ALL,PIC32MM
; RUN: llc -march=mipsel -relocation-model=static -mcpu=mips32r6 \
; RUN: -mattr=+micromips -mips-tail-calls=1 < %s | FileCheck %s -check-prefix=STATIC32
; RUN: -mattr=+micromips -mips-tail-calls=1 < %s | FileCheck %s -check-prefixes=ALL,STATIC32
; RUN: llc -march=mips64el -mcpu=mips64r6 -mattr=+micromips -mips-tail-calls=1 < %s \
; RUN: | FileCheck %s -check-prefix=N64
; RUN: | FileCheck %s -check-prefixes=ALL,N64
@g0 = common global i32 0, align 4
@g1 = common global i32 0, align 4
@ -40,9 +40,13 @@
define i32 @caller1(i32 %a0) nounwind {
entry:
; PIC32-NOT: jalr
; STATIC32-NOT: jal
; N64-NOT: jalr
; ALL-LABEL: caller1:
; PIC32: jalr $25
; PIC32MM: jalr $25
; PIC32R6: jalr $25
; STATIC32: jal
; N64: jalr $25
; N64R6: jalr $25
; PIC16: jalrc
%call = tail call i32 @callee1(i32 1, i32 1, i32 1, i32 %a0) nounwind
@ -53,9 +57,13 @@ declare i32 @callee1(i32, i32, i32, i32)
define i32 @caller2(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
entry:
; PIC32: jalr
; ALL-LABEL: caller2
; PIC32: jalr $25
; PIC32MM: jalr $25
; PIC32R6: jalr $25
; STATIC32: jal
; N64-NOT: jalr
; N64: jalr $25
; N64R6: jalr $25
; PIC16: jalrc
%call = tail call i32 @callee2(i32 1, i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind
@ -66,9 +74,13 @@ declare i32 @callee2(i32, i32, i32, i32, i32)
define i32 @caller3(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) nounwind {
entry:
; PIC32: jalr
; ALL-LABEL: caller3:
; PIC32: jalr $25
; PIC32R6: jalr $25
; PIC32MM: jalr $25
; STATIC32: jal
; N64-NOT: jalr
; N64: jalr $25
; N64R6: jalr $25
; PIC16: jalrc
%call = tail call i32 @callee3(i32 1, i32 1, i32 1, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) nounwind
@ -79,9 +91,13 @@ declare i32 @callee3(i32, i32, i32, i32, i32, i32, i32, i32)
define i32 @caller4(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) nounwind {
entry:
; PIC32: jalr
; ALL-LABEL: caller4:
; PIC32: jalr $25
; PIC32R6: jalr $25
; PIC32MM: jalr $25
; STATIC32: jal
; N64: jalr
; N64: jalr $25
; N64R6: jalr $25
; PIC16: jalrc
%call = tail call i32 @callee4(i32 1, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) nounwind
@ -92,18 +108,14 @@ declare i32 @callee4(i32, i32, i32, i32, i32, i32, i32, i32, i32)
define i32 @caller5() nounwind readonly {
entry:
; PIC32: .ent caller5
; PIC32-NOT: jalr $25
; PIC32: .end caller5
; STATIC32: .ent caller5
; STATIC32-NOT: jal
; STATIC32: .end caller5
; N64: .ent caller5
; N64-NOT: jalr $25
; N64: .end caller5
; PIC16: .ent caller5
; ALL-LABEL: caller5:
; PIC32: jr $25
; PIC32R6: jr $25
; PIC32MM: jr
; STATIC32: j
; N64: jr $25
; N64R6: jr $25
; PIC16: jalrc
; PIC16: .end caller5
%0 = load i32, i32* @g0, align 4
%1 = load i32, i32* @g1, align 4
@ -137,18 +149,14 @@ declare i32 @callee8(i32, ...)
define i32 @caller8_0() nounwind {
entry:
; PIC32: .ent caller8_0
; PIC32: jr
; PIC32: .end caller8_0
; STATIC32: .ent caller8_0
; ALL-LABEL: caller8_0:
; PIC32: jr $25
; PIC32R6: jrc $25
; PIC32MM: jrc
; STATIC32: j
; STATIC32: .end caller8_0
; N64: .ent caller8_0
; N64-NOT: jalr $25
; N64: .end caller8_0
; PIC16: .ent caller8_0
; N64: jr $25
; N64R6: jrc $25
; PIC16: jalrc
; PIC16: .end caller8_0
%call = tail call fastcc i32 @caller8_1()
ret i32 %call
@ -156,18 +164,14 @@ entry:
define internal fastcc i32 @caller8_1() nounwind noinline {
entry:
; PIC32: .ent caller8_1
; PIC32: jalr
; PIC32: .end caller8_1
; STATIC32: .ent caller8_1
; ALL-LABEL: caller8_1:
; PIC32: jalr $25
; PIC32R6: jalr $25
; PIC32MM: jalr $25
; STATIC32: jal
; STATIC32: .end caller8_1
; N64: .ent caller8_1
; N64-NOT: jalr $25
; N64: .end caller8_1
; PIC16: .ent caller8_1
; N64: jalr $25
; N64R6: jalr $25
; PIC16: jalrc
; PIC16: .end caller8_1
%call = tail call i32 (i32, ...) @callee8(i32 2, i32 1) nounwind
ret i32 %call
@ -181,36 +185,28 @@ declare i32 @callee9(%struct.S* byval)
define i32 @caller9_0() nounwind {
entry:
; PIC32: .ent caller9_0
; PIC32: jr
; PIC32: .end caller9_0
; STATIC32: .ent caller9_0
; ALL-LABEL: caller9_0:
; PIC32: jr $25
; PIC32R6: jrc $25
; PIC32MM: jrc
; STATIC32: j
; STATIC32: .end caller9_0
; N64: .ent caller9_0
; N64-NOT: jalr $25
; N64: .end caller9_0
; PIC16: .ent caller9_0
; N64: jr $25
; N64R6: jrc $25
; PIC16: jalrc
; PIC16: .end caller9_0
%call = tail call fastcc i32 @caller9_1()
ret i32 %call
}
define internal fastcc i32 @caller9_1() nounwind noinline {
entry:
; PIC32: .ent caller9_1
; PIC32: jalr
; PIC32: .end caller9_1
; STATIC32: .ent caller9_1
; ALL-LABEL: caller9_1:
; PIC32: jalr $25
; PIC32R6: jalrc $25
; PIC32MM: jalr $25
; STATIC32: jal
; STATIC32: .end caller9_1
; N64: .ent caller9_1
; N64: jalr
; N64: .end caller9_1
; PIC16: .ent caller9_1
; N64: jalr $25
; N64R6: jalrc $25
; PIC16: jalrc
; PIC16: .end caller9_1
%call = tail call i32 @callee9(%struct.S* byval @gs1) nounwind
ret i32 %call
@ -220,13 +216,13 @@ declare i32 @callee10(i32, i32, i32, i32, i32, i32, i32, i32, i32)
define i32 @caller10(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8) nounwind {
entry:
; PIC32: .ent caller10
; PIC32-NOT: jalr $25
; STATIC32: .ent caller10
; STATIC32-NOT: jal
; N64: .ent caller10
; N64-NOT: jalr $25
; PIC16: .ent caller10
; ALL-LABEL: caller10:
; PIC32: jalr $25
; PIC32R6: jalr $25
; PIC32MM: jalr $25
; STATIC32: jal
; N64: jalr $25
; N64R6: jalr $25
; PIC16: jalrc
%call = tail call i32 @callee10(i32 %a8, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) nounwind
@ -237,13 +233,13 @@ declare i32 @callee11(%struct.S* byval)
define i32 @caller11() nounwind noinline {
entry:
; PIC32: .ent caller11
; PIC32: jalr
; STATIC32: .ent caller11
; ALL-LABEL: caller11:
; PIC32: jalr $25
; PIC32R6: jalrc $25
; PIC32MM: jalr $25
; STATIC32: jal
; N64: .ent caller11
; N64: jalr
; PIC16: .ent caller11
; N64: jalr $25
; N64R6: jalrc $25
; PIC16: jalrc
%call = tail call i32 @callee11(%struct.S* byval @gs1) nounwind
@ -256,13 +252,13 @@ declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32,
define i32 @caller12(%struct.S* nocapture byval %a0) nounwind {
entry:
; PIC32: .ent caller12
; PIC32: jalr
; STATIC32: .ent caller12
; ALL-LABEL: caller12:
; PIC32: jalr $25
; PIC32R6: jalrc $25
; PIC32MM: jalr $25
; STATIC32: jal
; N64: .ent caller12
; N64: jalr
; PIC16: .ent caller12
; N64: jalr $25
; N64R6: jalrc $25
; PIC16: jalrc
%0 = bitcast %struct.S* %a0 to i8*
@ -275,13 +271,13 @@ declare i32 @callee13(i32, ...)
define i32 @caller13() nounwind {
entry:
; PIC32: .ent caller13
; PIC32-NOT: jalr
; STATIC32: .ent caller13
; STATIC32-NOT: jal
; N64: .ent caller13
; N64-NOT: jalr $25
; PIC16: .ent caller13
; ALL-LABEL: caller13
; PIC32: jalr $25
; PIC32R6: jalr $25
; PIC32MM: jalr $25
; STATIC32: jal
; N64: jalr $25
; N64R6: jalr $25
; PIC16: jalrc
%call = tail call i32 (i32, ...) @callee13(i32 1, i32 2) nounwind
@ -290,10 +286,13 @@ entry:
; Check that there is a chain edge between the load and store nodes.
;
; PIC32-LABEL: caller14:
; PIC32: lw ${{[0-9]+}}, 16($sp)
; ALL-LABEL: caller14:
; PIC32: lw ${{[0-9]+}}, 48($sp)
; PIC32: sw $4, 16($sp)
; PIC32MM: lw ${{[0-9]+}}, 48($sp)
; PIC32MM: sw16 $4, 16(${{[0-9]+}})
define void @caller14(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
entry:
tail call void @callee14(i32 %e, i32 %b, i32 %c, i32 %d, i32 %a)