llvm/test/CodeGen/PowerPC/ppc64-sibcall.ll
Hal Finkel 68c84942ec [PowerPC] Fix logic dealing with nop after calls (and tail-call eligibility)
This change aims to unify and correct our logic for when we need to allow for
the possibility of the linker adding a TOC restoration instruction after a
call. This comes up in two contexts:

 1. When determining tail-call eligibility. If we make a tail call (i.e.
    directly branch to a function) then there is no place for the linker to add
    a TOC restoration.
 2. When determining when we need to add a nop instruction after a call.
    Likewise, if there is a possibility that the linker might need to add a
    TOC restoration after a call, then we need to put a nop after the call
    (the bl instruction).

First problem: We were using similar, but different, logic to decide (1) and
(2). This is just wrong. Both the resideInSameModule function (used when
determining tail-call eligibility) and the isLocalCall function (used when
deciding if the post-call nop is needed) were supposed to be determining the
same underlying fact (i.e. might a TOC restoration be needed after the call).
The same logic should be used in both places.

Second problem: The logic in both places was wrong. We only know that two
functions will share the same TOC when both functions come from the same
section of the same object. Otherwise the linker might cause the functions to
use different TOC base addresses (unless the multi-TOC linker option is
disabled, in which case only shared-library boundaries are relevant). There are
a number of factors that can cause functions to be placed in different sections
or come from different objects (-ffunction-sections, explicitly-specified
section names, COMDAT, weak linkage, etc.). All of these need to be checked.
The existing logic only checked properties of the callee, but the properties of
the caller must also be checked (for example, calling from a function in a
COMDAT section means calling between sections).

There was a conceptual error in the resideInSameModule function in that it
allowed tail calls to functions with weak linkage and protected/hidden
visibility. While protected/hidden visibility does prevent the function
implementation from being replaced at runtime (via interposition), it does not
prevent the linker from using an alternate implementation at link time (i.e.
using some strong definition to replace the provided weak one during linking).
If this happens, then we're still potentially looking at a required TOC
restoration upon return.

Otherwise, in general, the post-call nop is needed wherever ELF interposition
needs to be supported. We don't currently support ELF interposition at the IR
level (see http://lists.llvm.org/pipermail/llvm-dev/2016-November/107625.html
for more information), and I don't think we should try to make it appear to
work in the backend in spite of that fact. Unfortunately, because of the way
that the ABI works, we need to generate code as if we supported interposition
whenever the linker might insert stubs for the purpose of supporting it.

Differential Revision: https://reviews.llvm.org/D27231

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291003 91177308-0d34-0410-b5e6-96231b3b80d8
2017-01-04 21:05:13 +00:00

204 lines
6.1 KiB
LLVM

; RUN: llc < %s -relocation-model=static -O1 -disable-ppc-sco=false -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s -check-prefix=CHECK-SCO
; RUN: llc < %s -relocation-model=static -O1 -disable-ppc-sco=false -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-SCO-HASQPX
; RUN: llc < %s -relocation-model=static -O1 -disable-ppc-sco=false -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-SCO-HASQPX
; No combination of "powerpc64le-unknown-linux-gnu" + "CHECK-SCO", because
; only Power8 (and later) fully support LE.
; Aggregate types named after their size in bytes (each i32 is 4 bytes):
;   %S_56 holds 14 x i32 = 56 bytes,
;   %S_64 holds 16 x i32 = 64 bytes,
;   %S_32 holds  8 x i32 = 32 bytes.
%S_56 = type { [13 x i32], i32 }
%S_64 = type { [15 x i32], i32 }
%S_32 = type { [7 x i32], i32 }
; Function Attrs: noinline nounwind
; Empty callee: takes seven i64 values (as an array, by value) followed by a
; pointer to %S_56. Uses attribute group #0 (noinline nounwind), presumably so
; the call from its caller is not inlined away before code generation.
define void @callee_56_copy([7 x i64] %a, %S_56* %b) #0 { ret void }
; Empty callee: takes eight i64 values (as an array, by value) followed by a
; pointer to %S_64. Uses attribute group #0 (noinline nounwind).
define void @callee_64_copy([8 x i64] %a, %S_64* %b) #0 { ret void }
; Function Attrs: nounwind
; Tail-calls @callee_56_copy, passing this function's two parameters in
; swapped order (pointer first here, array first in the callee).
; Expected output: no `stdu 1` (no stack-pointer update) before the call, and
; the call emitted as a tail-call return (TC_RETURNd8), i.e. the sibling call
; optimization is applied.
define void @caller_56_reorder_copy(%S_56* %b, [7 x i64] %a) #1 {
tail call void @callee_56_copy([7 x i64] %a, %S_56* %b)
ret void
; CHECK-SCO-LABEL: caller_56_reorder_copy:
; CHECK-SCO-NOT: stdu 1
; CHECK-SCO: TC_RETURNd8 callee_56_copy
}
; Same shape as the 56-byte reorder case, but with an eight-element array.
; Expected output: an ordinary `bl` call, i.e. the sibling call optimization
; is NOT applied.
; NOTE(review): presumably the ninth doubleword of arguments no longer fits in
; registers, so reordering would require rewriting stack-passed arguments --
; confirm against the PowerPC tail-call eligibility logic.
define void @caller_64_reorder_copy(%S_64* %b, [8 x i64] %a) #1 {
tail call void @callee_64_copy([8 x i64] %a, %S_64* %b)
ret void
; CHECK-SCO-LABEL: caller_64_reorder_copy:
; CHECK-SCO: bl callee_64_copy
}
; Empty callee taking two eight-element i64 arrays by value. Shared by the
; three caller_64_64_* cases that follow. Uses attribute group #0.
define void @callee_64_64_copy([8 x i64] %a, [8 x i64] %b) #0 { ret void }
; Forwards both array parameters to @callee_64_64_copy unchanged and in the
; same order.
; Expected output: a plain branch `b` to the callee, i.e. the call is emitted
; as a sibling call.
define void @caller_64_64_copy([8 x i64] %a, [8 x i64] %b) #1 {
tail call void @callee_64_64_copy([8 x i64] %a, [8 x i64] %b)
ret void
; CHECK-SCO-LABEL: caller_64_64_copy:
; CHECK-SCO: b callee_64_64_copy
}
; Passes its two array parameters to @callee_64_64_copy in swapped order.
; Expected output: an ordinary `bl` call, i.e. no sibling call.
; NOTE(review): presumably swapping would require copying arguments that live
; in the caller's incoming parameter area -- confirm against the backend.
define void @caller_64_64_reorder_copy([8 x i64] %a, [8 x i64] %b) #1 {
tail call void @callee_64_64_copy([8 x i64] %b, [8 x i64] %a)
ret void
; CHECK-SCO-LABEL: caller_64_64_reorder_copy:
; CHECK-SCO: bl callee_64_64_copy
}
; Forwards the first array parameter unchanged and passes undef for the
; second one.
; Expected output: a plain branch `b` to the callee, i.e. an undef argument
; does not prevent the sibling call.
define void @caller_64_64_undef_copy([8 x i64] %a, [8 x i64] %b) #1 {
tail call void @callee_64_64_copy([8 x i64] %a, [8 x i64] undef)
ret void
; CHECK-SCO-LABEL: caller_64_64_undef_copy:
; CHECK-SCO: b callee_64_64_copy
}
; Empty callee with eight parameters mixing floats, sign/zero-extended
; integers and pointers. It has no attribute group attached.
define void @arg8_callee(
float %a, i32 signext %b, float %c, i32* %d,
i8 zeroext %e, float %f, i32* %g, i32 signext %h)
{
ret void
}
; Four-parameter caller that tail-calls the eight-parameter @arg8_callee.
; Only %d is forwarded (as the callee's fourth argument, the same position it
; has here); every other argument is undef.
; Expected output: a plain branch `b` to the callee, i.e. a sibling call.
define void @arg8_caller(float %a, i32 signext %b, i8 zeroext %c, i32* %d) {
entry:
tail call void @arg8_callee(float undef, i32 signext undef, float undef,
i32* %d, i8 zeroext undef, float undef,
i32* undef, i32 signext undef)
ret void
; CHECK-SCO-LABEL: arg8_caller:
; CHECK-SCO: b arg8_callee
}
; Struct return test
; Function Attrs: noinline nounwind
; Empty callee whose only parameter is a noalias sret pointer to %S_56.
define void @callee_sret_56(%S_56* noalias sret %agg.result) #0 { ret void }
; Empty callee whose only parameter is a noalias sret pointer to %S_32.
define void @callee_sret_32(%S_32* noalias sret %agg.result) #0 { ret void }
; Function Attrs: nounwind
; Makes an ordinary call followed by a tail call:
;   1. allocates a local %S_56 and passes it as the sret buffer to
;      @callee_sret_56 (normal call);
;   2. tail-calls @callee_sret_32, forwarding its own incoming sret pointer.
; Expected output, in order: a stack-pointer update (`stdu 1`), the `bl` to
; @callee_sret_56, an `addi 1` (presumably the frame teardown), and finally
; the tail-call return (TC_RETURNd8) to @callee_sret_32.
; NOTE(review): %2 (the bitcast of %1) is never used.
define void @caller_do_something_sret_32(%S_32* noalias sret %agg.result) #1 {
%1 = alloca %S_56, align 4
%2 = bitcast %S_56* %1 to i8*
call void @callee_sret_56(%S_56* nonnull sret %1)
tail call void @callee_sret_32(%S_32* sret %agg.result)
ret void
; CHECK-SCO-LABEL: caller_do_something_sret_32:
; CHECK-SCO: stdu 1
; CHECK-SCO: bl callee_sret_56
; CHECK-SCO: addi 1
; CHECK-SCO: TC_RETURNd8 callee_sret_32
}
; Tail-calls @callee_sret_32 with an sret buffer that is a local alloca of
; this function rather than an incoming sret pointer.
; Expected output: an ordinary `bl` call, i.e. no sibling call.
; NOTE(review): presumably because the sret buffer lives in this function's
; own stack frame, which must stay alive across the call -- confirm.
define void @caller_local_sret_32(%S_32* %a) #1 {
%tmp = alloca %S_32, align 4
tail call void @callee_sret_32(%S_32* nonnull sret %tmp)
ret void
; CHECK-SCO-LABEL: caller_local_sret_32:
; CHECK-SCO: bl callee_sret_32
}
; Attribute groups referenced above: #0 is attached to the callees of the
; aggregate/sret cases (noinline in addition to nounwind), #1 to their callers.
attributes #0 = { noinline nounwind }
attributes #1 = { nounwind }
; vector <4 x i1> test
; Empty callee taking an i8 followed by two <4 x i1> vectors.
define void @callee_v4i1(i8 %a, <4 x i1> %b, <4 x i1> %c) { ret void }
; Tail-calls @callee_v4i1 with the two vector parameters swapped. This is the
; only case in the file whose expectation differs between the two check
; prefixes: `bl` (no sibling call) under the first prefix, `b` (sibling call)
; under the second.
define void @caller_v4i1_reorder(i8 %a, <4 x i1> %b, <4 x i1> %c) {
tail call void @callee_v4i1(i8 %a, <4 x i1> %c, <4 x i1> %b)
ret void
; <4 x i1> is 32-byte aligned. If the subtarget doesn't support QPX, b and c
; can't be placed in QPX registers, so we can't do SCO on caller_v4i1_reorder.
; CHECK-SCO-LABEL: caller_v4i1_reorder:
; CHECK-SCO: bl callee_v4i1
; CHECK-SCO-HASQPX-LABEL: caller_v4i1_reorder:
; CHECK-SCO-HASQPX: b callee_v4i1
}
; Empty callee taking a pointer followed by two ppc_fp128 values.
define void @f128_callee(i32* %ptr, ppc_fp128 %a, ppc_fp128 %b) { ret void }
; Forwards its pointer and both ppc_fp128 parameters to @f128_callee
; unchanged and in the same order.
; Expected output: a plain branch `b` to the callee, i.e. a sibling call.
define void @f128_caller(i32* %ptr, ppc_fp128 %a, ppc_fp128 %b) {
tail call void @f128_callee(i32* %ptr, ppc_fp128 %a, ppc_fp128 %b)
ret void
; CHECK-SCO-LABEL: f128_caller:
; CHECK-SCO: b f128_callee
}
; weak linkage test
; Two-byte aggregate used as the `this` pointee in the weak_odr cases below.
%class.T = type { [2 x i8] }
; Empty callee with weak_odr linkage and hidden visibility.
define weak_odr hidden void @wo_hcallee(%class.T* %this, i8* %c) { ret void }
; Tail-calls the weak_odr + hidden callee.
; Expected output: an ordinary `bl`, i.e. no sibling call. Hidden visibility
; does not stop the linker from substituting a different definition for a weak
; symbol at link time, so a TOC restore may still be needed after the call.
define void @wo_hcaller(%class.T* %this, i8* %c) {
tail call void @wo_hcallee(%class.T* %this, i8* %c)
ret void
; CHECK-SCO-LABEL: wo_hcaller:
; CHECK-SCO: bl wo_hcallee
}
; Empty callee with weak_odr linkage and protected visibility.
define weak_odr protected void @wo_pcallee(%class.T* %this, i8* %c) { ret void }
; Tail-calls the weak_odr + protected callee.
; Expected output: an ordinary `bl`, i.e. no sibling call. Protected
; visibility does not stop the linker from substituting a different definition
; for a weak symbol at link time, so a TOC restore may still be needed.
define void @wo_pcaller(%class.T* %this, i8* %c) {
tail call void @wo_pcallee(%class.T* %this, i8* %c)
ret void
; CHECK-SCO-LABEL: wo_pcaller:
; CHECK-SCO: bl wo_pcallee
}
; Empty callee with weak_odr linkage and default visibility.
define weak_odr void @wo_callee(%class.T* %this, i8* %c) { ret void }
; Tail-calls the weak_odr callee with default visibility.
; Expected output: an ordinary `bl`, i.e. no sibling call, since the weak
; definition may be replaced and a TOC restore may be needed after the call.
define void @wo_caller(%class.T* %this, i8* %c) {
tail call void @wo_callee(%class.T* %this, i8* %c)
ret void
; CHECK-SCO-LABEL: wo_caller:
; CHECK-SCO: bl wo_callee
}
; Empty callee with weak linkage and protected visibility.
define weak protected void @w_pcallee(i8* %ptr) { ret void }
; Tail-calls the weak + protected callee.
; Expected output: an ordinary `bl`, i.e. no sibling call. A weak definition
; can still be replaced by a strong one at link time despite protected
; visibility, so a TOC restore may be needed after the call.
define void @w_pcaller(i8* %ptr) {
tail call void @w_pcallee(i8* %ptr)
ret void
; CHECK-SCO-LABEL: w_pcaller:
; CHECK-SCO: bl w_pcallee
}
; Empty callee with weak linkage and hidden visibility.
define weak hidden void @w_hcallee(i8* %ptr) { ret void }
; Tail-calls the weak + hidden callee.
; Expected output: an ordinary `bl`, i.e. no sibling call. A weak definition
; can still be replaced by a strong one at link time despite hidden
; visibility, so a TOC restore may be needed after the call.
define void @w_hcaller(i8* %ptr) {
tail call void @w_hcallee(i8* %ptr)
ret void
; CHECK-SCO-LABEL: w_hcaller:
; CHECK-SCO: bl w_hcallee
}
; Empty callee with weak linkage and default visibility.
define weak void @w_callee(i8* %ptr) { ret void }
; Tail-calls the weak callee with default visibility.
; Expected output: an ordinary `bl`, i.e. no sibling call, since the weak
; definition may be replaced and a TOC restore may be needed after the call.
define void @w_caller(i8* %ptr) {
tail call void @w_callee(i8* %ptr)
ret void
; CHECK-SCO-LABEL: w_caller:
; CHECK-SCO: bl w_callee
}
; Eight-byte aggregate and a zero-initialized common global of that type,
; used as the by-value argument in the byval case below.
%struct.byvalTest = type { [8 x i8] }
@byval = common global %struct.byvalTest zeroinitializer
; Empty callee receiving a %struct.byvalTest through a byval pointer.
define void @byval_callee(%struct.byvalTest* byval %ptr) { ret void }
; Tail-calls @byval_callee, passing the global @byval by value.
; Expected output: an ordinary `bl`, i.e. no sibling call when an argument is
; passed byval.
define void @byval_caller() {
tail call void @byval_callee(%struct.byvalTest* byval @byval)
ret void
; The label check anchors on this function's own symbol, like every other
; case in the file, so the `bl` check below is confined to this function's
; body. A label pattern of the call text itself is not a unique label and,
; after whitespace canonicalization, is also a substring of the callee's
; `.globl` directive.
; CHECK-SCO-LABEL: byval_caller:
; CHECK-SCO: bl byval_callee
}