ARM, AArch64, X86: Check preserved registers for tail calls.

We can only perform a tail call to a callee that preserves all the
registers that the caller needs to preserve.

This situation happens with calling conventions like preserve_mostcc or
cxx_fast_tls. It was explicitly handled for cxx_fast_tls but failed for
preserve_most. This patch generalizes the check to any calling
convention.

Related to rdar://24207743

Differential Revision: http://reviews.llvm.org/D18680

llvm-svn: 265329
This commit is contained in:
Matthias Braun 2016-04-04 18:56:13 +00:00
parent bd1511854e
commit 9984790824
7 changed files with 80 additions and 22 deletions

View File

@ -460,6 +460,10 @@ public:
llvm_unreachable("target does not provide no preserved mask");
}
/// Return true if all bits that are set in mask \p mask0 are also set in
/// \p mask1.
bool regmaskSubsetEqual(const uint32_t *mask0, const uint32_t *mask1) const;
/// Return all the call-preserved register masks defined for this target.
virtual ArrayRef<const uint32_t *> getRegMasks() const = 0;
virtual ArrayRef<const char *> getRegMaskNames() const = 0;

View File

@ -388,6 +388,15 @@ bool TargetRegisterInfo::needsStackRealignment(
return false;
}
/// Report whether \p mask0 is a subset of (or equal to) \p mask1, i.e. whether
/// every bit set in \p mask0 is set in \p mask1 as well.
bool TargetRegisterInfo::regmaskSubsetEqual(const uint32_t *mask0,
                                            const uint32_t *mask1) const {
  // One bit per register, packed into 32-bit words; round up so a trailing
  // partially-used word is compared too.
  const unsigned NumWords = (getNumRegs() + 31) / 32;
  for (unsigned Word = 0; Word != NumWords; ++Word) {
    // A bit present in mask0 but absent from mask1 breaks the subset relation.
    if (mask0[Word] & ~mask1[Word])
      return false;
  }
  return true;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void
TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex,

View File

@ -2816,13 +2816,6 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
CallingConv::ID CallerCC = CallerF->getCallingConv();
bool CCMatch = CallerCC == CalleeCC;
// Disable tailcall for CXX_FAST_TLS when callee and caller have different
// calling conventions, given that CXX_FAST_TLS has a bigger CSR set.
if (!CCMatch &&
(CallerCC == CallingConv::CXX_FAST_TLS ||
CalleeCC == CallingConv::CXX_FAST_TLS))
return false;
// Byval parameters hand the function a pointer directly into the stack area
// we want to reuse during a tail call. Working around this *is* possible (see
// X86) but less efficient and uglier in LowerCall.
@ -2882,6 +2875,13 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
CCAssignFnForCall(CalleeCC, isVarArg),
CCAssignFnForCall(CallerCC, isVarArg)))
return false;
// The callee has to preserve all registers the caller needs to preserve.
if (!CCMatch) {
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
if (!TRI->regmaskSubsetEqual(TRI->getCallPreservedMask(MF, CallerCC),
TRI->getCallPreservedMask(MF, CalleeCC)))
return false;
}
// Nothing more to check if the callee is taking no arguments
if (Outs.empty())

View File

@ -2101,14 +2101,6 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
MachineFunction &MF = DAG.getMachineFunction();
const Function *CallerF = MF.getFunction();
CallingConv::ID CallerCC = CallerF->getCallingConv();
bool CCMatch = CallerCC == CalleeCC;
// Disable tailcall for CXX_FAST_TLS when callee and caller have different
// calling conventions, given that CXX_FAST_TLS has a bigger CSR set.
if (!CCMatch &&
(CallerCC == CallingConv::CXX_FAST_TLS ||
CalleeCC == CallingConv::CXX_FAST_TLS))
return false;
assert(Subtarget->supportsTailCall());
@ -2152,6 +2144,13 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
CCAssignFnForNode(CalleeCC, true, isVarArg),
CCAssignFnForNode(CallerCC, true, isVarArg)))
return false;
// The callee has to preserve all registers the caller needs to preserve.
if (CalleeCC != CallerCC) {
const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
if (!TRI->regmaskSubsetEqual(TRI->getCallPreservedMask(MF, CallerCC),
TRI->getCallPreservedMask(MF, CalleeCC)))
return false;
}
// If Caller's vararg or byval argument has been split between registers and
// stack, do not perform tail call, since part of the argument is in caller's

View File

@ -3818,13 +3818,6 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
if (IsCalleeWin64 != IsCallerWin64)
return false;
// Disable tailcall for CXX_FAST_TLS when callee and caller have different
// calling conventions, given that CXX_FAST_TLS has a bigger CSR set.
if (!CCMatch &&
(CallerCC == CallingConv::CXX_FAST_TLS ||
CalleeCC == CallingConv::CXX_FAST_TLS))
return false;
if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
if (canGuaranteeTCO(CalleeCC) && CCMatch)
return true;
@ -3888,6 +3881,13 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
RetCC_X86, RetCC_X86))
return false;
// The callee has to preserve all registers the caller needs to preserve.
if (!CCMatch) {
const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
if (!TRI->regmaskSubsetEqual(TRI->getCallPreservedMask(MF, CallerCC),
TRI->getCallPreservedMask(MF, CalleeCC)))
return false;
}
unsigned StackArgsSize = 0;

View File

@ -0,0 +1,24 @@
; RUN: llc -o - %s | FileCheck %s
target triple="aarch64--"
declare void @somefunc()
define preserve_mostcc void @test_ccmismatch_notail() {
; Ensure that no tail call is used here, as the called function somefunc does
; not preserve enough registers for preserve_mostcc.
; CHECK-LABEL: test_ccmismatch_notail:
; CHECK-NOT: b somefunc
; CHECK: bl somefunc
tail call void @somefunc()
ret void
}
declare preserve_mostcc void @some_preserve_most_func()
define void @test_ccmismatch_tail() {
; We can perform a tail call here, because some_preserve_most_func preserves
; all registers necessary for test_ccmismatch_tail.
; CHECK-LABEL: test_ccmismatch_tail:
; CHECK-NOT: bl some_preserve_most_func
; CHECK: b some_preserve_most_func
tail call preserve_mostcc void @some_preserve_most_func()
ret void
}

View File

@ -126,5 +126,27 @@ entry:
ret void
}
declare void @somefunc()
define cxx_fast_tlscc void @test_ccmismatch_notail() {
; A tail call is not possible here because somefunc does not preserve enough
; registers.
; CHECK-LABEL: test_ccmismatch_notail:
; CHECK-NOT: b _somefunc
; CHECK: bl _somefunc
tail call void @somefunc()
ret void
}
declare cxx_fast_tlscc void @some_fast_tls_func()
define void @test_ccmismatch_tail() {
; We can perform a tail call here because some_fast_tls_func preserves all
; necessary registers (and more).
; CHECK-LABEL: test_ccmismatch_tail:
; CHECK-NOT: bl _some_fast_tls_func
; CHECK: b _some_fast_tls_func
tail call cxx_fast_tlscc void @some_fast_tls_func()
ret void
}
attributes #0 = { nounwind "no-frame-pointer-elim"="true" }
attributes #1 = { nounwind }