mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-03-02 01:06:25 +00:00
ARM, AArch64, X86: Check preserved registers for tail calls.
We can only perform a tail call to a callee that preserves all the registers that the caller needs to preserve. This situation happens with calling conventions like preserve_mostcc or cxx_fast_tlscc. It was explicitly handled for cxx_fast_tlscc and failing for preserve_mostcc. This patch generalizes the check to any calling convention. Related to rdar://24207743. Differential Revision: http://reviews.llvm.org/D18680. llvm-svn: 265329
This commit is contained in:
parent
bd1511854e
commit
9984790824
@ -460,6 +460,10 @@ public:
|
||||
llvm_unreachable("target does not provide no preserved mask");
|
||||
}
|
||||
|
||||
/// Return true if all bits that are set in mask \p mask0 are also set in
|
||||
/// \p mask1.
|
||||
bool regmaskSubsetEqual(const uint32_t *mask0, const uint32_t *mask1) const;
|
||||
|
||||
/// Return all the call-preserved register masks defined for this target.
|
||||
virtual ArrayRef<const uint32_t *> getRegMasks() const = 0;
|
||||
virtual ArrayRef<const char *> getRegMaskNames() const = 0;
|
||||
|
@ -388,6 +388,15 @@ bool TargetRegisterInfo::needsStackRealignment(
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Return true if every bit that is set in \p mask0 is also set in \p mask1,
/// i.e. mask0 is a subset of (or equal to) mask1.
///
/// Both pointers are indexed for (getNumRegs()+31)/32 consecutive 32-bit
/// words, so each array must provide at least that many words.
bool TargetRegisterInfo::regmaskSubsetEqual(const uint32_t *mask0,
|
||||
const uint32_t *mask1) const {
|
||||
// Number of 32-bit words to compare: getNumRegs() rounded up to the next
// multiple of 32, divided by 32.
unsigned N = (getNumRegs()+31) / 32;
|
||||
for (unsigned I = 0; I < N; ++I)
|
||||
// A bit set in mask0 but clear in mask1 breaks the subset relation.
if ((mask0[I] & mask1[I]) != mask0[I])
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||||
void
|
||||
TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex,
|
||||
|
@ -2816,13 +2816,6 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
|
||||
CallingConv::ID CallerCC = CallerF->getCallingConv();
|
||||
bool CCMatch = CallerCC == CalleeCC;
|
||||
|
||||
// Disable tailcall for CXX_FAST_TLS when callee and caller have different
|
||||
// calling conventions, given that CXX_FAST_TLS has a bigger CSR set.
|
||||
if (!CCMatch &&
|
||||
(CallerCC == CallingConv::CXX_FAST_TLS ||
|
||||
CalleeCC == CallingConv::CXX_FAST_TLS))
|
||||
return false;
|
||||
|
||||
// Byval parameters hand the function a pointer directly into the stack area
|
||||
// we want to reuse during a tail call. Working around this *is* possible (see
|
||||
// X86) but less efficient and uglier in LowerCall.
|
||||
@ -2882,6 +2875,13 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
|
||||
CCAssignFnForCall(CalleeCC, isVarArg),
|
||||
CCAssignFnForCall(CallerCC, isVarArg)))
|
||||
return false;
|
||||
// The callee has to preserve all registers the caller needs to preserve.
|
||||
if (!CCMatch) {
|
||||
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
|
||||
if (!TRI->regmaskSubsetEqual(TRI->getCallPreservedMask(MF, CallerCC),
|
||||
TRI->getCallPreservedMask(MF, CalleeCC)))
|
||||
return false;
|
||||
}
|
||||
|
||||
// Nothing more to check if the callee is taking no arguments
|
||||
if (Outs.empty())
|
||||
|
@ -2101,14 +2101,6 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
const Function *CallerF = MF.getFunction();
|
||||
CallingConv::ID CallerCC = CallerF->getCallingConv();
|
||||
bool CCMatch = CallerCC == CalleeCC;
|
||||
|
||||
// Disable tailcall for CXX_FAST_TLS when callee and caller have different
|
||||
// calling conventions, given that CXX_FAST_TLS has a bigger CSR set.
|
||||
if (!CCMatch &&
|
||||
(CallerCC == CallingConv::CXX_FAST_TLS ||
|
||||
CalleeCC == CallingConv::CXX_FAST_TLS))
|
||||
return false;
|
||||
|
||||
assert(Subtarget->supportsTailCall());
|
||||
|
||||
@ -2152,6 +2144,13 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
|
||||
CCAssignFnForNode(CalleeCC, true, isVarArg),
|
||||
CCAssignFnForNode(CallerCC, true, isVarArg)))
|
||||
return false;
|
||||
// The callee has to preserve all registers the caller needs to preserve.
|
||||
if (CalleeCC != CallerCC) {
|
||||
const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
|
||||
if (!TRI->regmaskSubsetEqual(TRI->getCallPreservedMask(MF, CallerCC),
|
||||
TRI->getCallPreservedMask(MF, CalleeCC)))
|
||||
return false;
|
||||
}
|
||||
|
||||
// If Caller's vararg or byval argument has been split between registers and
|
||||
// stack, do not perform tail call, since part of the argument is in caller's
|
||||
|
@ -3818,13 +3818,6 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
|
||||
if (IsCalleeWin64 != IsCallerWin64)
|
||||
return false;
|
||||
|
||||
// Disable tailcall for CXX_FAST_TLS when callee and caller have different
|
||||
// calling conventions, given that CXX_FAST_TLS has a bigger CSR set.
|
||||
if (!CCMatch &&
|
||||
(CallerCC == CallingConv::CXX_FAST_TLS ||
|
||||
CalleeCC == CallingConv::CXX_FAST_TLS))
|
||||
return false;
|
||||
|
||||
if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
|
||||
if (canGuaranteeTCO(CalleeCC) && CCMatch)
|
||||
return true;
|
||||
@ -3888,6 +3881,13 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
|
||||
if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
|
||||
RetCC_X86, RetCC_X86))
|
||||
return false;
|
||||
// The callee has to preserve all registers the caller needs to preserve.
|
||||
if (!CCMatch) {
|
||||
const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
|
||||
if (!TRI->regmaskSubsetEqual(TRI->getCallPreservedMask(MF, CallerCC),
|
||||
TRI->getCallPreservedMask(MF, CalleeCC)))
|
||||
return false;
|
||||
}
|
||||
|
||||
unsigned StackArgsSize = 0;
|
||||
|
||||
|
24
test/CodeGen/AArch64/tailcall-ccmismatch.ll
Normal file
24
test/CodeGen/AArch64/tailcall-ccmismatch.ll
Normal file
@ -0,0 +1,24 @@
|
||||
; RUN: llc -o - %s | FileCheck %s
|
||||
target triple="aarch64--"
|
||||
|
||||
declare void @somefunc()
|
||||
; Caller is preserve_mostcc; the callee @somefunc is called with the default
; calling convention.
define preserve_mostcc void @test_ccmismatch_notail() {
|
||||
; Ensure that no tail call is used here, as the called function somefunc does
|
||||
; not preserve enough registers for preserve_mostcc.
|
||||
; CHECK-LABEL: test_ccmismatch_notail:
|
||||
; CHECK-NOT: b somefunc
|
||||
; CHECK: bl somefunc
|
||||
tail call void @somefunc()
|
||||
ret void
|
||||
}
|
||||
|
||||
declare preserve_mostcc void @some_preserve_most_func()
|
||||
; Caller uses the default calling convention; the callee is called as
; preserve_mostcc.
define void @test_ccmismatch_tail() {
|
||||
; We can perform a tail call here, because some_preserve_most_func preserves
|
||||
; all registers necessary for test_ccmismatch_tail.
|
||||
; CHECK-LABEL: test_ccmismatch_tail:
|
||||
; CHECK-NOT: bl some_preserve_most_func
|
||||
; CHECK: b some_preserve_most_func
|
||||
tail call preserve_mostcc void @some_preserve_most_func()
|
||||
ret void
|
||||
}
|
@ -126,5 +126,27 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @somefunc()
|
||||
; Caller is cxx_fast_tlscc; the callee @somefunc is called with the default
; calling convention.
define cxx_fast_tlscc void @test_ccmismatch_notail() {
|
||||
; A tail call is not possible here because somefunc does not preserve enough
|
||||
; registers.
|
||||
; CHECK-LABEL: test_ccmismatch_notail:
|
||||
; CHECK-NOT: b _somefunc
|
||||
; CHECK: bl _somefunc
|
||||
tail call void @somefunc()
|
||||
ret void
|
||||
}
|
||||
|
||||
declare cxx_fast_tlscc void @some_fast_tls_func()
|
||||
; Caller uses the default calling convention; the callee is called as
; cxx_fast_tlscc.
define void @test_ccmismatch_tail() {
|
||||
; We can perform a tail call here because some_fast_tls_func preserves all
|
||||
; necessary registers (and more).
|
||||
; CHECK-LABEL: test_ccmismatch_tail:
|
||||
; CHECK-NOT: bl _some_fast_tls_func
|
||||
; CHECK: b _some_fast_tls_func
|
||||
tail call cxx_fast_tlscc void @some_fast_tls_func()
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind "no-frame-pointer-elim"="true" }
|
||||
attributes #1 = { nounwind }
|
||||
|
Loading…
x
Reference in New Issue
Block a user