mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 20:39:47 +00:00
[X86] Add new calling convention that guarantees tail call optimization
When the target option GuaranteedTailCallOpt is specified, calls with the fastcc calling convention will be transformed into tail calls if they are in tail position. This diff adds a new calling convention, tailcc, currently supported only on X86, which behaves the same way as fastcc, except that the GuaranteedTailCallOpt flag does not need to be enabled in order to enable tail call optimization. Patch by Dwight Guth <dwight.guth@runtimeverification.com>! Reviewed By: lebedev.ri, paquette, rnk Differential Revision: https://reviews.llvm.org/D67855 llvm-svn: 373976
This commit is contained in:
parent
383930d445
commit
a973c0bd85
@ -794,6 +794,7 @@ function. The operand fields are:
|
||||
* ``preserve_allcc``: code 15
|
||||
* ``swiftcc`` : code 16
|
||||
* ``cxx_fast_tlscc``: code 17
|
||||
* ``tailcc`` : code 18
|
||||
* ``x86_stdcallcc``: code 64
|
||||
* ``x86_fastcallcc``: code 65
|
||||
* ``arm_apcscc``: code 66
|
||||
|
@ -2068,12 +2068,12 @@ supported on x86/x86-64, PowerPC, and WebAssembly. It is performed on x86/x86-64
|
||||
and PowerPC if:
|
||||
|
||||
* Caller and callee have the calling convention ``fastcc``, ``cc 10`` (GHC
|
||||
calling convention) or ``cc 11`` (HiPE calling convention).
|
||||
calling convention), ``cc 11`` (HiPE calling convention), or ``tailcc``.
|
||||
|
||||
* The call is a tail call - in tail position (ret immediately follows call and
|
||||
ret uses value of call or is void).
|
||||
|
||||
* Option ``-tailcallopt`` is enabled.
|
||||
* Option ``-tailcallopt`` is enabled or the calling convention is ``tailcc``.
|
||||
|
||||
* Platform-specific constraints are met.
|
||||
|
||||
|
@ -299,7 +299,7 @@ added in the future:
|
||||
allows the target to use whatever tricks it wants to produce fast
|
||||
code for the target, without having to conform to an externally
|
||||
specified ABI (Application Binary Interface). `Tail calls can only
|
||||
be optimized when this, the GHC or the HiPE convention is
|
||||
be optimized when this, the tailcc, the GHC or the HiPE convention is
|
||||
used. <CodeGenerator.html#id80>`_ This calling convention does not
|
||||
support varargs and requires the prototype of all callees to exactly
|
||||
match the prototype of the function definition.
|
||||
@ -436,6 +436,14 @@ added in the future:
|
||||
- On X86-64 RCX and R8 are available for additional integer returns, and
|
||||
XMM2 and XMM3 are available for additional FP/vector returns.
|
||||
- On iOS platforms, we use AAPCS-VFP calling convention.
|
||||
"``tailcc``" - Tail callable calling convention
|
||||
This calling convention ensures that calls in tail position will always be
|
||||
tail call optimized. This calling convention is equivalent to fastcc,
|
||||
except for an additional guarantee that tail calls will be produced
|
||||
whenever possible. `Tail calls can only be optimized when this, the fastcc,
|
||||
the GHC or the HiPE convention is used. <CodeGenerator.html#id80>`_ This
|
||||
calling convention does not support varargs and requires the prototype of
|
||||
all callees to exactly match the prototype of the function definition.
|
||||
"``cc <n>``" - Numbered convention
|
||||
Any calling convention may be specified by number, allowing
|
||||
target-specific calling conventions to be used. Target specific
|
||||
@ -10232,11 +10240,12 @@ This instruction requires several arguments:
|
||||
Tail call optimization for calls marked ``tail`` is guaranteed to occur if
|
||||
the following conditions are met:
|
||||
|
||||
- Caller and callee both have the calling convention ``fastcc``.
|
||||
- Caller and callee both have the calling convention ``fastcc`` or ``tailcc``.
|
||||
- The call is in tail position (ret immediately follows call and ret
|
||||
uses value of call or is void).
|
||||
- Option ``-tailcallopt`` is enabled, or
|
||||
``llvm::GuaranteedTailCallOpt`` is ``true``.
|
||||
- Option ``-tailcallopt`` is enabled,
|
||||
``llvm::GuaranteedTailCallOpt`` is ``true``, or the calling convention
|
||||
is ``tailcc``.
|
||||
- `Platform-specific constraints are
|
||||
met. <CodeGenerator.html#tailcallopt>`_
|
||||
|
||||
|
@ -75,6 +75,11 @@ namespace CallingConv {
|
||||
// CXX_FAST_TLS - Calling convention for access functions.
|
||||
CXX_FAST_TLS = 17,
|
||||
|
||||
/// Tail - This calling convention attempts to make calls as fast as
|
||||
/// possible while guaranteeing that tail call optimization can always
|
||||
/// be performed.
|
||||
Tail = 18,
|
||||
|
||||
// Target - This is the start of the target-specific calling conventions,
|
||||
// e.g. fastcall and thiscall on X86.
|
||||
FirstTargetCC = 64,
|
||||
|
@ -622,6 +622,7 @@ lltok::Kind LLLexer::LexIdentifier() {
|
||||
KEYWORD(amdgpu_ps);
|
||||
KEYWORD(amdgpu_cs);
|
||||
KEYWORD(amdgpu_kernel);
|
||||
KEYWORD(tailcc);
|
||||
|
||||
KEYWORD(cc);
|
||||
KEYWORD(c);
|
||||
|
@ -1955,6 +1955,7 @@ void LLParser::ParseOptionalDLLStorageClass(unsigned &Res) {
|
||||
/// ::= 'amdgpu_ps'
|
||||
/// ::= 'amdgpu_cs'
|
||||
/// ::= 'amdgpu_kernel'
|
||||
/// ::= 'tailcc'
|
||||
/// ::= 'cc' UINT
|
||||
///
|
||||
bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
|
||||
@ -2000,6 +2001,7 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
|
||||
case lltok::kw_amdgpu_ps: CC = CallingConv::AMDGPU_PS; break;
|
||||
case lltok::kw_amdgpu_cs: CC = CallingConv::AMDGPU_CS; break;
|
||||
case lltok::kw_amdgpu_kernel: CC = CallingConv::AMDGPU_KERNEL; break;
|
||||
case lltok::kw_tailcc: CC = CallingConv::Tail; break;
|
||||
case lltok::kw_cc: {
|
||||
Lex.Lex();
|
||||
return ParseUInt32(CC);
|
||||
|
@ -168,6 +168,7 @@ enum Kind {
|
||||
kw_amdgpu_ps,
|
||||
kw_amdgpu_cs,
|
||||
kw_amdgpu_kernel,
|
||||
kw_tailcc,
|
||||
|
||||
// Attributes:
|
||||
kw_attributes,
|
||||
|
@ -523,7 +523,8 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) {
|
||||
// longjmp on x86), it can end up causing miscompilation that has not
|
||||
// been fully understood.
|
||||
if (!Ret &&
|
||||
(!TM.Options.GuaranteedTailCallOpt || !isa<UnreachableInst>(Term)))
|
||||
((!TM.Options.GuaranteedTailCallOpt &&
|
||||
CS.getCallingConv() != CallingConv::Tail) || !isa<UnreachableInst>(Term)))
|
||||
return false;
|
||||
|
||||
// If I will have a chain, make sure no other instruction that will have a
|
||||
|
@ -352,6 +352,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
|
||||
case CallingConv::PreserveAll: Out << "preserve_allcc"; break;
|
||||
case CallingConv::CXX_FAST_TLS: Out << "cxx_fast_tlscc"; break;
|
||||
case CallingConv::GHC: Out << "ghccc"; break;
|
||||
case CallingConv::Tail: Out << "tailcc"; break;
|
||||
case CallingConv::X86_StdCall: Out << "x86_stdcallcc"; break;
|
||||
case CallingConv::X86_FastCall: Out << "x86_fastcallcc"; break;
|
||||
case CallingConv::X86_ThisCall: Out << "x86_thiscallcc"; break;
|
||||
|
@ -433,6 +433,7 @@ defm X86_SysV64_RegCall :
|
||||
def RetCC_X86_32 : CallingConv<[
|
||||
// If FastCC, use RetCC_X86_32_Fast.
|
||||
CCIfCC<"CallingConv::Fast", CCDelegateTo<RetCC_X86_32_Fast>>,
|
||||
CCIfCC<"CallingConv::Tail", CCDelegateTo<RetCC_X86_32_Fast>>,
|
||||
// If HiPE, use RetCC_X86_32_HiPE.
|
||||
CCIfCC<"CallingConv::HiPE", CCDelegateTo<RetCC_X86_32_HiPE>>,
|
||||
CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<RetCC_X86_32_VectorCall>>,
|
||||
@ -1000,6 +1001,7 @@ def CC_X86_32 : CallingConv<[
|
||||
CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_Win32_VectorCall>>,
|
||||
CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo<CC_X86_32_ThisCall>>,
|
||||
CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_X86_32_FastCC>>,
|
||||
CCIfCC<"CallingConv::Tail", CCDelegateTo<CC_X86_32_FastCC>>,
|
||||
CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_32_GHC>>,
|
||||
CCIfCC<"CallingConv::HiPE", CCDelegateTo<CC_X86_32_HiPE>>,
|
||||
CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<CC_X86_32_RegCall>>,
|
||||
|
@ -1160,6 +1160,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
|
||||
CallingConv::ID CC = F.getCallingConv();
|
||||
if (CC != CallingConv::C &&
|
||||
CC != CallingConv::Fast &&
|
||||
CC != CallingConv::Tail &&
|
||||
CC != CallingConv::X86_FastCall &&
|
||||
CC != CallingConv::X86_StdCall &&
|
||||
CC != CallingConv::X86_ThisCall &&
|
||||
@ -1173,7 +1174,8 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
|
||||
|
||||
// fastcc with -tailcallopt is intended to provide a guaranteed
|
||||
// tail call optimization. Fastisel doesn't know how to do that.
|
||||
if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
|
||||
if ((CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) ||
|
||||
CC == CallingConv::Tail)
|
||||
return false;
|
||||
|
||||
// Let SDISel handle vararg functions.
|
||||
@ -3157,7 +3159,7 @@ static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget,
|
||||
if (Subtarget->getTargetTriple().isOSMSVCRT())
|
||||
return 0;
|
||||
if (CC == CallingConv::Fast || CC == CallingConv::GHC ||
|
||||
CC == CallingConv::HiPE)
|
||||
CC == CallingConv::HiPE || CC == CallingConv::Tail)
|
||||
return 0;
|
||||
|
||||
if (CS)
|
||||
@ -3208,6 +3210,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
|
||||
default: return false;
|
||||
case CallingConv::C:
|
||||
case CallingConv::Fast:
|
||||
case CallingConv::Tail:
|
||||
case CallingConv::WebKit_JS:
|
||||
case CallingConv::Swift:
|
||||
case CallingConv::X86_FastCall:
|
||||
@ -3224,7 +3227,8 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
|
||||
|
||||
// fastcc with -tailcallopt is intended to provide a guaranteed
|
||||
// tail call optimization. Fastisel doesn't know how to do that.
|
||||
if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
|
||||
if ((CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) ||
|
||||
CC == CallingConv::Tail)
|
||||
return false;
|
||||
|
||||
// Don't know how to handle Win64 varargs yet. Nothing special needed for
|
||||
|
@ -2269,7 +2269,8 @@ GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Pr
|
||||
bool IsNested = HasNestArgument(&MF);
|
||||
|
||||
if (CallingConvention == CallingConv::X86_FastCall ||
|
||||
CallingConvention == CallingConv::Fast) {
|
||||
CallingConvention == CallingConv::Fast ||
|
||||
CallingConvention == CallingConv::Tail) {
|
||||
if (IsNested)
|
||||
report_fatal_error("Segmented stacks does not support fastcall with "
|
||||
"nested function.");
|
||||
|
@ -2963,7 +2963,7 @@ static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
|
||||
static bool canGuaranteeTCO(CallingConv::ID CC) {
|
||||
return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
|
||||
CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
|
||||
CC == CallingConv::HHVM);
|
||||
CC == CallingConv::HHVM || CC == CallingConv::Tail);
|
||||
}
|
||||
|
||||
/// Return true if we might ever do TCO for calls with this calling convention.
|
||||
@ -2989,7 +2989,7 @@ static bool mayTailCallThisCC(CallingConv::ID CC) {
|
||||
/// Return true if the function is being made into a tailcall target by
|
||||
/// changing its ABI.
|
||||
static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
|
||||
return GuaranteedTailCallOpt && canGuaranteeTCO(CC);
|
||||
return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) || CC == CallingConv::Tail;
|
||||
}
|
||||
|
||||
bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
|
||||
@ -3615,6 +3615,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
|
||||
StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
|
||||
bool IsSibcall = false;
|
||||
bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
|
||||
CallConv == CallingConv::Tail;
|
||||
X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
|
||||
auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
|
||||
const auto *CI = dyn_cast_or_null<CallInst>(CLI.CS.getInstruction());
|
||||
@ -3635,8 +3637,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
if (Attr.getValueAsString() == "true")
|
||||
isTailCall = false;
|
||||
|
||||
if (Subtarget.isPICStyleGOT() &&
|
||||
!MF.getTarget().Options.GuaranteedTailCallOpt) {
|
||||
if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO) {
|
||||
// If we are using a GOT, disable tail calls to external symbols with
|
||||
// default visibility. Tail calling such a symbol requires using a GOT
|
||||
// relocation, which forces early binding of the symbol. This breaks code
|
||||
@ -3663,7 +3664,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
|
||||
// Sibcalls are automatically detected tailcalls which do not require
|
||||
// ABI changes.
|
||||
if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
|
||||
if (!IsGuaranteeTCO && isTailCall)
|
||||
IsSibcall = true;
|
||||
|
||||
if (isTailCall)
|
||||
@ -3695,8 +3696,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
// This is a sibcall. The memory operands are available in caller's
|
||||
// own caller's stack.
|
||||
NumBytes = 0;
|
||||
else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
|
||||
canGuaranteeTCO(CallConv))
|
||||
else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
|
||||
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
|
||||
|
||||
int FPDiff = 0;
|
||||
@ -4321,6 +4321,8 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
|
||||
bool CCMatch = CallerCC == CalleeCC;
|
||||
bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
|
||||
bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
|
||||
bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
|
||||
CalleeCC == CallingConv::Tail;
|
||||
|
||||
// Win64 functions have extra shadow space for argument homing. Don't do the
|
||||
// sibcall if the caller and callee have mismatched expectations for this
|
||||
@ -4328,7 +4330,7 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
|
||||
if (IsCalleeWin64 != IsCallerWin64)
|
||||
return false;
|
||||
|
||||
if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
|
||||
if (IsGuaranteeTCO) {
|
||||
if (canGuaranteeTCO(CalleeCC) && CCMatch)
|
||||
return true;
|
||||
return false;
|
||||
@ -24421,6 +24423,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
|
||||
case CallingConv::X86_FastCall:
|
||||
case CallingConv::X86_ThisCall:
|
||||
case CallingConv::Fast:
|
||||
case CallingConv::Tail:
|
||||
// Pass 'nest' parameter in EAX.
|
||||
// Must be kept in sync with X86CallingConv.td
|
||||
NestReg = X86::EAX;
|
||||
|
@ -815,6 +815,7 @@ public:
|
||||
// On Win64, all these conventions just use the default convention.
|
||||
case CallingConv::C:
|
||||
case CallingConv::Fast:
|
||||
case CallingConv::Tail:
|
||||
case CallingConv::Swift:
|
||||
case CallingConv::X86_FastCall:
|
||||
case CallingConv::X86_StdCall:
|
||||
|
114
test/CodeGen/X86/musttail-tailcc.ll
Normal file
114
test/CodeGen/X86/musttail-tailcc.ll
Normal file
@ -0,0 +1,114 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s -check-prefix=X64
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s -check-prefix=X32
|
||||
|
||||
; tailcc will turn all of these musttail calls into tail calls.
|
||||
|
||||
declare tailcc i32 @tailcallee(i32 %a1, i32 %a2)
|
||||
|
||||
define tailcc i32 @tailcaller(i32 %in1, i32 %in2) nounwind {
|
||||
; X64-LABEL: tailcaller:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: pushq %rax
|
||||
; X64-NEXT: popq %rax
|
||||
; X64-NEXT: jmp tailcallee # TAILCALL
|
||||
;
|
||||
; X32-LABEL: tailcaller:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: jmp tailcallee # TAILCALL
|
||||
entry:
|
||||
%tmp11 = musttail call tailcc i32 @tailcallee(i32 %in1, i32 %in2)
|
||||
ret i32 %tmp11
|
||||
}
|
||||
|
||||
declare tailcc i8* @alias_callee()
|
||||
|
||||
define tailcc noalias i8* @noalias_caller() nounwind {
|
||||
; X64-LABEL: noalias_caller:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: pushq %rax
|
||||
; X64-NEXT: popq %rax
|
||||
; X64-NEXT: jmp alias_callee # TAILCALL
|
||||
;
|
||||
; X32-LABEL: noalias_caller:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: jmp alias_callee # TAILCALL
|
||||
%p = musttail call tailcc i8* @alias_callee()
|
||||
ret i8* %p
|
||||
}
|
||||
|
||||
declare tailcc noalias i8* @noalias_callee()
|
||||
|
||||
define tailcc i8* @alias_caller() nounwind {
|
||||
; X64-LABEL: alias_caller:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: pushq %rax
|
||||
; X64-NEXT: popq %rax
|
||||
; X64-NEXT: jmp noalias_callee # TAILCALL
|
||||
;
|
||||
; X32-LABEL: alias_caller:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: jmp noalias_callee # TAILCALL
|
||||
%p = musttail call tailcc noalias i8* @noalias_callee()
|
||||
ret i8* %p
|
||||
}
|
||||
|
||||
define tailcc void @void_test(i32, i32, i32, i32) {
|
||||
; X64-LABEL: void_test:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: pushq %rax
|
||||
; X64-NEXT: .cfi_def_cfa_offset 16
|
||||
; X64-NEXT: popq %rax
|
||||
; X64-NEXT: .cfi_def_cfa_offset 8
|
||||
; X64-NEXT: jmp void_test # TAILCALL
|
||||
;
|
||||
; X32-LABEL: void_test:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: pushl %esi
|
||||
; X32-NEXT: .cfi_def_cfa_offset 8
|
||||
; X32-NEXT: subl $8, %esp
|
||||
; X32-NEXT: .cfi_def_cfa_offset 16
|
||||
; X32-NEXT: .cfi_offset %esi, -8
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X32-NEXT: movl %esi, {{[0-9]+}}(%esp)
|
||||
; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
|
||||
; X32-NEXT: addl $8, %esp
|
||||
; X32-NEXT: .cfi_def_cfa_offset 8
|
||||
; X32-NEXT: popl %esi
|
||||
; X32-NEXT: .cfi_def_cfa_offset 4
|
||||
; X32-NEXT: jmp void_test # TAILCALL
|
||||
entry:
|
||||
musttail call tailcc void @void_test( i32 %0, i32 %1, i32 %2, i32 %3)
|
||||
ret void
|
||||
}
|
||||
|
||||
define tailcc i1 @i1test(i32, i32, i32, i32) {
|
||||
; X64-LABEL: i1test:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: pushq %rax
|
||||
; X64-NEXT: .cfi_def_cfa_offset 16
|
||||
; X64-NEXT: popq %rax
|
||||
; X64-NEXT: .cfi_def_cfa_offset 8
|
||||
; X64-NEXT: jmp i1test # TAILCALL
|
||||
;
|
||||
; X32-LABEL: i1test:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: pushl %esi
|
||||
; X32-NEXT: .cfi_def_cfa_offset 8
|
||||
; X32-NEXT: subl $8, %esp
|
||||
; X32-NEXT: .cfi_def_cfa_offset 16
|
||||
; X32-NEXT: .cfi_offset %esi, -8
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X32-NEXT: movl %esi, {{[0-9]+}}(%esp)
|
||||
; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
|
||||
; X32-NEXT: addl $8, %esp
|
||||
; X32-NEXT: .cfi_def_cfa_offset 8
|
||||
; X32-NEXT: popl %esi
|
||||
; X32-NEXT: .cfi_def_cfa_offset 4
|
||||
; X32-NEXT: jmp i1test # TAILCALL
|
||||
entry:
|
||||
%4 = musttail call tailcc i1 @i1test( i32 %0, i32 %1, i32 %2, i32 %3)
|
||||
ret i1 %4
|
||||
}
|
155
test/CodeGen/X86/tailcall-tailcc.ll
Normal file
155
test/CodeGen/X86/tailcall-tailcc.ll
Normal file
@ -0,0 +1,155 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s -check-prefix=X64
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s -check-prefix=X32
|
||||
|
||||
; With tailcc, CodeGen guarantees a tail call optimization
|
||||
; for all of these.
|
||||
|
||||
declare tailcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4)
|
||||
|
||||
define tailcc i32 @tailcaller(i32 %in1, i32 %in2) nounwind {
|
||||
; X64-LABEL: tailcaller:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: pushq %rax
|
||||
; X64-NEXT: movl %edi, %edx
|
||||
; X64-NEXT: movl %esi, %ecx
|
||||
; X64-NEXT: popq %rax
|
||||
; X64-NEXT: jmp tailcallee # TAILCALL
|
||||
;
|
||||
; X32-LABEL: tailcaller:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: subl $16, %esp
|
||||
; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp)
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
|
||||
; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
|
||||
; X32-NEXT: addl $8, %esp
|
||||
; X32-NEXT: jmp tailcallee # TAILCALL
|
||||
entry:
|
||||
%tmp11 = tail call tailcc i32 @tailcallee(i32 %in1, i32 %in2, i32 %in1, i32 %in2)
|
||||
ret i32 %tmp11
|
||||
}
|
||||
|
||||
declare tailcc i8* @alias_callee()
|
||||
|
||||
define tailcc noalias i8* @noalias_caller() nounwind {
|
||||
; X64-LABEL: noalias_caller:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: pushq %rax
|
||||
; X64-NEXT: popq %rax
|
||||
; X64-NEXT: jmp alias_callee # TAILCALL
|
||||
;
|
||||
; X32-LABEL: noalias_caller:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: jmp alias_callee # TAILCALL
|
||||
%p = tail call tailcc i8* @alias_callee()
|
||||
ret i8* %p
|
||||
}
|
||||
|
||||
declare tailcc noalias i8* @noalias_callee()
|
||||
|
||||
define tailcc i8* @alias_caller() nounwind {
|
||||
; X64-LABEL: alias_caller:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: pushq %rax
|
||||
; X64-NEXT: popq %rax
|
||||
; X64-NEXT: jmp noalias_callee # TAILCALL
|
||||
;
|
||||
; X32-LABEL: alias_caller:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: jmp noalias_callee # TAILCALL
|
||||
%p = tail call tailcc noalias i8* @noalias_callee()
|
||||
ret i8* %p
|
||||
}
|
||||
|
||||
declare tailcc i32 @i32_callee()
|
||||
|
||||
define tailcc i32 @ret_undef() nounwind {
|
||||
; X64-LABEL: ret_undef:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: pushq %rax
|
||||
; X64-NEXT: popq %rax
|
||||
; X64-NEXT: jmp i32_callee # TAILCALL
|
||||
;
|
||||
; X32-LABEL: ret_undef:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: jmp i32_callee # TAILCALL
|
||||
%p = tail call tailcc i32 @i32_callee()
|
||||
ret i32 undef
|
||||
}
|
||||
|
||||
declare tailcc void @does_not_return()
|
||||
|
||||
define tailcc i32 @noret() nounwind {
|
||||
; X64-LABEL: noret:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: pushq %rax
|
||||
; X64-NEXT: popq %rax
|
||||
; X64-NEXT: jmp does_not_return # TAILCALL
|
||||
;
|
||||
; X32-LABEL: noret:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: jmp does_not_return # TAILCALL
|
||||
tail call tailcc void @does_not_return()
|
||||
unreachable
|
||||
}
|
||||
|
||||
define tailcc void @void_test(i32, i32, i32, i32) {
|
||||
; X64-LABEL: void_test:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: pushq %rax
|
||||
; X64-NEXT: .cfi_def_cfa_offset 16
|
||||
; X64-NEXT: popq %rax
|
||||
; X64-NEXT: .cfi_def_cfa_offset 8
|
||||
; X64-NEXT: jmp void_test # TAILCALL
|
||||
;
|
||||
; X32-LABEL: void_test:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: pushl %esi
|
||||
; X32-NEXT: .cfi_def_cfa_offset 8
|
||||
; X32-NEXT: subl $8, %esp
|
||||
; X32-NEXT: .cfi_def_cfa_offset 16
|
||||
; X32-NEXT: .cfi_offset %esi, -8
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X32-NEXT: movl %esi, {{[0-9]+}}(%esp)
|
||||
; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
|
||||
; X32-NEXT: addl $8, %esp
|
||||
; X32-NEXT: .cfi_def_cfa_offset 8
|
||||
; X32-NEXT: popl %esi
|
||||
; X32-NEXT: .cfi_def_cfa_offset 4
|
||||
; X32-NEXT: jmp void_test # TAILCALL
|
||||
entry:
|
||||
tail call tailcc void @void_test( i32 %0, i32 %1, i32 %2, i32 %3)
|
||||
ret void
|
||||
}
|
||||
|
||||
define tailcc i1 @i1test(i32, i32, i32, i32) {
|
||||
; X64-LABEL: i1test:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: pushq %rax
|
||||
; X64-NEXT: .cfi_def_cfa_offset 16
|
||||
; X64-NEXT: popq %rax
|
||||
; X64-NEXT: .cfi_def_cfa_offset 8
|
||||
; X64-NEXT: jmp i1test # TAILCALL
|
||||
;
|
||||
; X32-LABEL: i1test:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: pushl %esi
|
||||
; X32-NEXT: .cfi_def_cfa_offset 8
|
||||
; X32-NEXT: subl $8, %esp
|
||||
; X32-NEXT: .cfi_def_cfa_offset 16
|
||||
; X32-NEXT: .cfi_offset %esi, -8
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X32-NEXT: movl %esi, {{[0-9]+}}(%esp)
|
||||
; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
|
||||
; X32-NEXT: addl $8, %esp
|
||||
; X32-NEXT: .cfi_def_cfa_offset 8
|
||||
; X32-NEXT: popl %esi
|
||||
; X32-NEXT: .cfi_def_cfa_offset 4
|
||||
; X32-NEXT: jmp i1test # TAILCALL
|
||||
entry:
|
||||
%4 = tail call tailcc i1 @i1test( i32 %0, i32 %1, i32 %2, i32 %3)
|
||||
ret i1 %4
|
||||
}
|
19
test/CodeGen/X86/tailcc-calleesave.ll
Normal file
19
test/CodeGen/X86/tailcc-calleesave.ll
Normal file
@ -0,0 +1,19 @@
|
||||
; RUN: llc -mcpu=core < %s | FileCheck %s
|
||||
|
||||
target triple = "i686-apple-darwin"
|
||||
|
||||
declare tailcc void @foo(i32, i32, i32, i32, i32, i32)
|
||||
declare i32* @bar(i32*)
|
||||
|
||||
define tailcc void @hoge(i32 %b) nounwind {
|
||||
; Do not overwrite pushed callee-save registers
|
||||
; CHECK: pushl
|
||||
; CHECK: subl $[[SIZE:[0-9]+]], %esp
|
||||
; CHECK-NOT: [[SIZE]](%esp)
|
||||
%a = alloca i32
|
||||
store i32 0, i32* %a
|
||||
%d = tail call i32* @bar(i32* %a) nounwind
|
||||
store i32 %b, i32* %d
|
||||
tail call tailcc void @foo(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6) nounwind
|
||||
ret void
|
||||
}
|
40
test/CodeGen/X86/tailcc-disable-tail-calls.ll
Normal file
40
test/CodeGen/X86/tailcc-disable-tail-calls.ll
Normal file
@ -0,0 +1,40 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefix=NO-OPTION
|
||||
; RUN: llc < %s -mtriple=x86_64-- -disable-tail-calls | FileCheck %s --check-prefix=DISABLE-TRUE
|
||||
; RUN: llc < %s -mtriple=x86_64-- -disable-tail-calls=false | FileCheck %s --check-prefix=DISABLE-FALSE
|
||||
|
||||
; Check that command line option "-disable-tail-calls" overrides function
|
||||
; attribute "disable-tail-calls".
|
||||
|
||||
; NO-OPTION-LABEL: {{\_?}}func_attr
|
||||
; NO-OPTION: callq {{\_?}}callee
|
||||
|
||||
; DISABLE-FALSE-LABEL: {{\_?}}func_attr
|
||||
; DISABLE-FALSE: jmp {{\_?}}callee
|
||||
|
||||
; DISABLE-TRUE-LABEL: {{\_?}}func_attr
|
||||
; DISABLE-TRUE: callq {{\_?}}callee
|
||||
|
||||
define tailcc i32 @func_attr(i32 %a) #0 {
|
||||
entry:
|
||||
%call = tail call tailcc i32 @callee(i32 %a)
|
||||
ret i32 %call
|
||||
}
|
||||
|
||||
; NO-OPTION-LABEL: {{\_?}}func_noattr
|
||||
; NO-OPTION: jmp {{\_?}}callee
|
||||
|
||||
; DISABLE-FALSE-LABEL: {{\_?}}func_noattr
|
||||
; DISABLE-FALSE: jmp {{\_?}}callee
|
||||
|
||||
; DISABLE-TRUE-LABEL: {{\_?}}func_noattr
|
||||
; DISABLE-TRUE: callq {{\_?}}callee
|
||||
|
||||
define tailcc i32 @func_noattr(i32 %a) {
|
||||
entry:
|
||||
%call = tail call tailcc i32 @callee(i32 %a)
|
||||
ret i32 %call
|
||||
}
|
||||
|
||||
declare tailcc i32 @callee(i32)
|
||||
|
||||
attributes #0 = { "disable-tail-calls"="true" }
|
49
test/CodeGen/X86/tailcc-fastcc.ll
Normal file
49
test/CodeGen/X86/tailcc-fastcc.ll
Normal file
@ -0,0 +1,49 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -tailcallopt < %s -mtriple=x86_64-unknown-unknown | FileCheck %s -check-prefix=X64
|
||||
; RUN: llc -tailcallopt < %s -mtriple=i686-unknown-unknown | FileCheck %s -check-prefix=X32
|
||||
|
||||
; llc -tailcallopt should not enable tail calls from fastcc to tailcc or vice versa
|
||||
|
||||
declare tailcc i32 @tailcallee1(i32 %a1, i32 %a2, i32 %a3, i32 %a4)
|
||||
|
||||
define fastcc i32 @tailcaller1(i32 %in1, i32 %in2) nounwind {
|
||||
; X64-LABEL: tailcaller1:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: pushq %rax
|
||||
; X64-NEXT: movl %edi, %edx
|
||||
; X64-NEXT: movl %esi, %ecx
|
||||
; X64-NEXT: callq tailcallee1
|
||||
; X64-NEXT: retq $8
|
||||
;
|
||||
; X32-LABEL: tailcaller1:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: pushl %edx
|
||||
; X32-NEXT: pushl %ecx
|
||||
; X32-NEXT: calll tailcallee1
|
||||
; X32-NEXT: retl
|
||||
entry:
|
||||
%tmp11 = tail call tailcc i32 @tailcallee1(i32 %in1, i32 %in2, i32 %in1, i32 %in2)
|
||||
ret i32 %tmp11
|
||||
}
|
||||
|
||||
declare fastcc i32 @tailcallee2(i32 %a1, i32 %a2, i32 %a3, i32 %a4)
|
||||
|
||||
define tailcc i32 @tailcaller2(i32 %in1, i32 %in2) nounwind {
|
||||
; X64-LABEL: tailcaller2:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: pushq %rax
|
||||
; X64-NEXT: movl %edi, %edx
|
||||
; X64-NEXT: movl %esi, %ecx
|
||||
; X64-NEXT: callq tailcallee2
|
||||
; X64-NEXT: retq $8
|
||||
;
|
||||
; X32-LABEL: tailcaller2:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: pushl %edx
|
||||
; X32-NEXT: pushl %ecx
|
||||
; X32-NEXT: calll tailcallee2
|
||||
; X32-NEXT: retl
|
||||
entry:
|
||||
%tmp11 = tail call fastcc i32 @tailcallee2(i32 %in1, i32 %in2, i32 %in1, i32 %in2)
|
||||
ret i32 %tmp11
|
||||
}
|
18
test/CodeGen/X86/tailcc-fastisel.ll
Normal file
18
test/CodeGen/X86/tailcc-fastisel.ll
Normal file
@ -0,0 +1,18 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -fast-isel -fast-isel-abort=1 | FileCheck %s
|
||||
|
||||
%0 = type { i64, i32, i8* }
|
||||
|
||||
define tailcc i8* @"visit_array_aux<`Reference>"(%0 %arg, i32 %arg1) nounwind {
|
||||
fail: ; preds = %entry
|
||||
%tmp20 = tail call tailcc i8* @"visit_array_aux<`Reference>"(%0 %arg, i32 undef) ; <i8*> [#uses=1]
|
||||
; CHECK: jmp "_visit_array_aux<`Reference>" ## TAILCALL
|
||||
ret i8* %tmp20
|
||||
}
|
||||
|
||||
define i32 @foo() nounwind {
|
||||
entry:
|
||||
%0 = tail call i32 (...) @bar() nounwind ; <i32> [#uses=1]
|
||||
ret i32 %0
|
||||
}
|
||||
|
||||
declare i32 @bar(...) nounwind
|
71
test/CodeGen/X86/tailcc-largecode.ll
Normal file
71
test/CodeGen/X86/tailcc-largecode.ll
Normal file
@ -0,0 +1,71 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-linux-gnu -code-model=large -enable-misched=false | FileCheck %s
|
||||
|
||||
declare tailcc i32 @callee(i32 %arg)
|
||||
define tailcc i32 @directcall(i32 %arg) {
|
||||
entry:
|
||||
; This is the large code model, so &callee may not fit into the jmp
|
||||
; instruction. Instead, stick it into a register.
|
||||
; CHECK: movabsq $callee, [[REGISTER:%r[a-z0-9]+]]
|
||||
; CHECK: jmpq *[[REGISTER]] # TAILCALL
|
||||
%res = tail call tailcc i32 @callee(i32 %arg)
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; Check that the register used for an indirect tail call doesn't
|
||||
; clobber any of the arguments.
|
||||
define tailcc i32 @indirect_manyargs(i32(i32,i32,i32,i32,i32,i32,i32)* %target) {
|
||||
; Adjust the stack to enter the function. (The amount of the
|
||||
; adjustment may change in the future, in which case the location of
|
||||
; the stack argument and the return adjustment will change too.)
|
||||
; CHECK: pushq
|
||||
; Put the call target into RAX, which won't be clobbered while restoring
|
||||
; callee-saved registers and won't be used for passing arguments.
|
||||
; CHECK: movq %rdi, %rax
|
||||
; Pass the stack argument.
|
||||
; CHECK: movl $7, 16(%rsp)
|
||||
; Pass the register arguments, in the right registers.
|
||||
; CHECK: movl $1, %edi
|
||||
; CHECK: movl $2, %esi
|
||||
; CHECK: movl $3, %edx
|
||||
; CHECK: movl $4, %ecx
|
||||
; CHECK: movl $5, %r8d
|
||||
; CHECK: movl $6, %r9d
|
||||
; Adjust the stack to "return".
|
||||
; CHECK: popq
|
||||
; And tail-call to the target.
|
||||
; CHECK: jmpq *%rax # TAILCALL
|
||||
%res = tail call tailcc i32 %target(i32 1, i32 2, i32 3, i32 4, i32 5,
|
||||
i32 6, i32 7)
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; Check that the register used for a direct tail call doesn't clobber
|
||||
; any of the arguments.
|
||||
declare tailcc i32 @manyargs_callee(i32,i32,i32,i32,i32,i32,i32)
|
||||
define tailcc i32 @direct_manyargs() {
|
||||
; Adjust the stack to enter the function. (The amount of the
|
||||
; adjustment may change in the future, in which case the location of
|
||||
; the stack argument and the return adjustment will change too.)
|
||||
; CHECK: pushq
|
||||
; Pass the stack argument.
|
||||
; CHECK: movl $7, 16(%rsp)
|
||||
; This is the large code model, so &manyargs_callee may not fit into
|
||||
; the jmp instruction. Put it into a register which won't be clobbered
|
||||
; while restoring callee-saved registers and won't be used for passing
|
||||
; arguments.
|
||||
; CHECK: movabsq $manyargs_callee, %rax
|
||||
; Pass the register arguments, in the right registers.
|
||||
; CHECK: movl $1, %edi
|
||||
; CHECK: movl $2, %esi
|
||||
; CHECK: movl $3, %edx
|
||||
; CHECK: movl $4, %ecx
|
||||
; CHECK: movl $5, %r8d
|
||||
; CHECK: movl $6, %r9d
|
||||
; Adjust the stack to "return".
|
||||
; CHECK: popq
|
||||
; And tail-call to the target.
|
||||
; CHECK: jmpq *%rax # TAILCALL
|
||||
%res = tail call tailcc i32 @manyargs_callee(i32 1, i32 2, i32 3, i32 4,
|
||||
i32 5, i32 6, i32 7)
|
||||
ret i32 %res
|
||||
}
|
23
test/CodeGen/X86/tailcc-stackalign.ll
Normal file
23
test/CodeGen/X86/tailcc-stackalign.ll
Normal file
@ -0,0 +1,23 @@
|
||||
; RUN: llc < %s -mtriple=i686-unknown-linux -no-x86-call-frame-opt | FileCheck %s
|
||||
; Linux has 8 byte alignment so the params cause stack size 20,
|
||||
; ensure that a normal tailcc call has matching stack size
|
||||
|
||||
|
||||
define tailcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
|
||||
ret i32 %a3
|
||||
}
|
||||
|
||||
define tailcc i32 @tailcaller(i32 %in1, i32 %in2, i32 %in3, i32 %in4) {
|
||||
%tmp11 = tail call tailcc i32 @tailcallee(i32 %in1, i32 %in2,
|
||||
i32 %in1, i32 %in2)
|
||||
ret i32 %tmp11
|
||||
}
|
||||
|
||||
define i32 @main(i32 %argc, i8** %argv) {
|
||||
%tmp1 = call tailcc i32 @tailcaller( i32 1, i32 2, i32 3, i32 4 )
|
||||
; expect match subl [stacksize] here
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
; CHECK: calll tailcaller
|
||||
; CHECK-NEXT: subl $12
|
7
test/CodeGen/X86/tailcc-structret.ll
Normal file
7
test/CodeGen/X86/tailcc-structret.ll
Normal file
@ -0,0 +1,7 @@
|
||||
; RUN: llc < %s -mtriple=i686-unknown-linux | FileCheck %s
|
||||
define tailcc { { i8*, i8* }*, i8*} @init({ { i8*, i8* }*, i8*}, i32) {
|
||||
entry:
|
||||
%2 = tail call tailcc { { i8*, i8* }*, i8* } @init({ { i8*, i8*}*, i8*} %0, i32 %1)
|
||||
ret { { i8*, i8* }*, i8*} %2
|
||||
; CHECK: jmp init
|
||||
}
|
21
test/CodeGen/X86/tailccbyval.ll
Normal file
21
test/CodeGen/X86/tailccbyval.ll
Normal file
@ -0,0 +1,21 @@
|
||||
; RUN: llc < %s -mtriple=i686-unknown-linux | FileCheck %s
|
||||
%struct.s = type {i32, i32, i32, i32, i32, i32, i32, i32,
|
||||
i32, i32, i32, i32, i32, i32, i32, i32,
|
||||
i32, i32, i32, i32, i32, i32, i32, i32 }
|
||||
|
||||
define tailcc i32 @tailcallee(%struct.s* byval %a) nounwind {
|
||||
entry:
|
||||
%tmp2 = getelementptr %struct.s, %struct.s* %a, i32 0, i32 0
|
||||
%tmp3 = load i32, i32* %tmp2
|
||||
ret i32 %tmp3
|
||||
; CHECK: tailcallee
|
||||
; CHECK: movl 4(%esp), %eax
|
||||
}
|
||||
|
||||
define tailcc i32 @tailcaller(%struct.s* byval %a) nounwind {
|
||||
entry:
|
||||
%tmp4 = tail call tailcc i32 @tailcallee(%struct.s* byval %a )
|
||||
ret i32 %tmp4
|
||||
; CHECK: tailcaller
|
||||
; CHECK: jmp tailcallee
|
||||
}
|
42
test/CodeGen/X86/tailccbyval64.ll
Normal file
42
test/CodeGen/X86/tailccbyval64.ll
Normal file
@ -0,0 +1,42 @@
|
||||
; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux | FileCheck %s
|
||||
|
||||
; FIXME: Win64 does not support byval.
|
||||
|
||||
; Expect the entry point.
|
||||
; CHECK-LABEL: tailcaller:
|
||||
|
||||
; Expect 2 rep;movs because of tail call byval lowering.
|
||||
; CHECK: rep;
|
||||
; CHECK: rep;
|
||||
|
||||
; A sequence of copyto/copyfrom virtual registers is used to deal with byval
|
||||
; lowering appearing after moving arguments to registers. The following two
|
||||
; checks verify that the register allocator changes those sequences to direct
|
||||
; moves to argument register where it can (for registers that are not used in
|
||||
; byval lowering - not rsi, not rdi, not rcx).
|
||||
; Expect argument 4 to be moved directly to register edx.
|
||||
; CHECK: movl $7, %edx
|
||||
|
||||
; Expect argument 6 to be moved directly to register r8.
|
||||
; CHECK: movl $17, %r8d
|
||||
|
||||
; Expect not call but jmp to @tailcallee.
|
||||
; CHECK: jmp tailcallee
|
||||
|
||||
; Expect the trailer.
|
||||
; CHECK: .size tailcaller
|
||||
|
||||
%struct.s = type { i64, i64, i64, i64, i64, i64, i64, i64,
|
||||
i64, i64, i64, i64, i64, i64, i64, i64,
|
||||
i64, i64, i64, i64, i64, i64, i64, i64 }
|
||||
|
||||
declare tailcc i64 @tailcallee(%struct.s* byval %a, i64 %val, i64 %val2, i64 %val3, i64 %val4, i64 %val5)
|
||||
|
||||
|
||||
define tailcc i64 @tailcaller(i64 %b, %struct.s* byval %a) {
|
||||
entry:
|
||||
%tmp2 = getelementptr %struct.s, %struct.s* %a, i32 0, i32 1
|
||||
%tmp3 = load i64, i64* %tmp2, align 8
|
||||
%tmp4 = tail call tailcc i64 @tailcallee(%struct.s* byval %a , i64 %tmp3, i64 %b, i64 7, i64 13, i64 17)
|
||||
ret i64 %tmp4
|
||||
}
|
6
test/CodeGen/X86/tailccfp.ll
Normal file
6
test/CodeGen/X86/tailccfp.ll
Normal file
@ -0,0 +1,6 @@
|
||||
; RUN: llc < %s -mtriple=i686-- | FileCheck %s
|
||||
define tailcc i32 @bar(i32 %X, i32(double, i32) *%FP) {
|
||||
%Y = tail call tailcc i32 %FP(double 0.0, i32 %X)
|
||||
ret i32 %Y
|
||||
; CHECK: jmpl
|
||||
}
|
27
test/CodeGen/X86/tailccfp2.ll
Normal file
27
test/CodeGen/X86/tailccfp2.ll
Normal file
@ -0,0 +1,27 @@
|
||||
; RUN: llc < %s -mtriple=i686-- | FileCheck %s
|
||||
|
||||
declare i32 @putchar(i32)
|
||||
|
||||
define tailcc i32 @checktail(i32 %x, i32* %f, i32 %g) nounwind {
|
||||
; CHECK-LABEL: checktail:
|
||||
%tmp1 = icmp sgt i32 %x, 0
|
||||
br i1 %tmp1, label %if-then, label %if-else
|
||||
|
||||
if-then:
|
||||
%fun_ptr = bitcast i32* %f to i32(i32, i32*, i32)*
|
||||
%arg1 = add i32 %x, -1
|
||||
call i32 @putchar(i32 90)
|
||||
; CHECK: jmpl *%e{{.*}}
|
||||
%res = tail call tailcc i32 %fun_ptr( i32 %arg1, i32 * %f, i32 %g)
|
||||
ret i32 %res
|
||||
|
||||
if-else:
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
|
||||
define i32 @main() nounwind {
|
||||
%f = bitcast i32 (i32, i32*, i32)* @checktail to i32*
|
||||
%res = tail call tailcc i32 @checktail( i32 10, i32* %f,i32 10)
|
||||
ret i32 %res
|
||||
}
|
16
test/CodeGen/X86/tailccpic1.ll
Normal file
16
test/CodeGen/X86/tailccpic1.ll
Normal file
@ -0,0 +1,16 @@
|
||||
; RUN: llc < %s -mtriple=i686-pc-linux-gnu -relocation-model=pic | FileCheck %s
|
||||
|
||||
; This test uses guaranteed TCO so these will be tail calls, despite the early
|
||||
; binding issues.
|
||||
|
||||
define protected tailcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
|
||||
entry:
|
||||
ret i32 %a3
|
||||
}
|
||||
|
||||
define tailcc i32 @tailcaller(i32 %in1, i32 %in2) {
|
||||
entry:
|
||||
%tmp11 = tail call tailcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; <i32> [#uses=1]
|
||||
ret i32 %tmp11
|
||||
; CHECK: jmp tailcallee
|
||||
}
|
15
test/CodeGen/X86/tailccpic2.ll
Normal file
15
test/CodeGen/X86/tailccpic2.ll
Normal file
@ -0,0 +1,15 @@
|
||||
; RUN: llc < %s -mtriple=i686-pc-linux-gnu -relocation-model=pic | FileCheck %s
|
||||
|
||||
define tailcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
|
||||
entry:
|
||||
ret i32 %a3
|
||||
}
|
||||
|
||||
define tailcc i32 @tailcaller(i32 %in1, i32 %in2) {
|
||||
entry:
|
||||
%tmp11 = tail call tailcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; <i32> [#uses=1]
|
||||
ret i32 %tmp11
|
||||
; CHECK: movl tailcallee@GOT
|
||||
; CHECK: jmpl
|
||||
}
|
||||
|
28
test/CodeGen/X86/tailccstack64.ll
Normal file
28
test/CodeGen/X86/tailccstack64.ll
Normal file
@ -0,0 +1,28 @@
|
||||
; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -post-RA-scheduler=true | FileCheck %s
|
||||
; RUN: llc < %s -mcpu=generic -mtriple=x86_64-win32 -post-RA-scheduler=true | FileCheck %s
|
||||
|
||||
; FIXME: Redundant unused stack allocation could be eliminated.
|
||||
; CHECK: subq ${{24|72|80}}, %rsp
|
||||
|
||||
; Check that lowered arguments on the stack do not overwrite each other.
|
||||
; Load param %in1 from the stack into a temporary register (%eax).
|
||||
; CHECK: movl [[A1:32|144]](%rsp), [[R1:%e..]]
|
||||
; Move param %in2 to temp register (%r10d).
|
||||
; CHECK: movl [[A2:40|152]](%rsp), [[R2:%[a-z0-9]+]]
|
||||
; Add %in1 %p1 to a different temporary register (%eax).
|
||||
; CHECK: addl {{%edi|%ecx}}, [[R1]]
|
||||
; Move param %in2 to stack.
|
||||
; CHECK-DAG: movl [[R2]], [[A1]](%rsp)
|
||||
; Move result of addition to stack.
|
||||
; CHECK-DAG: movl [[R1]], [[A2]](%rsp)
|
||||
; Eventually, do a TAILCALL
|
||||
; CHECK: TAILCALL
|
||||
|
||||
declare tailcc i32 @tailcallee(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %a, i32 %b) nounwind
|
||||
|
||||
define tailcc i32 @tailcaller(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %in1, i32 %in2) nounwind {
|
||||
entry:
|
||||
%tmp = add i32 %in1, %p1
|
||||
%retval = tail call tailcc i32 @tailcallee(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %in2,i32 %tmp)
|
||||
ret i32 %retval
|
||||
}
|
@ -82,6 +82,7 @@ syn keyword llvmKeyword
|
||||
\ externally_initialized
|
||||
\ extern_weak
|
||||
\ fastcc
|
||||
\ tailcc
|
||||
\ filter
|
||||
\ from
|
||||
\ gc
|
||||
|
Loading…
Reference in New Issue
Block a user