mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-04-04 08:41:49 +00:00
The basic problem is that some mainstream programs cannot deal with the way
clang optimizes tail calls, as in this example:

    int foo(void);
    int bar(void) {
      return foo();
    }

where the call is transformed to:

      calll .L0$pb
    .L0$pb:
      popl %eax
    .Ltmp0:
      addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp0-.L0$pb), %eax
      movl foo@GOT(%eax), %eax
      popl %ebp
      jmpl *%eax # TAILCALL

However, the GOT references must all be resolved at dlopen() time, and so this
approach cannot be used with lazy dynamic linking (e.g. using RTLD_LAZY), which
usually populates the PLT with stubs that perform the actual resolving.

This patch changes X86TargetLowering::LowerCall() to skip tail call
optimization, if the called function is a global or external symbol.

Patch by Dimitry Andric!

PR15086

llvm-svn: 195318
This commit is contained in:
parent
eba6ab82dd
commit
07a5510fa2
@ -2665,21 +2665,15 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
RegsToPass.push_back(std::make_pair(unsigned(X86::EBX),
|
||||
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy())));
|
||||
} else {
|
||||
// If we are tail calling and generating PIC/GOT style code load the
|
||||
// address of the callee into ECX. The value in ecx is used as target of
|
||||
// the tail jump. This is done to circumvent the ebx/callee-saved problem
|
||||
// for tail calls on PIC/GOT architectures. Normally we would just put the
|
||||
// address of GOT into ebx and then call target@PLT. But for tail calls
|
||||
// ebx would be restored (since ebx is callee saved) before jumping to the
|
||||
// target@PLT.
|
||||
|
||||
// Note: The actual moving to ECX is done further down.
|
||||
// If we are tail calling a global or external symbol in GOT pic mode, we
|
||||
// cannot use a direct jump, since that would make lazy dynamic linking
|
||||
// impossible (see PR15086). So pretend this is not a tail call, to
|
||||
// prevent the optimization to a jump.
|
||||
GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
|
||||
if (G && !G->getGlobal()->hasHiddenVisibility() &&
|
||||
!G->getGlobal()->hasProtectedVisibility())
|
||||
Callee = LowerGlobalAddress(Callee, DAG);
|
||||
else if (isa<ExternalSymbolSDNode>(Callee))
|
||||
Callee = LowerExternalSymbol(Callee, DAG);
|
||||
if ((G && !G->getGlobal()->hasHiddenVisibility() &&
|
||||
!G->getGlobal()->hasProtectedVisibility()) ||
|
||||
isa<ExternalSymbolSDNode>(Callee))
|
||||
isTailCall = false;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,8 +5,7 @@ target triple = "i386-unknown-freebsd9.0"
|
||||
|
||||
define double @test1(double %x) nounwind readnone {
|
||||
; CHECK-LABEL: test1:
|
||||
; CHECK: movl foo@GOT
|
||||
; CHECK-NEXT: jmpl
|
||||
; CHECK: calll foo@PLT
|
||||
%1 = tail call double @foo(double %x) nounwind readnone
|
||||
ret double %1
|
||||
}
|
||||
@ -15,8 +14,7 @@ declare double @foo(double) readnone
|
||||
|
||||
define double @test2(double %x) nounwind readnone {
|
||||
; CHECK-LABEL: test2:
|
||||
; CHECK: movl sin@GOT
|
||||
; CHECK-NEXT: jmpl
|
||||
; CHECK: calll sin@PLT
|
||||
%1 = tail call double @sin(double %x) nounwind readnone
|
||||
ret double %1
|
||||
}
|
||||
|
@ -9,7 +9,7 @@ define fastcc i32 @tailcaller(i32 %in1, i32 %in2) {
|
||||
entry:
|
||||
%tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; <i32> [#uses=1]
|
||||
ret i32 %tmp11
|
||||
; CHECK: movl tailcallee@GOT
|
||||
; CHECK: jmpl
|
||||
; Note that this call via PLT could be further optimized into a direct call (no GOT, no PLT):
|
||||
; CHECK: calll tailcallee@PLT
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user