mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-15 23:57:48 +00:00
Do not emit callseq instructions around sibcalls. This eliminates some unnecessary stack adjustments.
llvm-svn: 95475
This commit is contained in:
parent
05cffb66ae
commit
de1a4726e6
@ -1749,8 +1749,6 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
|
||||
SDValue &OutRetAddr, SDValue Chain,
|
||||
bool IsTailCall, bool Is64Bit,
|
||||
int FPDiff, DebugLoc dl) {
|
||||
if (!IsTailCall || FPDiff==0) return Chain;
|
||||
|
||||
// Adjust the Return address stack slot.
|
||||
EVT VT = getPointerTy();
|
||||
OutRetAddr = getReturnAddressFrameIndex(DAG);
|
||||
@ -1796,8 +1794,14 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
|
||||
// Check if it's really possible to do a tail call.
|
||||
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
|
||||
Outs, Ins, DAG);
|
||||
|
||||
// Sibcalls are automatically detected tailcalls which do not require
|
||||
// ABI changes.
|
||||
if (!PerformTailCallOpt && isTailCall)
|
||||
IsSibcall = true;
|
||||
|
||||
if (isTailCall)
|
||||
++NumTailCalls;
|
||||
}
|
||||
|
||||
assert(!(isVarArg && CallConv == CallingConv::Fast) &&
|
||||
@ -1811,17 +1815,15 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
|
||||
|
||||
// Get a count of how many bytes are to be pushed on the stack.
|
||||
unsigned NumBytes = CCInfo.getNextStackOffset();
|
||||
if (FuncIsMadeTailCallSafe(CallConv))
|
||||
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
|
||||
else if (IsSibcall)
|
||||
if (IsSibcall)
|
||||
// This is a sibcall. The memory operands are available in caller's
|
||||
// own caller's stack.
|
||||
NumBytes = 0;
|
||||
else if (PerformTailCallOpt && CallConv == CallingConv::Fast)
|
||||
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
|
||||
|
||||
int FPDiff = 0;
|
||||
if (isTailCall) {
|
||||
++NumTailCalls;
|
||||
|
||||
if (isTailCall && !IsSibcall) {
|
||||
// Lower arguments at fp - stackoffset + fpdiff.
|
||||
unsigned NumBytesCallerPushed =
|
||||
MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
|
||||
@ -1833,12 +1835,14 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
|
||||
MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff);
|
||||
}
|
||||
|
||||
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
|
||||
if (!IsSibcall)
|
||||
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
|
||||
|
||||
SDValue RetAddrFrIdx;
|
||||
// Load return address for tail calls.
|
||||
Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall, Is64Bit,
|
||||
FPDiff, dl);
|
||||
if (isTailCall && FPDiff)
|
||||
Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
|
||||
Is64Bit, FPDiff, dl);
|
||||
|
||||
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
|
||||
SmallVector<SDValue, 8> MemOpChains;
|
||||
@ -1888,7 +1892,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
|
||||
|
||||
if (VA.isRegLoc()) {
|
||||
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
|
||||
} else if ((!isTailCall || isByVal) && !IsSibcall) {
|
||||
} else if (!IsSibcall && (!isTailCall || isByVal)) {
|
||||
assert(VA.isMemLoc());
|
||||
if (StackPtr.getNode() == 0)
|
||||
StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, getPointerTy());
|
||||
@ -1913,7 +1917,6 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
|
||||
InFlag = Chain.getValue(1);
|
||||
}
|
||||
|
||||
|
||||
if (Subtarget->isPICStyleGOT()) {
|
||||
// ELF / PIC requires GOT in the EBX register before function calls via PLT
|
||||
// GOT pointer.
|
||||
@ -2110,7 +2113,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
|
||||
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
|
||||
SmallVector<SDValue, 8> Ops;
|
||||
|
||||
if (isTailCall) {
|
||||
if (!IsSibcall && isTailCall) {
|
||||
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
|
||||
DAG.getIntPtrConstant(0, true), InFlag);
|
||||
InFlag = Chain.getValue(1);
|
||||
@ -2179,12 +2182,14 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
|
||||
NumBytesForCalleeToPush = 0; // Callee pops nothing.
|
||||
|
||||
// Returns a flag for retval copy to use.
|
||||
Chain = DAG.getCALLSEQ_END(Chain,
|
||||
DAG.getIntPtrConstant(NumBytes, true),
|
||||
DAG.getIntPtrConstant(NumBytesForCalleeToPush,
|
||||
true),
|
||||
InFlag);
|
||||
InFlag = Chain.getValue(1);
|
||||
if (!IsSibcall) {
|
||||
Chain = DAG.getCALLSEQ_END(Chain,
|
||||
DAG.getIntPtrConstant(NumBytes, true),
|
||||
DAG.getIntPtrConstant(NumBytesForCalleeToPush,
|
||||
true),
|
||||
InFlag);
|
||||
InFlag = Chain.getValue(1);
|
||||
}
|
||||
|
||||
// Handle result values, copying them out of physregs into vregs that we
|
||||
// return.
|
||||
|
@ -48,9 +48,8 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
|
||||
/// ReturnAddrIndex - FrameIndex for return slot.
|
||||
int ReturnAddrIndex;
|
||||
|
||||
/// TailCallReturnAddrDelta - Delta the ReturnAddr stack slot is moved
|
||||
/// Used for creating an area before the register spill area on the stack
|
||||
/// the returnaddr can be safely moved to this area
|
||||
/// TailCallReturnAddrDelta - The number of bytes by which return address
|
||||
/// stack slot is moved as the result of tail call optimization.
|
||||
int TailCallReturnAddrDelta;
|
||||
|
||||
/// SRetReturnReg - Some subtargets require that sret lowering includes
|
||||
|
@ -1,9 +1,14 @@
|
||||
; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim | grep subl | count 1
|
||||
; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim | FileCheck %s
|
||||
|
||||
%struct.decode_t = type { i8, i8, i8, i8, i16, i8, i8, %struct.range_t** }
|
||||
%struct.range_t = type { float, float, i32, i32, i32, [0 x i8] }
|
||||
|
||||
define i32 @decode_byte(%struct.decode_t* %decode) nounwind {
|
||||
; CHECK: decode_byte:
|
||||
; CHECK: pushl
|
||||
; CHECK: popl
|
||||
; CHECK: popl
|
||||
; CHECK: jmp
|
||||
entry:
|
||||
%tmp2 = getelementptr %struct.decode_t* %decode, i32 0, i32 4 ; <i16*> [#uses=1]
|
||||
%tmp23 = bitcast i16* %tmp2 to i32* ; <i32*> [#uses=1]
|
||||
|
@ -146,9 +146,13 @@ define i32 @t11(i32 %x, i32 %y, i32 %z.0, i32 %z.1, i32 %z.2) nounwind ssp {
|
||||
; eliminated currently.
|
||||
|
||||
; 32: t11:
|
||||
; 32-NOT: subl ${{[0-9]+}}, %esp
|
||||
; 32-NOT: addl ${{[0-9]+}}, %esp
|
||||
; 32: jmp {{_?}}foo5
|
||||
|
||||
; 64: t11:
|
||||
; 64-NOT: subq ${{[0-9]+}}, %esp
|
||||
; 64-NOT: addq ${{[0-9]+}}, %esp
|
||||
; 64: jmp {{_?}}foo5
|
||||
entry:
|
||||
%0 = icmp eq i32 %x, 0
|
||||
@ -168,9 +172,13 @@ declare i32 @foo5(i32, i32, i32, i32, i32)
|
||||
|
||||
define i32 @t12(i32 %x, i32 %y, %struct.t* byval align 4 %z) nounwind ssp {
|
||||
; 32: t12:
|
||||
; 32-NOT: subl ${{[0-9]+}}, %esp
|
||||
; 32-NOT: addl ${{[0-9]+}}, %esp
|
||||
; 32: jmp {{_?}}foo6
|
||||
|
||||
; 64: t12:
|
||||
; 64-NOT: subq ${{[0-9]+}}, %esp
|
||||
; 64-NOT: addq ${{[0-9]+}}, %esp
|
||||
; 64: jmp {{_?}}foo6
|
||||
entry:
|
||||
%0 = icmp eq i32 %x, 0
|
||||
|
Loading…
Reference in New Issue
Block a user