Do not emit callseq instructions around sibcalls. This eliminated some unnecessary stack adjustments.

llvm-svn: 95475
This commit is contained in:
Evan Cheng 2010-02-06 03:28:46 +00:00
parent 05cffb66ae
commit de1a4726e6
4 changed files with 41 additions and 24 deletions

View File

@ -1749,8 +1749,6 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
SDValue &OutRetAddr, SDValue Chain,
bool IsTailCall, bool Is64Bit,
int FPDiff, DebugLoc dl) {
if (!IsTailCall || FPDiff==0) return Chain;
// Adjust the Return address stack slot.
EVT VT = getPointerTy();
OutRetAddr = getReturnAddressFrameIndex(DAG);
@ -1796,8 +1794,14 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Check if it's really possible to do a tail call.
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
Outs, Ins, DAG);
// Sibcalls are automatically detected tailcalls which do not require
// ABI changes.
if (!PerformTailCallOpt && isTailCall)
IsSibcall = true;
if (isTailCall)
++NumTailCalls;
}
assert(!(isVarArg && CallConv == CallingConv::Fast) &&
@ -1811,17 +1815,15 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
if (FuncIsMadeTailCallSafe(CallConv))
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
else if (IsSibcall)
if (IsSibcall)
// This is a sibcall. The memory operands are available in caller's
// own caller's stack.
NumBytes = 0;
else if (PerformTailCallOpt && CallConv == CallingConv::Fast)
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
int FPDiff = 0;
if (isTailCall) {
++NumTailCalls;
if (isTailCall && !IsSibcall) {
// Lower arguments at fp - stackoffset + fpdiff.
unsigned NumBytesCallerPushed =
MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
@ -1833,12 +1835,14 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff);
}
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
if (!IsSibcall)
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
SDValue RetAddrFrIdx;
// Load return adress for tail calls.
Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall, Is64Bit,
FPDiff, dl);
if (isTailCall && FPDiff)
Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
Is64Bit, FPDiff, dl);
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
@ -1888,7 +1892,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else if ((!isTailCall || isByVal) && !IsSibcall) {
} else if (!IsSibcall && (!isTailCall || isByVal)) {
assert(VA.isMemLoc());
if (StackPtr.getNode() == 0)
StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, getPointerTy());
@ -1913,7 +1917,6 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
InFlag = Chain.getValue(1);
}
if (Subtarget->isPICStyleGOT()) {
// ELF / PIC requires GOT in the EBX register before function calls via PLT
// GOT pointer.
@ -2110,7 +2113,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
SmallVector<SDValue, 8> Ops;
if (isTailCall) {
if (!IsSibcall && isTailCall) {
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
DAG.getIntPtrConstant(0, true), InFlag);
InFlag = Chain.getValue(1);
@ -2179,12 +2182,14 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
NumBytesForCalleeToPush = 0; // Callee pops nothing.
// Returns a flag for retval copy to use.
Chain = DAG.getCALLSEQ_END(Chain,
DAG.getIntPtrConstant(NumBytes, true),
DAG.getIntPtrConstant(NumBytesForCalleeToPush,
true),
InFlag);
InFlag = Chain.getValue(1);
if (!IsSibcall) {
Chain = DAG.getCALLSEQ_END(Chain,
DAG.getIntPtrConstant(NumBytes, true),
DAG.getIntPtrConstant(NumBytesForCalleeToPush,
true),
InFlag);
InFlag = Chain.getValue(1);
}
// Handle result values, copying them out of physregs into vregs that we
// return.

View File

@ -48,9 +48,8 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// ReturnAddrIndex - FrameIndex for return slot.
int ReturnAddrIndex;
/// TailCallReturnAddrDelta - Delta the ReturnAddr stack slot is moved
/// Used for creating an area before the register spill area on the stack
/// the returnaddr can be savely move to this area
/// TailCallReturnAddrDelta - The number of bytes by which return address
/// stack slot is moved as the result of tail call optimization.
int TailCallReturnAddrDelta;
/// SRetReturnReg - Some subtargets require that sret lowering includes

View File

@ -1,9 +1,14 @@
; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim | grep subl | count 1
; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim | FileCheck %s
%struct.decode_t = type { i8, i8, i8, i8, i16, i8, i8, %struct.range_t** }
%struct.range_t = type { float, float, i32, i32, i32, [0 x i8] }
define i32 @decode_byte(%struct.decode_t* %decode) nounwind {
; CHECK: decode_byte:
; CHECK: pushl
; CHECK: popl
; CHECK: popl
; CHECK: jmp
entry:
%tmp2 = getelementptr %struct.decode_t* %decode, i32 0, i32 4 ; <i16*> [#uses=1]
%tmp23 = bitcast i16* %tmp2 to i32* ; <i32*> [#uses=1]

View File

@ -146,9 +146,13 @@ define i32 @t11(i32 %x, i32 %y, i32 %z.0, i32 %z.1, i32 %z.2) nounwind ssp {
; eliminated currently.
; 32: t11:
; 32-NOT: subl ${{[0-9]+}}, %esp
; 32-NOT: addl ${{[0-9]+}}, %esp
; 32: jmp {{_?}}foo5
; 64: t11:
; 64-NOT: subq ${{[0-9]+}}, %esp
; 64-NOT: addq ${{[0-9]+}}, %esp
; 64: jmp {{_?}}foo5
entry:
%0 = icmp eq i32 %x, 0
@ -168,9 +172,13 @@ declare i32 @foo5(i32, i32, i32, i32, i32)
define i32 @t12(i32 %x, i32 %y, %struct.t* byval align 4 %z) nounwind ssp {
; 32: t12:
; 32-NOT: subl ${{[0-9]+}}, %esp
; 32-NOT: addl ${{[0-9]+}}, %esp
; 32: jmp {{_?}}foo6
; 64: t12:
; 64-NOT: subq ${{[0-9]+}}, %esp
; 64-NOT: addq ${{[0-9]+}}, %esp
; 64: jmp {{_?}}foo6
entry:
%0 = icmp eq i32 %x, 0