diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 05419fe3cc4..b268e4b9ae3 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1749,8 +1749,6 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall, bool Is64Bit, int FPDiff, DebugLoc dl) { - if (!IsTailCall || FPDiff==0) return Chain; - // Adjust the Return address stack slot. EVT VT = getPointerTy(); OutRetAddr = getReturnAddressFrameIndex(DAG); @@ -1796,8 +1794,14 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Check if it's really possible to do a tail call. isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, Outs, Ins, DAG); + + // Sibcalls are automatically detected tailcalls which do not require + // ABI changes. if (!PerformTailCallOpt && isTailCall) IsSibcall = true; + + if (isTailCall) + ++NumTailCalls; } assert(!(isVarArg && CallConv == CallingConv::Fast) && @@ -1811,17 +1815,15 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); - if (FuncIsMadeTailCallSafe(CallConv)) - NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG); - else if (IsSibcall) + if (IsSibcall) // This is a sibcall. The memory operands are available in caller's // own caller's stack. NumBytes = 0; + else if (PerformTailCallOpt && CallConv == CallingConv::Fast) + NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG); int FPDiff = 0; - if (isTailCall) { - ++NumTailCalls; - + if (isTailCall && !IsSibcall) { // Lower arguments at fp - stackoffset + fpdiff. 
unsigned NumBytesCallerPushed = MF.getInfo()->getBytesToPopOnReturn(); @@ -1833,12 +1835,14 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, MF.getInfo()->setTCReturnAddrDelta(FPDiff); } - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); + if (!IsSibcall) + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); SDValue RetAddrFrIdx; // Load return adress for tail calls. - Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall, Is64Bit, - FPDiff, dl); + if (isTailCall && FPDiff) + Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall, + Is64Bit, FPDiff, dl); SmallVector, 8> RegsToPass; SmallVector MemOpChains; @@ -1888,7 +1892,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (VA.isRegLoc()) { RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); - } else if ((!isTailCall || isByVal) && !IsSibcall) { + } else if (!IsSibcall && (!isTailCall || isByVal)) { assert(VA.isMemLoc()); if (StackPtr.getNode() == 0) StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, getPointerTy()); @@ -1913,7 +1917,6 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, InFlag = Chain.getValue(1); } - if (Subtarget->isPICStyleGOT()) { // ELF / PIC requires GOT in the EBX register before function calls via PLT // GOT pointer. @@ -2110,7 +2113,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); SmallVector Ops; - if (isTailCall) { + if (!IsSibcall && isTailCall) { Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), DAG.getIntPtrConstant(0, true), InFlag); InFlag = Chain.getValue(1); @@ -2179,12 +2182,14 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, NumBytesForCalleeToPush = 0; // Callee pops nothing. // Returns a flag for retval copy to use. 
- Chain = DAG.getCALLSEQ_END(Chain, - DAG.getIntPtrConstant(NumBytes, true), - DAG.getIntPtrConstant(NumBytesForCalleeToPush, - true), - InFlag); - InFlag = Chain.getValue(1); + if (!IsSibcall) { + Chain = DAG.getCALLSEQ_END(Chain, + DAG.getIntPtrConstant(NumBytes, true), + DAG.getIntPtrConstant(NumBytesForCalleeToPush, + true), + InFlag); + InFlag = Chain.getValue(1); + } // Handle result values, copying them out of physregs into vregs that we // return. diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h index fafcf7e3d79..bb53bf14cea 100644 --- a/lib/Target/X86/X86MachineFunctionInfo.h +++ b/lib/Target/X86/X86MachineFunctionInfo.h @@ -48,9 +48,8 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { /// ReturnAddrIndex - FrameIndex for return slot. int ReturnAddrIndex; - /// TailCallReturnAddrDelta - Delta the ReturnAddr stack slot is moved - /// Used for creating an area before the register spill area on the stack - /// the returnaddr can be savely move to this area + /// TailCallReturnAddrDelta - The number of bytes by which the return address + /// stack slot is moved as a result of tail call optimization. 
int TailCallReturnAddrDelta; /// SRetReturnReg - Some subtargets require that sret lowering includes diff --git a/test/CodeGen/X86/call-push.ll b/test/CodeGen/X86/call-push.ll index 81516c1c594..02cbccc1a49 100644 --- a/test/CodeGen/X86/call-push.ll +++ b/test/CodeGen/X86/call-push.ll @@ -1,9 +1,14 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim | grep subl | count 1 +; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim | FileCheck %s %struct.decode_t = type { i8, i8, i8, i8, i16, i8, i8, %struct.range_t** } %struct.range_t = type { float, float, i32, i32, i32, [0 x i8] } define i32 @decode_byte(%struct.decode_t* %decode) nounwind { +; CHECK: decode_byte: +; CHECK: pushl +; CHECK: popl +; CHECK: popl +; CHECK: jmp entry: %tmp2 = getelementptr %struct.decode_t* %decode, i32 0, i32 4 ; [#uses=1] %tmp23 = bitcast i16* %tmp2 to i32* ; [#uses=1] diff --git a/test/CodeGen/X86/tailcall2.ll b/test/CodeGen/X86/tailcall2.ll index b2fe9d3bb91..02bf0c00b43 100644 --- a/test/CodeGen/X86/tailcall2.ll +++ b/test/CodeGen/X86/tailcall2.ll @@ -146,9 +146,13 @@ define i32 @t11(i32 %x, i32 %y, i32 %z.0, i32 %z.1, i32 %z.2) nounwind ssp { ; eliminated currently. ; 32: t11: +; 32-NOT: subl ${{[0-9]+}}, %esp +; 32-NOT: addl ${{[0-9]+}}, %esp ; 32: jmp {{_?}}foo5 ; 64: t11: +; 64-NOT: subq ${{[0-9]+}}, %esp +; 64-NOT: addq ${{[0-9]+}}, %esp ; 64: jmp {{_?}}foo5 entry: %0 = icmp eq i32 %x, 0 @@ -168,9 +172,13 @@ declare i32 @foo5(i32, i32, i32, i32, i32) define i32 @t12(i32 %x, i32 %y, %struct.t* byval align 4 %z) nounwind ssp { ; 32: t12: +; 32-NOT: subl ${{[0-9]+}}, %esp +; 32-NOT: addl ${{[0-9]+}}, %esp ; 32: jmp {{_?}}foo6 ; 64: t12: +; 64-NOT: subq ${{[0-9]+}}, %esp +; 64-NOT: addq ${{[0-9]+}}, %esp ; 64: jmp {{_?}}foo6 entry: %0 = icmp eq i32 %x, 0