The fix for PR7193 was overly conservative. The only case where a sibcall callee
address cannot be allocated a register is in 32-bit mode where the first
three arguments are marked inreg. In that case EAX, EDX, and ECX will be
used for argument passing.

This fixes PR7610.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@108327 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2010-07-14 06:44:01 +00:00
parent 7e3f0d2690
commit dedd974e7e
2 changed files with 25 additions and 6 deletions

View File

@ -2458,17 +2458,23 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// If the tailcall address may be in a register, then make sure it's // If the tailcall address may be in a register, then make sure it's
// possible to register allocate for it. In 32-bit, the call address can // possible to register allocate for it. In 32-bit, the call address can
// only target EAX, EDX, or ECX since the tail call must be scheduled after // only target EAX, EDX, or ECX since the tail call must be scheduled after
// callee-saved registers are restored. In 64-bit, it's RAX, RCX, RDX, RSI, // callee-saved registers are restored. These happen to be the same
// RDI, R8, R9, R11. // registers used to pass 'inreg' arguments so watch out for those.
if (!isa<GlobalAddressSDNode>(Callee) && if (!Subtarget->is64Bit() &&
!isa<GlobalAddressSDNode>(Callee) &&
!isa<ExternalSymbolSDNode>(Callee)) { !isa<ExternalSymbolSDNode>(Callee)) {
unsigned Limit = Subtarget->is64Bit() ? 8 : 3;
unsigned NumInRegs = 0; unsigned NumInRegs = 0;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i]; CCValAssign &VA = ArgLocs[i];
if (VA.isRegLoc()) { if (!VA.isRegLoc())
if (++NumInRegs == Limit) continue;
unsigned Reg = VA.getLocReg();
switch (Reg) {
default: break;
case X86::EAX: case X86::EDX: case X86::ECX:
if (++NumInRegs == 3)
return false; return false;
break;
} }
} }
} }

View File

@ -0,0 +1,13 @@
; RUN: llc < %s -mtriple=i386-pc-linux-gnu | FileCheck %s
; pr7610
; Regression test body: on a 32-bit x86 target, an indirect tail call whose
; callee address is loaded from memory is expected to be emitted as an
; indirect jump through a register, even though four arguments are passed.
; NOTE(review): cc10 is presumably the GHC calling convention -- confirm
; against the LLVM language reference.
define cc10 void @t(i32* %Base_Arg, i32* %Sp_Arg, i32* %Hp_Arg, i32 %R1_Arg) nounwind {
cm1:
; Expected output: the function label, then an indirect jump through %eax
; (i.e. the tail call is lowered to a jump rather than a call + ret).
; CHECK: t:
; CHECK: jmpl *%eax
; %nm3 is %Sp_Arg advanced by one i32 element; it is forwarded as the second
; argument of the tail call below.
%nm3 = getelementptr i32* %Sp_Arg, i32 1
; Read the callee address (stored as an i32) from the slot %Sp_Arg points at.
%nm9 = load i32* %Sp_Arg
; Reinterpret the loaded integer as a pointer to a function with the same
; four-argument signature as @t.
%nma = inttoptr i32 %nm9 to void (i32*, i32*, i32*, i32)*
; Indirect tail call through the loaded pointer, using the same calling
; convention as the enclosing function.
tail call cc10 void %nma(i32* %Base_Arg, i32* %nm3, i32* %Hp_Arg, i32 %R1_Arg) nounwind
ret void
}