findDeadCallerSavedReg needs to pay attention to calling convention

The caller-saved registers differ between the SysV and Win64 calling conventions, so scavenge the dead register from the set of GPRs available for tail calls instead of from a hard-coded list.

Refactor the register info to add a new helper, getGPRsForTailCall, that returns the GPRs available for tail calls. Add a new test case for Windows. Fix up a number of x86 tests, since RCX is now preferred over RDX on SysV (and ECX over EDX in the 32-bit test).

Differential Revision: http://reviews.llvm.org/D14878

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@253927 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Andy Ayers 2015-11-23 22:17:44 +00:00
parent 272978f362
commit 77a84a9451
12 changed files with 85 additions and 45 deletions

View File

@ -146,21 +146,14 @@ static unsigned getLEArOpcode(unsigned IsLP64) {
/// to this register without worry about clobbering it.
static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
const TargetRegisterInfo *TRI,
const X86RegisterInfo *TRI,
bool Is64Bit) {
const MachineFunction *MF = MBB.getParent();
const Function *F = MF->getFunction();
if (!F || MF->getMMI().callsEHReturn())
return 0;
static const uint16_t CallerSavedRegs32Bit[] = {
X86::EAX, X86::EDX, X86::ECX, 0
};
static const uint16_t CallerSavedRegs64Bit[] = {
X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI,
X86::R8, X86::R9, X86::R10, X86::R11, 0
};
const TargetRegisterClass &AvailableRegs = *TRI->getGPRsForTailCall(*MF);
unsigned Opc = MBBI->getOpcode();
switch (Opc) {
@ -189,10 +182,9 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
Uses.insert(*AI);
}
const uint16_t *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit;
for (; *CS; ++CS)
if (!Uses.count(*CS))
return *CS;
for (auto CS : AvailableRegs)
if (!Uses.count(CS) && CS != X86::RIP)
return CS;
}
}

View File

@ -177,19 +177,24 @@ X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
return &X86::GR64_NOREX_NOSPRegClass;
return &X86::GR32_NOREX_NOSPRegClass;
case 4: // Available for tailcall (not callee-saved GPRs).
const Function *F = MF.getFunction();
if (IsWin64 || (F && F->getCallingConv() == CallingConv::X86_64_Win64))
return &X86::GR64_TCW64RegClass;
else if (Is64Bit)
return &X86::GR64_TCRegClass;
bool hasHipeCC = (F ? F->getCallingConv() == CallingConv::HiPE : false);
if (hasHipeCC)
return &X86::GR32RegClass;
return &X86::GR32_TCRegClass;
return getGPRsForTailCall(MF);
}
}
/// Select the general-purpose register class whose members may be used when
/// forming a tail call in \p MF.
///
/// The choice is made in this order:
///   1. IsWin64 is set, or the function uses the X86_64_Win64 calling
///      convention  -> GR64_TCW64
///   2. Is64Bit is set -> GR64_TC
///   3. the function uses the HiPE calling convention -> GR32
///   4. otherwise -> GR32_TC
///
/// A machine function with no associated IR function is treated as using
/// neither the Win64 nor the HiPE calling convention.
const TargetRegisterClass *
X86RegisterInfo::getGPRsForTailCall(const MachineFunction &MF) const {
  const Function *Fn = MF.getFunction();

  // The calling-convention queries are only meaningful when an IR function
  // is attached to the machine function.
  const bool UsesWin64CC =
      Fn && Fn->getCallingConv() == CallingConv::X86_64_Win64;
  if (IsWin64 || UsesWin64CC)
    return &X86::GR64_TCW64RegClass;

  if (Is64Bit)
    return &X86::GR64_TCRegClass;

  // Remaining cases: HiPE gets the full GR32 class, everything else gets the
  // GR32 tail-call class.
  const bool UsesHiPECC = Fn && Fn->getCallingConv() == CallingConv::HiPE;
  if (UsesHiPECC)
    return &X86::GR32RegClass;

  return &X86::GR32_TCRegClass;
}
const TargetRegisterClass *
X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
if (RC == &X86::CCRRegClass) {

View File

@ -87,6 +87,11 @@ public:
const TargetRegisterClass *
getCrossCopyRegClass(const TargetRegisterClass *RC) const override;
/// getGPRsForTailCall - Returns a register class with registers that can be
/// used in forming tail calls.
///
/// \param MF the machine function being compiled; its calling convention
///           (together with the target's 64-bit / Win64 settings) determines
///           which register class is returned.
/// \returns a pointer to the selected general-purpose register class.
const TargetRegisterClass *
getGPRsForTailCall(const MachineFunction &MF) const;
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const override;

View File

@ -375,7 +375,7 @@ def GR32_TC : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX)>;
def GR64_TC : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI,
R8, R9, R11, RIP)>;
def GR64_TCW64 : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX,
R8, R9, R11)>;
R8, R9, R10, R11)>;
// GR8_NOREX - GR8 registers which do not require a REX prefix.
def GR8_NOREX : RegisterClass<"X86", [i8], 8,

View File

@ -24,7 +24,7 @@ define i32 @my_get_xyz() {
; X64: movq my_emutls_v_xyz@GOTPCREL(%rip), %rdi
; X64-NEXT: callq my_emutls_get_address@PLT
; X64-NEXT: movl (%rax), %eax
; X64-NEXT: popq %rdx
; X64-NEXT: popq %rcx
; X64-NEXT: retq
entry:
@ -50,7 +50,7 @@ define i32 @f1() {
; X64: movq __emutls_v.i@GOTPCREL(%rip), %rdi
; X64-NEXT: callq __emutls_get_address@PLT
; X64-NEXT: movl (%rax), %eax
; X64-NEXT: popq %rdx
; X64-NEXT: popq %rcx
; X64-NEXT: retq
entry:

View File

@ -21,7 +21,7 @@ define i32 @my_get_xyz() {
; X64: movl $my_emutls_v_xyz, %edi
; X64-NEXT: callq my_emutls_get_address
; X64-NEXT: movl (%rax), %eax
; X64-NEXT: popq %rdx
; X64-NEXT: popq %rcx
; X64-NEXT: retq
entry:
@ -50,7 +50,7 @@ define i32 @f1() {
; X64: movl $__emutls_v.i1, %edi
; X64-NEXT: callq __emutls_get_address
; X64-NEXT: movl (%rax), %eax
; X64-NEXT: popq %rdx
; X64-NEXT: popq %rcx
; X64-NEXT: retq
entry:
@ -67,7 +67,7 @@ define i32* @f2() {
; X64-LABEL: f2:
; X64: movl $__emutls_v.i1, %edi
; X64-NEXT: callq __emutls_get_address
; X64-NEXT: popq %rdx
; X64-NEXT: popq %rcx
; X64-NEXT: retq
entry:

View File

@ -77,7 +77,7 @@ define i64 @test_fptosi_i64(half* %p) #0 {
; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rax
; CHECK-LIBCALL-NEXT: popq %rdx
; CHECK-LIBCALL-NEXT: popq %rcx
; CHECK-LIBCALL-NEXT: retq
; CHECK-F16C-NEXT: movswl (%rdi), [[REG0:%[a-z0-9]+]]
@ -127,7 +127,7 @@ define i64 @test_fptoui_i64(half* %p) #0 {
; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, [[REG5:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: ucomiss [[REG1]], %xmm0
; CHECK-LIBCALL-NEXT: cmovaeq [[REG4]], [[REG5]]
; CHECK-LIBCALL-NEXT: popq %rdx
; CHECK-LIBCALL-NEXT: popq %rcx
; CHECK-LIBCALL-NEXT: retq
; CHECK-F16C-NEXT: movswl (%rdi), [[REG0:%[a-z0-9]+]]

View File

@ -16,7 +16,7 @@ define i32 addrspace(1)* @test(i32 addrspace(1)* %ptr) gc "statepoint-example" {
; CHECK: movq %rdi, (%rsp)
; CHECK: callq return_i1
; CHECK: movq (%rsp), %rax
; CHECK: popq %rdx
; CHECK: popq %rcx
; CHECK: retq
entry:
%alloca = alloca i32 addrspace(1)*, align 8
@ -33,7 +33,7 @@ define i32 addrspace(1)* @test2(i32 addrspace(1)* %ptr) gc "statepoint-example"
; CHECK: movq %rdi, (%rsp)
; CHECK: callq return_i1
; CHECK: xorl %eax, %eax
; CHECK: popq %rdx
; CHECK: popq %rcx
; CHECK: retq
entry:
%alloca = alloca i32 addrspace(1)*, align 8

View File

@ -20,7 +20,7 @@ define i1 @test_i1_return() gc "statepoint-example" {
; state arguments to the statepoint
; CHECK: pushq %rax
; CHECK: callq return_i1
; CHECK: popq %rdx
; CHECK: popq %rcx
; CHECK: retq
entry:
%safepoint_token = tail call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0)
@ -32,7 +32,7 @@ define i32 @test_i32_return() gc "statepoint-example" {
; CHECK-LABEL: test_i32_return
; CHECK: pushq %rax
; CHECK: callq return_i32
; CHECK: popq %rdx
; CHECK: popq %rcx
; CHECK: retq
entry:
%safepoint_token = tail call i32 (i64, i32, i32 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32f(i64 0, i32 0, i32 ()* @return_i32, i32 0, i32 0, i32 0, i32 0)
@ -44,7 +44,7 @@ define i32* @test_i32ptr_return() gc "statepoint-example" {
; CHECK-LABEL: test_i32ptr_return
; CHECK: pushq %rax
; CHECK: callq return_i32ptr
; CHECK: popq %rdx
; CHECK: popq %rcx
; CHECK: retq
entry:
%safepoint_token = tail call i32 (i64, i32, i32* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p0i32f(i64 0, i32 0, i32* ()* @return_i32ptr, i32 0, i32 0, i32 0, i32 0)
@ -82,7 +82,7 @@ define i1 @test_relocate(i32 addrspace(1)* %a) gc "statepoint-example" {
; CHECK: pushq %rax
; CHECK: callq return_i1
; CHECK-NEXT: .Ltmp11:
; CHECK-NEXT: popq %rdx
; CHECK-NEXT: popq %rcx
; CHECK-NEXT: retq
entry:
%safepoint_token = tail call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %a)
@ -107,7 +107,7 @@ define i1 @test_i1_return_patchable() gc "statepoint-example" {
; A patchable variant of test_i1_return
; CHECK: pushq %rax
; CHECK: nopl
; CHECK: popq %rdx
; CHECK: popq %rcx
; CHECK: retq
entry:
%safepoint_token = tail call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 3, i1 ()*null, i32 0, i32 0, i32 0, i32 0)

View File

@ -18,7 +18,7 @@ define i1 @test_i1_return() gc "statepoint-example" {
; state arguments to the statepoint
; CHECK: pushq %rax
; CHECK: callq return_i1
; CHECK: popq %rdx
; CHECK: popq %rcx
; CHECK: retq
entry:
%safepoint_token = tail call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 1, i32 0, i32 0)
@ -30,7 +30,7 @@ define i32 @test_i32_return() gc "statepoint-example" {
; CHECK-LABEL: test_i32_return
; CHECK: pushq %rax
; CHECK: callq return_i32
; CHECK: popq %rdx
; CHECK: popq %rcx
; CHECK: retq
entry:
%safepoint_token = tail call i32 (i64, i32, i32 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32f(i64 0, i32 0, i32 ()* @return_i32, i32 0, i32 1, i32 0, i32 0)
@ -42,7 +42,7 @@ define i32* @test_i32ptr_return() gc "statepoint-example" {
; CHECK-LABEL: test_i32ptr_return
; CHECK: pushq %rax
; CHECK: callq return_i32ptr
; CHECK: popq %rdx
; CHECK: popq %rcx
; CHECK: retq
entry:
%safepoint_token = tail call i32 (i64, i32, i32* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p0i32f(i64 0, i32 0, i32* ()* @return_i32ptr, i32 0, i32 1, i32 0, i32 0)
@ -68,7 +68,7 @@ define i1 @test_relocate(i32 addrspace(1)* %a) gc "statepoint-example" {
; CHECK: pushq %rax
; CHECK: callq return_i1
; CHECK-NEXT: .Ltmp9:
; CHECK-NEXT: popq %rdx
; CHECK-NEXT: popq %rcx
; CHECK-NEXT: retq
entry:
%safepoint_token = tail call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 1, i32 0, i32 0, i32 addrspace(1)* %a)
@ -92,7 +92,7 @@ define i32 @test_transition_args() gc "statepoint-example" {
; CHECK-LABEL: test_transition_args
; CHECK: pushq %rax
; CHECK: callq return_i32
; CHECK: popq %rdx
; CHECK: popq %rcx
; CHECK: retq
entry:
%val = alloca i32
@ -105,7 +105,7 @@ define i32 @test_transition_args_2() gc "statepoint-example" {
; CHECK-LABEL: test_transition_args_2
; CHECK: pushq %rax
; CHECK: callq return_i32
; CHECK: popq %rdx
; CHECK: popq %rcx
; CHECK: retq
entry:
%val = alloca i32

View File

@ -3696,7 +3696,7 @@ define i32 @sext_2i8_to_i32(<16 x i8> %A) nounwind uwtable readnone ssp {
; X32-SSE41-NEXT: .cfi_def_cfa_offset 8
; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm0
; X32-SSE41-NEXT: movd %xmm0, %eax
; X32-SSE41-NEXT: popl %edx
; X32-SSE41-NEXT: popl %ecx
; X32-SSE41-NEXT: retl
entry:
%Shuf = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1>

View File

@ -0,0 +1,38 @@
; RUN: llc < %s -mtriple=x86_64-pc-win32-coreclr | FileCheck %s -check-prefix=WIN_X64
; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s -check-prefix=LINUX
%Object = type <{ [0 x i64*]* }>
; @C1 spills its argument to a stack slot, reloads it, and tail-calls @C2 with
; (reloaded pointer, i32 0, null).
;
; Expected code for the Windows x64 run: the entry block pushes %rax, and
; before the tail jump the third outgoing argument is zeroed in %r8d and the
; stack is restored with a pop into %rax.
; Expected code for the Linux run: the slot is written at -8(%rsp) without a
; push, and the third outgoing argument is zeroed in %edx before the jump.
;
; NOTE(review): presumably this guards against the epilogue popping into a
; register that carries an outgoing tail-call argument under the Windows
; convention - confirm against the change that introduced the test.
define void @C1(%Object addrspace(1)* %param0) gc "coreclr" {
entry:
; WIN_X64: # BB#0:
; WIN_X64: pushq %rax
; LINUX: # BB#0: # %entry
; LINUX: movq $0, -8(%rsp)
%this = alloca %Object addrspace(1)*
store %Object addrspace(1)* null, %Object addrspace(1)** %this
store %Object addrspace(1)* %param0, %Object addrspace(1)** %this
br label %0
; <label>:0 ; preds = %entry
%1 = load %Object addrspace(1)*, %Object addrspace(1)** %this, align 8
; WIN_X64: xorl %r8d, %r8d
; WIN_X64: popq %rax
; WIN_X64: rex64 jmp C2 # TAILCALL
; LINUX: xorl %edx, %edx
; LINUX: jmp C2 # TAILCALL
tail call void @C2(%Object addrspace(1)* %1, i32 0, %Object addrspace(1)* null)
ret void
}
declare void @C2(%Object addrspace(1)*, i32, %Object addrspace(1)*)
; Function Attrs: nounwind
declare void @llvm.localescape(...) #0
attributes #0 = { nounwind }