Support segmented stacks on mac.

This uses TLS slot 90, which actually belongs to JavaScriptCore. We only support
frames with static size.
Patch by Brian Anderson.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@147960 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Rafael Espindola 2012-01-11 19:00:37 +00:00
parent 7692ce9e81
commit 2028b793e1
3 changed files with 273 additions and 92 deletions

View File

@ -1298,10 +1298,15 @@ HasNestArgument(const MachineFunction *MF) {
return false;
}
/// GetScratchRegister - Get a register for performing work in the segmented
/// stack prologue. Depending on platform and the properties of the function
/// either one or two registers will be needed. Set Primary to true for
/// the first register, false for the second.
static unsigned
GetScratchRegister(bool Is64Bit, const MachineFunction &MF) {
GetScratchRegister(bool Is64Bit, const MachineFunction &MF, bool Primary) {
if (Is64Bit) {
return X86::R11;
return Primary ? X86::R11 : X86::R12;
} else {
CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
bool IsNested = HasNestArgument(&MF);
@ -1313,13 +1318,13 @@ GetScratchRegister(bool Is64Bit, const MachineFunction &MF) {
"nested function.");
return -1;
} else {
return X86::EAX;
return Primary ? X86::EAX : X86::ECX;
}
} else {
if (IsNested)
return X86::EDX;
return Primary ? X86::EDX : X86::EAX;
else
return X86::ECX;
return Primary ? X86::ECX : X86::EAX;
}
}
}
@ -1339,14 +1344,14 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
DebugLoc DL;
const X86Subtarget *ST = &MF.getTarget().getSubtarget<X86Subtarget>();
unsigned ScratchReg = GetScratchRegister(Is64Bit, MF);
unsigned ScratchReg = GetScratchRegister(Is64Bit, MF, true);
assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
"Scratch register is live-in");
if (MF.getFunction()->isVarArg())
report_fatal_error("Segmented stacks do not support vararg functions.");
if (!ST->isTargetLinux())
report_fatal_error("Segmented stacks supported only on linux.");
if (!ST->isTargetLinux() && !ST->isTargetDarwin())
report_fatal_error("Segmented stacks supported only on linux and darwin.");
MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
@ -1377,12 +1382,21 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
// prologue.
StackSize = MFI->getStackSize();
// When the frame size is less than 256 we just compare the stack
// boundary directly to the value of the stack pointer, per gcc.
bool CompareStackPointer = StackSize < kSplitStackAvailable;
// Read the limit of the current stacklet off the stack_guard location.
if (Is64Bit) {
TlsReg = X86::FS;
TlsOffset = 0x70;
if (ST->isTargetLinux()) {
TlsReg = X86::FS;
TlsOffset = 0x70;
} else if (ST->isTargetDarwin()) {
TlsReg = X86::GS;
TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
}
if (StackSize < kSplitStackAvailable)
if (CompareStackPointer)
ScratchReg = X86::RSP;
else
BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP)
@ -1392,16 +1406,55 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
.addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
} else {
TlsReg = X86::GS;
TlsOffset = 0x30;
if (StackSize < kSplitStackAvailable)
if (CompareStackPointer)
ScratchReg = X86::ESP;
else
BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
.addImm(1).addReg(0).addImm(-StackSize).addReg(0);
BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
.addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
if (ST->isTargetLinux()) {
TlsOffset = 0x30;
BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
.addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
} else if (ST->isTargetDarwin()) {
TlsOffset = 0x48 + 90*4;
// TlsOffset doesn't fit into a mod r/m byte so we need an extra register
unsigned ScratchReg2;
bool SaveScratch2;
if (CompareStackPointer) {
// The primary scratch register is available for holding the TLS offset
ScratchReg2 = GetScratchRegister(Is64Bit, MF, true);
SaveScratch2 = false;
} else {
// Need to use a second register to hold the TLS offset
ScratchReg2 = GetScratchRegister(Is64Bit, MF, false);
// Unfortunately, with fastcc the second scratch register may hold an arg
SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
}
// If Scratch2 is live-in then it needs to be saved
assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
"Scratch register is live-in and not saved");
if (SaveScratch2)
BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
.addReg(ScratchReg2, RegState::Kill);
BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
.addImm(TlsOffset);
BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
.addReg(ScratchReg)
.addReg(ScratchReg2).addImm(1).addReg(0)
.addImm(0)
.addReg(TlsReg);
if (SaveScratch2)
BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
}
}
// This jump is taken if SP >= (Stacklet Limit + Stack Space required).

View File

@ -102,9 +102,6 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT,
// default to hard float ABI
if (Options.FloatABIType == FloatABI::Default)
this->Options.FloatABIType = FloatABI::Hard;
if (Options.EnableSegmentedStacks && !Subtarget.isTargetELF())
report_fatal_error("Segmented stacks are only implemented on ELF.");
}
//===----------------------------------------------------------------------===//

View File

@ -1,9 +1,13 @@
; RUN: llc < %s -mtriple=i686-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64
; RUN: llc < %s -mtriple=i686-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-Linux
; RUN: llc < %s -mtriple=x86_64-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-Linux
; RUN: llc < %s -mtriple=i686-darwin -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-Darwin
; RUN: llc < %s -mtriple=x86_64-darwin -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-Darwin
; We used to crash with filetype=obj
; RUN: llc < %s -mtriple=i686-linux -segmented-stacks -filetype=obj
; RUN: llc < %s -mtriple=x86_64-linux -segmented-stacks -filetype=obj
; RUN: llc < %s -mtriple=i686-darwin -segmented-stacks -filetype=obj
; RUN: llc < %s -mtriple=x86_64-darwin -segmented-stacks -filetype=obj
; Just to prevent the alloca from being optimized away
declare void @dummy_use(i32*, i32)
@ -13,25 +17,46 @@ define void @test_basic() {
call void @dummy_use (i32* %mem, i32 10)
ret void
; X32: test_basic:
; X32-Linux: test_basic:
; X32: cmpl %gs:48, %esp
; X32-NEXT: ja .LBB0_2
; X32-Linux: cmpl %gs:48, %esp
; X32-Linux-NEXT: ja .LBB0_2
; X32: pushl $0
; X32-NEXT: pushl $60
; X32-NEXT: calll __morestack
; X32-NEXT: ret
; X32-Linux: pushl $0
; X32-Linux-NEXT: pushl $60
; X32-Linux-NEXT: calll __morestack
; X32-Linux-NEXT: ret
; X64: test_basic:
; X64-Linux: test_basic:
; X64: cmpq %fs:112, %rsp
; X64-NEXT: ja .LBB0_2
; X64-Linux: cmpq %fs:112, %rsp
; X64-Linux-NEXT: ja .LBB0_2
; X64: movabsq $40, %r10
; X64-NEXT: movabsq $0, %r11
; X64-NEXT: callq __morestack
; X64-NEXT: ret
; X64-Linux: movabsq $40, %r10
; X64-Linux-NEXT: movabsq $0, %r11
; X64-Linux-NEXT: callq __morestack
; X64-Linux-NEXT: ret
; X32-Darwin: test_basic:
; X32-Darwin: movl $432, %ecx
; X32-Darwin-NEXT: cmpl %gs:(%ecx), %esp
; X32-Darwin-NEXT: ja LBB0_2
; X32-Darwin: pushl $0
; X32-Darwin-NEXT: pushl $60
; X32-Darwin-NEXT: calll ___morestack
; X32-Darwin-NEXT: ret
; X64-Darwin: test_basic:
; X64-Darwin: cmpq %gs:816, %rsp
; X64-Darwin-NEXT: ja LBB0_2
; X64-Darwin: movabsq $40, %r10
; X64-Darwin-NEXT: movabsq $0, %r11
; X64-Darwin-NEXT: callq ___morestack
; X64-Darwin-NEXT: ret
}
@ -40,23 +65,42 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) {
%result = add i32 %other, %addend
ret i32 %result
; X32: cmpl %gs:48, %esp
; X32-NEXT: ja .LBB1_2
; X32-Linux: cmpl %gs:48, %esp
; X32-Linux-NEXT: ja .LBB1_2
; X32: pushl $4
; X32-NEXT: pushl $0
; X32-NEXT: calll __morestack
; X32-NEXT: ret
; X32-Linux: pushl $4
; X32-Linux-NEXT: pushl $0
; X32-Linux-NEXT: calll __morestack
; X32-Linux-NEXT: ret
; X64: cmpq %fs:112, %rsp
; X64-NEXT: ja .LBB1_2
; X64-Linux: cmpq %fs:112, %rsp
; X64-Linux-NEXT: ja .LBB1_2
; X64: movq %r10, %rax
; X64-NEXT: movabsq $0, %r10
; X64-NEXT: movabsq $0, %r11
; X64-NEXT: callq __morestack
; X64-NEXT: ret
; X64-NEXT: movq %rax, %r10
; X64-Linux: movq %r10, %rax
; X64-Linux-NEXT: movabsq $0, %r10
; X64-Linux-NEXT: movabsq $0, %r11
; X64-Linux-NEXT: callq __morestack
; X64-Linux-NEXT: ret
; X64-Linux-NEXT: movq %rax, %r10
; X32-Darwin: movl $432, %edx
; X32-Darwin-NEXT: cmpl %gs:(%edx), %esp
; X32-Darwin-NEXT: ja LBB1_2
; X32-Darwin: pushl $4
; X32-Darwin-NEXT: pushl $0
; X32-Darwin-NEXT: calll ___morestack
; X32-Darwin-NEXT: ret
; X64-Darwin: cmpq %gs:816, %rsp
; X64-Darwin-NEXT: ja LBB1_2
; X64-Darwin: movq %r10, %rax
; X64-Darwin-NEXT: movabsq $0, %r10
; X64-Darwin-NEXT: movabsq $0, %r11
; X64-Darwin-NEXT: callq ___morestack
; X64-Darwin-NEXT: ret
; X64-Darwin-NEXT: movq %rax, %r10
}
@ -65,23 +109,42 @@ define void @test_large() {
call void @dummy_use (i32* %mem, i32 0)
ret void
; X32: leal -40012(%esp), %ecx
; X32-NEXT: cmpl %gs:48, %ecx
; X32-NEXT: ja .LBB2_2
; X32-Linux: leal -40012(%esp), %ecx
; X32-Linux-NEXT: cmpl %gs:48, %ecx
; X32-Linux-NEXT: ja .LBB2_2
; X32: pushl $0
; X32-NEXT: pushl $40012
; X32-NEXT: calll __morestack
; X32-NEXT: ret
; X32-Linux: pushl $0
; X32-Linux-NEXT: pushl $40012
; X32-Linux-NEXT: calll __morestack
; X32-Linux-NEXT: ret
; X64: leaq -40008(%rsp), %r11
; X64-NEXT: cmpq %fs:112, %r11
; X64-NEXT: ja .LBB2_2
; X64-Linux: leaq -40008(%rsp), %r11
; X64-Linux-NEXT: cmpq %fs:112, %r11
; X64-Linux-NEXT: ja .LBB2_2
; X64: movabsq $40008, %r10
; X64-NEXT: movabsq $0, %r11
; X64-NEXT: callq __morestack
; X64-NEXT: ret
; X64-Linux: movabsq $40008, %r10
; X64-Linux-NEXT: movabsq $0, %r11
; X64-Linux-NEXT: callq __morestack
; X64-Linux-NEXT: ret
; X32-Darwin: leal -40012(%esp), %ecx
; X32-Darwin-NEXT: movl $432, %eax
; X32-Darwin-NEXT: cmpl %gs:(%eax), %ecx
; X32-Darwin-NEXT: ja LBB2_2
; X32-Darwin: pushl $0
; X32-Darwin-NEXT: pushl $40012
; X32-Darwin-NEXT: calll ___morestack
; X32-Darwin-NEXT: ret
; X64-Darwin: leaq -40008(%rsp), %r11
; X64-Darwin-NEXT: cmpq %gs:816, %r11
; X64-Darwin-NEXT: ja LBB2_2
; X64-Darwin: movabsq $40008, %r10
; X64-Darwin-NEXT: movabsq $0, %r11
; X64-Darwin-NEXT: callq ___morestack
; X64-Darwin-NEXT: ret
}
@ -90,25 +153,46 @@ define fastcc void @test_fastcc() {
call void @dummy_use (i32* %mem, i32 10)
ret void
; X32: test_fastcc:
; X32-Linux: test_fastcc:
; X32: cmpl %gs:48, %esp
; X32-NEXT: ja .LBB3_2
; X32-Linux: cmpl %gs:48, %esp
; X32-Linux-NEXT: ja .LBB3_2
; X32: pushl $0
; X32-NEXT: pushl $60
; X32-NEXT: calll __morestack
; X32-NEXT: ret
; X32-Linux: pushl $0
; X32-Linux-NEXT: pushl $60
; X32-Linux-NEXT: calll __morestack
; X32-Linux-NEXT: ret
; X64: test_fastcc:
; X64-Linux: test_fastcc:
; X64: cmpq %fs:112, %rsp
; X64-NEXT: ja .LBB3_2
; X64-Linux: cmpq %fs:112, %rsp
; X64-Linux-NEXT: ja .LBB3_2
; X64: movabsq $40, %r10
; X64-NEXT: movabsq $0, %r11
; X64-NEXT: callq __morestack
; X64-NEXT: ret
; X64-Linux: movabsq $40, %r10
; X64-Linux-NEXT: movabsq $0, %r11
; X64-Linux-NEXT: callq __morestack
; X64-Linux-NEXT: ret
; X32-Darwin: test_fastcc:
; X32-Darwin: movl $432, %eax
; X32-Darwin-NEXT: cmpl %gs:(%eax), %esp
; X32-Darwin-NEXT: ja LBB3_2
; X32-Darwin: pushl $0
; X32-Darwin-NEXT: pushl $60
; X32-Darwin-NEXT: calll ___morestack
; X32-Darwin-NEXT: ret
; X64-Darwin: test_fastcc:
; X64-Darwin: cmpq %gs:816, %rsp
; X64-Darwin-NEXT: ja LBB3_2
; X64-Darwin: movabsq $40, %r10
; X64-Darwin-NEXT: movabsq $0, %r11
; X64-Darwin-NEXT: callq ___morestack
; X64-Darwin-NEXT: ret
}
@ -117,25 +201,72 @@ define fastcc void @test_fastcc_large() {
call void @dummy_use (i32* %mem, i32 0)
ret void
; X32: test_fastcc_large:
; X32-Linux: test_fastcc_large:
; X32: leal -40012(%esp), %eax
; X32-NEXT: cmpl %gs:48, %eax
; X32-NEXT: ja .LBB4_2
; X32-Linux: leal -40012(%esp), %eax
; X32-Linux-NEXT: cmpl %gs:48, %eax
; X32-Linux-NEXT: ja .LBB4_2
; X32: pushl $0
; X32-NEXT: pushl $40012
; X32-NEXT: calll __morestack
; X32-NEXT: ret
; X32-Linux: pushl $0
; X32-Linux-NEXT: pushl $40012
; X32-Linux-NEXT: calll __morestack
; X32-Linux-NEXT: ret
; X64: test_fastcc_large:
; X64-Linux: test_fastcc_large:
; X64: leaq -40008(%rsp), %r11
; X64-NEXT: cmpq %fs:112, %r11
; X64-NEXT: ja .LBB4_2
; X64-Linux: leaq -40008(%rsp), %r11
; X64-Linux-NEXT: cmpq %fs:112, %r11
; X64-Linux-NEXT: ja .LBB4_2
; X64-Linux: movabsq $40008, %r10
; X64-Linux-NEXT: movabsq $0, %r11
; X64-Linux-NEXT: callq __morestack
; X64-Linux-NEXT: ret
; X32-Darwin: test_fastcc_large:
; X32-Darwin: leal -40012(%esp), %eax
; X32-Darwin-NEXT: movl $432, %ecx
; X32-Darwin-NEXT: cmpl %gs:(%ecx), %eax
; X32-Darwin-NEXT: ja LBB4_2
; X32-Darwin: pushl $0
; X32-Darwin-NEXT: pushl $40012
; X32-Darwin-NEXT: calll ___morestack
; X32-Darwin-NEXT: ret
; X64-Darwin: test_fastcc_large:
; X64-Darwin: leaq -40008(%rsp), %r11
; X64-Darwin-NEXT: cmpq %gs:816, %r11
; X64-Darwin-NEXT: ja LBB4_2
; X64-Darwin: movabsq $40008, %r10
; X64-Darwin-NEXT: movabsq $0, %r11
; X64-Darwin-NEXT: callq ___morestack
; X64-Darwin-NEXT: ret
}
define fastcc void @test_fastcc_large_with_ecx_arg(i32 %a) {
%mem = alloca i32, i32 10000
call void @dummy_use (i32* %mem, i32 %a)
ret void
; This is testing that the Mac implementation preserves ecx
; X32-Darwin: test_fastcc_large_with_ecx_arg:
; X32-Darwin: leal -40012(%esp), %eax
; X32-Darwin-NEXT: pushl %ecx
; X32-Darwin-NEXT: movl $432, %ecx
; X32-Darwin-NEXT: cmpl %gs:(%ecx), %eax
; X32-Darwin-NEXT: popl %ecx
; X32-Darwin-NEXT: ja LBB5_2
; X32-Darwin: pushl $0
; X32-Darwin-NEXT: pushl $40012
; X32-Darwin-NEXT: calll ___morestack
; X32-Darwin-NEXT: ret
; X64: movabsq $40008, %r10
; X64-NEXT: movabsq $0, %r11
; X64-NEXT: callq __morestack
; X64-NEXT: ret
}