From 2028b793e1fd1a8dd4d99b0b7c9972865d5e806a Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Wed, 11 Jan 2012 19:00:37 +0000 Subject: [PATCH] Support segmented stacks on mac. This uses TLS slot 90, which actually belongs to JavaScriptCore. We only support frames with static size. Patch by Brian Anderson. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@147960 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FrameLowering.cpp | 83 ++++++-- lib/Target/X86/X86TargetMachine.cpp | 3 - test/CodeGen/X86/segmented-stacks.ll | 279 ++++++++++++++++++++------- 3 files changed, 273 insertions(+), 92 deletions(-) diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 4386762c853..4cda76c0a41 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -1298,10 +1298,15 @@ HasNestArgument(const MachineFunction *MF) { return false; } + +/// GetScratchRegister - Get a register for performing work in the segmented +/// stack prologue. Depending on platform and the properties of the function +/// either one or two registers will be needed. Set Primary to true for +/// the first register, false for the second. static unsigned -GetScratchRegister(bool Is64Bit, const MachineFunction &MF) { +GetScratchRegister(bool Is64Bit, const MachineFunction &MF, bool Primary) { if (Is64Bit) { - return X86::R11; + return Primary ? X86::R11 : X86::R12; } else { CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv(); bool IsNested = HasNestArgument(&MF); @@ -1313,13 +1318,13 @@ GetScratchRegister(bool Is64Bit, const MachineFunction &MF) { "nested function."); return -1; } else { - return X86::EAX; + return Primary ? X86::EAX : X86::ECX; } } else { if (IsNested) - return X86::EDX; + return Primary ? X86::EDX : X86::EAX; else - return X86::ECX; + return Primary ? 
X86::ECX : X86::EAX; } } } @@ -1339,14 +1344,14 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { DebugLoc DL; const X86Subtarget *ST = &MF.getTarget().getSubtarget(); - unsigned ScratchReg = GetScratchRegister(Is64Bit, MF); + unsigned ScratchReg = GetScratchRegister(Is64Bit, MF, true); assert(!MF.getRegInfo().isLiveIn(ScratchReg) && "Scratch register is live-in"); if (MF.getFunction()->isVarArg()) report_fatal_error("Segmented stacks do not support vararg functions."); - if (!ST->isTargetLinux()) - report_fatal_error("Segmented stacks supported only on linux."); + if (!ST->isTargetLinux() && !ST->isTargetDarwin()) + report_fatal_error("Segmented stacks supported only on linux and darwin."); MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock(); MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock(); @@ -1377,12 +1382,21 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { // prologue. StackSize = MFI->getStackSize(); + // When the frame size is less than 256 we just compare the stack + // boundary directly to the value of the stack pointer, per gcc. + bool CompareStackPointer = StackSize < kSplitStackAvailable; + // Read the limit off the current stacklet off the stack_guard location. if (Is64Bit) { - TlsReg = X86::FS; - TlsOffset = 0x70; + if (ST->isTargetLinux()) { + TlsReg = X86::FS; + TlsOffset = 0x70; + } else if (ST->isTargetDarwin()) { + TlsReg = X86::GS; + TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90. 
+ } - if (StackSize < kSplitStackAvailable) + if (CompareStackPointer) ScratchReg = X86::RSP; else BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP) @@ -1392,16 +1406,55 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg); } else { TlsReg = X86::GS; - TlsOffset = 0x30; - if (StackSize < kSplitStackAvailable) + if (CompareStackPointer) ScratchReg = X86::ESP; else BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP) .addImm(1).addReg(0).addImm(-StackSize).addReg(0); - BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg) - .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg); + if (ST->isTargetLinux()) { + TlsOffset = 0x30; + + BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg) + .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg); + } else if (ST->isTargetDarwin()) { + TlsOffset = 0x48 + 90*4; + + // TlsOffset doesn't fit into a mod r/m byte so we need an extra register + unsigned ScratchReg2; + bool SaveScratch2; + if (CompareStackPointer) { + // The primary scratch register is available for holding the TLS offset + ScratchReg2 = GetScratchRegister(Is64Bit, MF, true); + SaveScratch2 = false; + } else { + // Need to use a second register to hold the TLS offset + ScratchReg2 = GetScratchRegister(Is64Bit, MF, false); + + // Unfortunately, with fastcc the second scratch register may hold an arg + SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2); + } + + // If Scratch2 is live-in then it needs to be saved + assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) && + "Scratch register is live-in and not saved"); + + if (SaveScratch2) + BuildMI(checkMBB, DL, TII.get(X86::PUSH32r)) + .addReg(ScratchReg2, RegState::Kill); + + BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2) + .addImm(TlsOffset); + BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)) + .addReg(ScratchReg) + 
.addReg(ScratchReg2).addImm(1).addReg(0) + .addImm(0) + .addReg(TlsReg); + + if (SaveScratch2) + BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2); + } } // This jump is taken if SP >= (Stacklet Limit + Stack Space required). diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index d73a3dd7f3c..b8002d57ebf 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -102,9 +102,6 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT, // default to hard float ABI if (Options.FloatABIType == FloatABI::Default) this->Options.FloatABIType = FloatABI::Hard; - - if (Options.EnableSegmentedStacks && !Subtarget.isTargetELF()) - report_fatal_error("Segmented stacks are only implemented on ELF."); } //===----------------------------------------------------------------------===// diff --git a/test/CodeGen/X86/segmented-stacks.ll b/test/CodeGen/X86/segmented-stacks.ll index 3ba18cffccf..6e91d00ac6d 100644 --- a/test/CodeGen/X86/segmented-stacks.ll +++ b/test/CodeGen/X86/segmented-stacks.ll @@ -1,9 +1,13 @@ -; RUN: llc < %s -mtriple=i686-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32 -; RUN: llc < %s -mtriple=x86_64-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -mtriple=i686-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-Linux +; RUN: llc < %s -mtriple=x86_64-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-Linux +; RUN: llc < %s -mtriple=i686-darwin -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-Darwin +; RUN: llc < %s -mtriple=x86_64-darwin -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-Darwin ; We used to crash with filetype=obj ; RUN: llc < %s -mtriple=i686-linux -segmented-stacks -filetype=obj ; RUN: llc < %s -mtriple=x86_64-linux -segmented-stacks -filetype=obj +; RUN: llc < %s 
-mtriple=i686-darwin -segmented-stacks -filetype=obj +; RUN: llc < %s -mtriple=x86_64-darwin -segmented-stacks -filetype=obj ; Just to prevent the alloca from being optimized away declare void @dummy_use(i32*, i32) @@ -13,25 +17,46 @@ define void @test_basic() { call void @dummy_use (i32* %mem, i32 10) ret void -; X32: test_basic: +; X32-Linux: test_basic: -; X32: cmpl %gs:48, %esp -; X32-NEXT: ja .LBB0_2 +; X32-Linux: cmpl %gs:48, %esp +; X32-Linux-NEXT: ja .LBB0_2 -; X32: pushl $0 -; X32-NEXT: pushl $60 -; X32-NEXT: calll __morestack -; X32-NEXT: ret +; X32-Linux: pushl $0 +; X32-Linux-NEXT: pushl $60 +; X32-Linux-NEXT: calll __morestack +; X32-Linux-NEXT: ret -; X64: test_basic: +; X64-Linux: test_basic: -; X64: cmpq %fs:112, %rsp -; X64-NEXT: ja .LBB0_2 +; X64-Linux: cmpq %fs:112, %rsp +; X64-Linux-NEXT: ja .LBB0_2 -; X64: movabsq $40, %r10 -; X64-NEXT: movabsq $0, %r11 -; X64-NEXT: callq __morestack -; X64-NEXT: ret +; X64-Linux: movabsq $40, %r10 +; X64-Linux-NEXT: movabsq $0, %r11 +; X64-Linux-NEXT: callq __morestack +; X64-Linux-NEXT: ret + +; X32-Darwin: test_basic: + +; X32-Darwin: movl $432, %ecx +; X32-Darwin-NEXT: cmpl %gs:(%ecx), %esp +; X32-Darwin-NEXT: ja LBB0_2 + +; X32-Darwin: pushl $0 +; X32-Darwin-NEXT: pushl $60 +; X32-Darwin-NEXT: calll ___morestack +; X32-Darwin-NEXT: ret + +; X64-Darwin: test_basic: + +; X64-Darwin: cmpq %gs:816, %rsp +; X64-Darwin-NEXT: ja LBB0_2 + +; X64-Darwin: movabsq $40, %r10 +; X64-Darwin-NEXT: movabsq $0, %r11 +; X64-Darwin-NEXT: callq ___morestack +; X64-Darwin-NEXT: ret } @@ -40,23 +65,42 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) { %result = add i32 %other, %addend ret i32 %result -; X32: cmpl %gs:48, %esp -; X32-NEXT: ja .LBB1_2 +; X32-Linux: cmpl %gs:48, %esp +; X32-Linux-NEXT: ja .LBB1_2 -; X32: pushl $4 -; X32-NEXT: pushl $0 -; X32-NEXT: calll __morestack -; X32-NEXT: ret +; X32-Linux: pushl $4 +; X32-Linux-NEXT: pushl $0 +; X32-Linux-NEXT: calll __morestack +; X32-Linux-NEXT: ret -; X64: cmpq 
%fs:112, %rsp -; X64-NEXT: ja .LBB1_2 +; X64-Linux: cmpq %fs:112, %rsp +; X64-Linux-NEXT: ja .LBB1_2 -; X64: movq %r10, %rax -; X64-NEXT: movabsq $0, %r10 -; X64-NEXT: movabsq $0, %r11 -; X64-NEXT: callq __morestack -; X64-NEXT: ret -; X64-NEXT: movq %rax, %r10 +; X64-Linux: movq %r10, %rax +; X64-Linux-NEXT: movabsq $0, %r10 +; X64-Linux-NEXT: movabsq $0, %r11 +; X64-Linux-NEXT: callq __morestack +; X64-Linux-NEXT: ret +; X64-Linux-NEXT: movq %rax, %r10 + +; X32-Darwin: movl $432, %edx +; X32-Darwin-NEXT: cmpl %gs:(%edx), %esp +; X32-Darwin-NEXT: ja LBB1_2 + +; X32-Darwin: pushl $4 +; X32-Darwin-NEXT: pushl $0 +; X32-Darwin-NEXT: calll ___morestack +; X32-Darwin-NEXT: ret + +; X64-Darwin: cmpq %gs:816, %rsp +; X64-Darwin-NEXT: ja LBB1_2 + +; X64-Darwin: movq %r10, %rax +; X64-Darwin-NEXT: movabsq $0, %r10 +; X64-Darwin-NEXT: movabsq $0, %r11 +; X64-Darwin-NEXT: callq ___morestack +; X64-Darwin-NEXT: ret +; X64-Darwin-NEXT: movq %rax, %r10 } @@ -65,23 +109,42 @@ define void @test_large() { call void @dummy_use (i32* %mem, i32 0) ret void -; X32: leal -40012(%esp), %ecx -; X32-NEXT: cmpl %gs:48, %ecx -; X32-NEXT: ja .LBB2_2 +; X32-Linux: leal -40012(%esp), %ecx +; X32-Linux-NEXT: cmpl %gs:48, %ecx +; X32-Linux-NEXT: ja .LBB2_2 -; X32: pushl $0 -; X32-NEXT: pushl $40012 -; X32-NEXT: calll __morestack -; X32-NEXT: ret +; X32-Linux: pushl $0 +; X32-Linux-NEXT: pushl $40012 +; X32-Linux-NEXT: calll __morestack +; X32-Linux-NEXT: ret -; X64: leaq -40008(%rsp), %r11 -; X64-NEXT: cmpq %fs:112, %r11 -; X64-NEXT: ja .LBB2_2 +; X64-Linux: leaq -40008(%rsp), %r11 +; X64-Linux-NEXT: cmpq %fs:112, %r11 +; X64-Linux-NEXT: ja .LBB2_2 -; X64: movabsq $40008, %r10 -; X64-NEXT: movabsq $0, %r11 -; X64-NEXT: callq __morestack -; X64-NEXT: ret +; X64-Linux: movabsq $40008, %r10 +; X64-Linux-NEXT: movabsq $0, %r11 +; X64-Linux-NEXT: callq __morestack +; X64-Linux-NEXT: ret + +; X32-Darwin: leal -40012(%esp), %ecx +; X32-Darwin-NEXT: movl $432, %eax +; X32-Darwin-NEXT: cmpl %gs:(%eax), 
%ecx +; X32-Darwin-NEXT: ja LBB2_2 + +; X32-Darwin: pushl $0 +; X32-Darwin-NEXT: pushl $40012 +; X32-Darwin-NEXT: calll ___morestack +; X32-Darwin-NEXT: ret + +; X64-Darwin: leaq -40008(%rsp), %r11 +; X64-Darwin-NEXT: cmpq %gs:816, %r11 +; X64-Darwin-NEXT: ja LBB2_2 + +; X64-Darwin: movabsq $40008, %r10 +; X64-Darwin-NEXT: movabsq $0, %r11 +; X64-Darwin-NEXT: callq ___morestack +; X64-Darwin-NEXT: ret } @@ -90,25 +153,46 @@ define fastcc void @test_fastcc() { call void @dummy_use (i32* %mem, i32 10) ret void -; X32: test_fastcc: +; X32-Linux: test_fastcc: -; X32: cmpl %gs:48, %esp -; X32-NEXT: ja .LBB3_2 +; X32-Linux: cmpl %gs:48, %esp +; X32-Linux-NEXT: ja .LBB3_2 -; X32: pushl $0 -; X32-NEXT: pushl $60 -; X32-NEXT: calll __morestack -; X32-NEXT: ret +; X32-Linux: pushl $0 +; X32-Linux-NEXT: pushl $60 +; X32-Linux-NEXT: calll __morestack +; X32-Linux-NEXT: ret -; X64: test_fastcc: +; X64-Linux: test_fastcc: -; X64: cmpq %fs:112, %rsp -; X64-NEXT: ja .LBB3_2 +; X64-Linux: cmpq %fs:112, %rsp +; X64-Linux-NEXT: ja .LBB3_2 -; X64: movabsq $40, %r10 -; X64-NEXT: movabsq $0, %r11 -; X64-NEXT: callq __morestack -; X64-NEXT: ret +; X64-Linux: movabsq $40, %r10 +; X64-Linux-NEXT: movabsq $0, %r11 +; X64-Linux-NEXT: callq __morestack +; X64-Linux-NEXT: ret + +; X32-Darwin: test_fastcc: + +; X32-Darwin: movl $432, %eax +; X32-Darwin-NEXT: cmpl %gs:(%eax), %esp +; X32-Darwin-NEXT: ja LBB3_2 + +; X32-Darwin: pushl $0 +; X32-Darwin-NEXT: pushl $60 +; X32-Darwin-NEXT: calll ___morestack +; X32-Darwin-NEXT: ret + +; X64-Darwin: test_fastcc: + +; X64-Darwin: cmpq %gs:816, %rsp +; X64-Darwin-NEXT: ja LBB3_2 + +; X64-Darwin: movabsq $40, %r10 +; X64-Darwin-NEXT: movabsq $0, %r11 +; X64-Darwin-NEXT: callq ___morestack +; X64-Darwin-NEXT: ret } @@ -117,25 +201,72 @@ define fastcc void @test_fastcc_large() { call void @dummy_use (i32* %mem, i32 0) ret void -; X32: test_fastcc_large: +; X32-Linux: test_fastcc_large: -; X32: leal -40012(%esp), %eax -; X32-NEXT: cmpl %gs:48, %eax -; 
X32-NEXT: ja .LBB4_2 +; X32-Linux: leal -40012(%esp), %eax +; X32-Linux-NEXT: cmpl %gs:48, %eax +; X32-Linux-NEXT: ja .LBB4_2 -; X32: pushl $0 -; X32-NEXT: pushl $40012 -; X32-NEXT: calll __morestack -; X32-NEXT: ret +; X32-Linux: pushl $0 +; X32-Linux-NEXT: pushl $40012 +; X32-Linux-NEXT: calll __morestack +; X32-Linux-NEXT: ret -; X64: test_fastcc_large: +; X64-Linux: test_fastcc_large: -; X64: leaq -40008(%rsp), %r11 -; X64-NEXT: cmpq %fs:112, %r11 -; X64-NEXT: ja .LBB4_2 +; X64-Linux: leaq -40008(%rsp), %r11 +; X64-Linux-NEXT: cmpq %fs:112, %r11 +; X64-Linux-NEXT: ja .LBB4_2 + +; X64-Linux: movabsq $40008, %r10 +; X64-Linux-NEXT: movabsq $0, %r11 +; X64-Linux-NEXT: callq __morestack +; X64-Linux-NEXT: ret + +; X32-Darwin: test_fastcc_large: + +; X32-Darwin: leal -40012(%esp), %eax +; X32-Darwin-NEXT: movl $432, %ecx +; X32-Darwin-NEXT: cmpl %gs:(%ecx), %eax +; X32-Darwin-NEXT: ja LBB4_2 + +; X32-Darwin: pushl $0 +; X32-Darwin-NEXT: pushl $40012 +; X32-Darwin-NEXT: calll ___morestack +; X32-Darwin-NEXT: ret + +; X64-Darwin: test_fastcc_large: + +; X64-Darwin: leaq -40008(%rsp), %r11 +; X64-Darwin-NEXT: cmpq %gs:816, %r11 +; X64-Darwin-NEXT: ja LBB4_2 + +; X64-Darwin: movabsq $40008, %r10 +; X64-Darwin-NEXT: movabsq $0, %r11 +; X64-Darwin-NEXT: callq ___morestack +; X64-Darwin-NEXT: ret + +} + +define fastcc void @test_fastcc_large_with_ecx_arg(i32 %a) { + %mem = alloca i32, i32 10000 + call void @dummy_use (i32* %mem, i32 %a) + ret void + +; This is testing that the Mac implementation preserves ecx + +; X32-Darwin: test_fastcc_large_with_ecx_arg: + +; X32-Darwin: leal -40012(%esp), %eax +; X32-Darwin-NEXT: pushl %ecx +; X32-Darwin-NEXT: movl $432, %ecx +; X32-Darwin-NEXT: cmpl %gs:(%ecx), %eax +; X32-Darwin-NEXT: popl %ecx +; X32-Darwin-NEXT: ja LBB5_2 + +; X32-Darwin: pushl $0 +; X32-Darwin-NEXT: pushl $40012 +; X32-Darwin-NEXT: calll ___morestack +; X32-Darwin-NEXT: ret -; X64: movabsq $40008, %r10 -; X64-NEXT: movabsq $0, %r11 -; X64-NEXT: callq __morestack 
-; X64-NEXT: ret }