Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 asm changes from Ingo Molnar:
 "There were lots of changes in this development cycle:

   - over 100 separate cleanups, restructuring changes, speedups and
     fixes in the x86 system call, irq, trap and other entry code, part
     of a heroic effort to deobfuscate decade-old spaghetti asm code
     and its C code dependencies (Denys Vlasenko, Andy Lutomirski)

   - alternatives code fixes and enhancements (Borislav Petkov)

   - simplifications and cleanups to the compat code (Brian Gerst)

   - signal handling fixes and new x86 testcases (Andy Lutomirski)

   - various other fixes and cleanups

  By their nature many of these changes are risky - we tried to test
  them well on many different x86 systems (there are no known
  regressions), and they are split up finely to help bisection - but
  there's still a fair bit of residual risk left, so caveat emptor"

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (148 commits)
  perf/x86/64: Report regs_user->ax too in get_regs_user()
  perf/x86/64: Simplify regs_user->abi setting code in get_regs_user()
  perf/x86/64: Do report user_regs->cx while we are in syscall, in get_regs_user()
  perf/x86/64: Do not guess user_regs->cs, ss, sp in get_regs_user()
  x86/asm/entry/32: Tidy up JNZ instructions after TESTs
  x86/asm/entry/64: Reduce padding in execve stubs
  x86/asm/entry/64: Remove GET_THREAD_INFO() in ret_from_fork
  x86/asm/entry/64: Simplify jumps in ret_from_fork
  x86/asm/entry/64: Remove a redundant jump
  x86/asm/entry/64: Optimize [v]fork/clone stubs
  x86/asm/entry: Zero EXTRA_REGS for stub32_execve() too
  x86/asm/entry/64: Move stub_x32_execvecloser() to stub_execveat()
  x86/asm/entry/64: Use common code for rt_sigreturn() epilogue
  x86/asm/entry/64: Add forgotten CFI annotation
  x86/asm/entry/irq: Simplify interrupt dispatch table (IDT) layout
  x86/asm/entry/64: Move opportunistic sysret code to syscall code path
  x86, selftests: Add sigreturn selftest
  x86/alternatives: Guard NOPs optimization
  x86/asm/entry: Clear EXTRA_REGS for all executable formats
  x86/signal: Remove pax argument from restore_sigcontext
  ...
commit 60f898eeaa
Linus Torvalds, 2015-04-13 13:16:36 -07:00
121 changed files with 3084 additions and 2104 deletions

View File

@@ -406,6 +406,12 @@ Protocol: 2.00+
 	- If 0, the protected-mode code is loaded at 0x10000.
 	- If 1, the protected-mode code is loaded at 0x100000.
+  Bit 1 (kernel internal): KASLR_FLAG
+	- Used internally by the compressed kernel to communicate
+	  KASLR status to kernel proper.
+	  If 1, KASLR enabled.
+	  If 0, KASLR disabled.
+
   Bit 5 (write): QUIET_FLAG
 	- If 0, print early messages.
 	- If 1, suppress early messages.

View File

@@ -295,7 +295,8 @@ static unsigned long find_random_addr(unsigned long minimum,
 	return slots_fetch_random();
 }
 
-unsigned char *choose_kernel_location(unsigned char *input,
+unsigned char *choose_kernel_location(struct boot_params *boot_params,
+				      unsigned char *input,
 				      unsigned long input_size,
 				      unsigned char *output,
 				      unsigned long output_size)
@@ -315,6 +316,8 @@ unsigned char *choose_kernel_location(unsigned char *input,
 	}
 #endif
 
+	boot_params->hdr.loadflags |= KASLR_FLAG;
+
 	/* Record the various known unsafe memory ranges. */
 	mem_avoid_init((unsigned long)input, input_size,
 		       (unsigned long)output, output_size);

View File

@@ -29,6 +29,7 @@
 #include <asm/page_types.h>
 #include <asm/boot.h>
 #include <asm/asm-offsets.h>
+#include <asm/bootparam.h>
 
 	__HEAD
 ENTRY(startup_32)
@@ -102,7 +103,7 @@ preferred_addr:
 	 * Test KEEP_SEGMENTS flag to see if the bootloader is asking
 	 * us to not reload segments
 	 */
-	testb	$(1<<6), BP_loadflags(%esi)
+	testb	$KEEP_SEGMENTS, BP_loadflags(%esi)
 	jnz	1f
 
 	cli

View File

@@ -31,6 +31,7 @@
 #include <asm/msr.h>
 #include <asm/processor-flags.h>
 #include <asm/asm-offsets.h>
+#include <asm/bootparam.h>
 
 	__HEAD
 	.code32
@@ -46,7 +47,7 @@ ENTRY(startup_32)
 	 * Test KEEP_SEGMENTS flag to see if the bootloader is asking
 	 * us to not reload segments
 	 */
-	testb	$(1<<6), BP_loadflags(%esi)
+	testb	$KEEP_SEGMENTS, BP_loadflags(%esi)
 	jnz	1f
 
 	cli
@@ -164,7 +165,7 @@ ENTRY(startup_32)
 	/* After gdt is loaded */
 	xorl	%eax, %eax
 	lldt	%ax
-	movl	$0x20, %eax
+	movl	$__BOOT_TSS, %eax
 	ltr	%ax
 
 	/*

View File

@@ -377,6 +377,9 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
 	real_mode = rmode;
 
+	/* Clear it for solely in-kernel use */
+	real_mode->hdr.loadflags &= ~KASLR_FLAG;
+
 	sanitize_boot_params(real_mode);
 
 	if (real_mode->screen_info.orig_video_mode == 7) {
@@ -401,7 +404,7 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
 	 * the entire decompressed kernel plus relocation table, or the
 	 * entire decompressed kernel plus .bss and .brk sections.
 	 */
-	output = choose_kernel_location(input_data, input_len, output,
+	output = choose_kernel_location(real_mode, input_data, input_len, output,
 					output_len > run_size ? output_len
 							      : run_size);
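
Taken together with the boot.txt and aslr.c hunks above, this gives kernel proper a single bit to test: decompress_kernel() first clears KASLR_FLAG in the boot_params load flags, and choose_kernel_location() sets it again only when it actually randomizes the kernel's location. A minimal, self-contained C sketch of the consumer side follows; the kaslr_active() helper and the cut-down structs are illustrative only, not the kernel's real definitions.

#include <stdbool.h>
#include <stdint.h>

#define KASLR_FLAG (1 << 1)   /* bit 1 of loadflags, as documented above */

/* Cut-down stand-ins for the real bootparam.h structures. */
struct setup_header { uint8_t loadflags; };
struct boot_params  { struct setup_header hdr; };

/* Hypothetical helper: how kernel proper can tell whether the
 * decompressor actually randomized the load address. */
static bool kaslr_active(const struct boot_params *bp)
{
	return (bp->hdr.loadflags & KASLR_FLAG) != 0;
}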

View File

@@ -57,7 +57,8 @@ int cmdline_find_option_bool(const char *option);
 
 #if CONFIG_RANDOMIZE_BASE
 /* aslr.c */
-unsigned char *choose_kernel_location(unsigned char *input,
+unsigned char *choose_kernel_location(struct boot_params *boot_params,
+				      unsigned char *input,
 				      unsigned long input_size,
 				      unsigned char *output,
 				      unsigned long output_size);
@@ -65,7 +66,8 @@ unsigned char *choose_kernel_location(unsigned char *input,
 bool has_cpuflag(int flag);
 #else
 static inline
-unsigned char *choose_kernel_location(unsigned char *input,
+unsigned char *choose_kernel_location(struct boot_params *boot_params,
+				      unsigned char *input,
 				      unsigned long input_size,
 				      unsigned char *output,
 				      unsigned long output_size)

View File

@@ -178,7 +178,7 @@ continue_block:
 	## 2a) PROCESS FULL BLOCKS:
 	################################################################
 full_block:
-	movq	$128,%rax
+	movl	$128,%eax
 	lea	128*8*2(block_0), block_1
 	lea	128*8*3(block_0), block_2
 	add	$128*8*1, block_0

View File

@@ -264,7 +264,7 @@ ENTRY(twofish_enc_blk)
 	movq	R1, 8(%rsi)
 
 	popq	R1
-	movq	$1,%rax
+	movl	$1,%eax
 	ret
 ENDPROC(twofish_enc_blk)
@@ -316,6 +316,6 @@ ENTRY(twofish_dec_blk)
 	movq	R1, 8(%rsi)
 
 	popq	R1
-	movq	$1,%rax
+	movl	$1,%eax
 	ret
 ENDPROC(twofish_dec_blk)

View File

@@ -3,7 +3,6 @@
 #
 
 obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o
-obj-$(CONFIG_IA32_EMULATION) += nosyscall.o syscall_ia32.o
 
 obj-$(CONFIG_IA32_AOUT) += ia32_aout.o

View File

@@ -161,8 +161,7 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
 }
 
 static int ia32_restore_sigcontext(struct pt_regs *regs,
-				   struct sigcontext_ia32 __user *sc,
-				   unsigned int *pax)
+				   struct sigcontext_ia32 __user *sc)
 {
 	unsigned int tmpflags, err = 0;
 	void __user *buf;
@@ -184,7 +183,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
 		RELOAD_SEG(es);
 
 		COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
-		COPY(dx); COPY(cx); COPY(ip);
+		COPY(dx); COPY(cx); COPY(ip); COPY(ax);
 		/* Don't touch extended registers */
 
 		COPY_SEG_CPL3(cs);
@@ -197,12 +196,12 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
 		get_user_ex(tmp, &sc->fpstate);
 		buf = compat_ptr(tmp);
-		get_user_ex(*pax, &sc->ax);
 	} get_user_catch(err);
 
 	err |= restore_xstate_sig(buf, 1);
 
+	force_iret();
+
 	return err;
 }
@@ -211,7 +210,6 @@ asmlinkage long sys32_sigreturn(void)
 	struct pt_regs *regs = current_pt_regs();
 	struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8);
 	sigset_t set;
-	unsigned int ax;
 
 	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
 		goto badframe;
@@ -224,9 +222,9 @@ asmlinkage long sys32_sigreturn(void)
 
 	set_current_blocked(&set);
 
-	if (ia32_restore_sigcontext(regs, &frame->sc, &ax))
+	if (ia32_restore_sigcontext(regs, &frame->sc))
 		goto badframe;
-	return ax;
+	return regs->ax;
 
 badframe:
 	signal_fault(regs, frame, "32bit sigreturn");
@@ -238,7 +236,6 @@ asmlinkage long sys32_rt_sigreturn(void)
 	struct pt_regs *regs = current_pt_regs();
 	struct rt_sigframe_ia32 __user *frame;
 	sigset_t set;
-	unsigned int ax;
 
 	frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4);
@@ -249,13 +246,13 @@ asmlinkage long sys32_rt_sigreturn(void)
 
 	set_current_blocked(&set);
 
-	if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
+	if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext))
 		goto badframe;
 
 	if (compat_restore_altstack(&frame->uc.uc_stack))
 		goto badframe;
 
-	return ax;
+	return regs->ax;
 
 badframe:
 	signal_fault(regs, frame, "32bit rt sigreturn");

View File

@ -30,24 +30,13 @@
.section .entry.text, "ax" .section .entry.text, "ax"
.macro IA32_ARG_FIXUP noebp=0 /* clobbers %rax */
movl %edi,%r8d .macro CLEAR_RREGS _r9=rax
.if \noebp
.else
movl %ebp,%r9d
.endif
xchg %ecx,%esi
movl %ebx,%edi
movl %edx,%edx /* zero extension */
.endm
/* clobbers %eax */
.macro CLEAR_RREGS offset=0, _r9=rax
xorl %eax,%eax xorl %eax,%eax
movq %rax,\offset+R11(%rsp) movq %rax,R11(%rsp)
movq %rax,\offset+R10(%rsp) movq %rax,R10(%rsp)
movq %\_r9,\offset+R9(%rsp) movq %\_r9,R9(%rsp)
movq %rax,\offset+R8(%rsp) movq %rax,R8(%rsp)
.endm .endm
/* /*
@ -60,14 +49,14 @@
* If it's -1 to make us punt the syscall, then (u32)-1 is still * If it's -1 to make us punt the syscall, then (u32)-1 is still
* an appropriately invalid value. * an appropriately invalid value.
*/ */
.macro LOAD_ARGS32 offset, _r9=0 .macro LOAD_ARGS32 _r9=0
.if \_r9 .if \_r9
movl \offset+16(%rsp),%r9d movl R9(%rsp),%r9d
.endif .endif
movl \offset+40(%rsp),%ecx movl RCX(%rsp),%ecx
movl \offset+48(%rsp),%edx movl RDX(%rsp),%edx
movl \offset+56(%rsp),%esi movl RSI(%rsp),%esi
movl \offset+64(%rsp),%edi movl RDI(%rsp),%edi
movl %eax,%eax /* zero extension */ movl %eax,%eax /* zero extension */
.endm .endm
@ -99,54 +88,69 @@ ENDPROC(native_irq_enable_sysexit)
/* /*
* 32bit SYSENTER instruction entry. * 32bit SYSENTER instruction entry.
* *
* SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs.
* IF and VM in rflags are cleared (IOW: interrupts are off).
* SYSENTER does not save anything on the stack,
* and does not save old rip (!!!) and rflags.
*
* Arguments: * Arguments:
* %eax System call number. * eax system call number
* %ebx Arg1 * ebx arg1
* %ecx Arg2 * ecx arg2
* %edx Arg3 * edx arg3
* %esi Arg4 * esi arg4
* %edi Arg5 * edi arg5
* %ebp user stack * ebp user stack
* 0(%ebp) Arg6 * 0(%ebp) arg6
* *
* Interrupts off.
*
* This is purely a fast path. For anything complicated we use the int 0x80 * This is purely a fast path. For anything complicated we use the int 0x80
* path below. Set up a complete hardware stack frame to share code * path below. We set up a complete hardware stack frame to share code
* with the int 0x80 path. * with the int 0x80 path.
*/ */
ENTRY(ia32_sysenter_target) ENTRY(ia32_sysenter_target)
CFI_STARTPROC32 simple CFI_STARTPROC32 simple
CFI_SIGNAL_FRAME CFI_SIGNAL_FRAME
CFI_DEF_CFA rsp,0 CFI_DEF_CFA rsp,0
CFI_REGISTER rsp,rbp CFI_REGISTER rsp,rbp
SWAPGS_UNSAFE_STACK
movq PER_CPU_VAR(kernel_stack), %rsp
addq $(KERNEL_STACK_OFFSET),%rsp
/* /*
* No need to follow this irqs on/off section: the syscall * Interrupts are off on entry.
* disabled irqs, here we enable it straight after entry: * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
* it is too small to ever cause noticeable irq latency.
*/ */
SWAPGS_UNSAFE_STACK
movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp
ENABLE_INTERRUPTS(CLBR_NONE) ENABLE_INTERRUPTS(CLBR_NONE)
movl %ebp,%ebp /* zero extension */
pushq_cfi $__USER32_DS /* Zero-extending 32-bit regs, do not remove */
/*CFI_REL_OFFSET ss,0*/ movl %ebp, %ebp
pushq_cfi %rbp
CFI_REL_OFFSET rsp,0
pushfq_cfi
/*CFI_REL_OFFSET rflags,0*/
movl TI_sysenter_return+THREAD_INFO(%rsp,3*8-KERNEL_STACK_OFFSET),%r10d
CFI_REGISTER rip,r10
pushq_cfi $__USER32_CS
/*CFI_REL_OFFSET cs,0*/
movl %eax, %eax movl %eax, %eax
pushq_cfi %r10
CFI_REL_OFFSET rip,0 movl ASM_THREAD_INFO(TI_sysenter_return, %rsp, 0), %r10d
pushq_cfi %rax CFI_REGISTER rip,r10
/* Construct struct pt_regs on stack */
pushq_cfi $__USER32_DS /* pt_regs->ss */
pushq_cfi %rbp /* pt_regs->sp */
CFI_REL_OFFSET rsp,0
pushfq_cfi /* pt_regs->flags */
pushq_cfi $__USER32_CS /* pt_regs->cs */
pushq_cfi %r10 /* pt_regs->ip = thread_info->sysenter_return */
CFI_REL_OFFSET rip,0
pushq_cfi_reg rax /* pt_regs->orig_ax */
pushq_cfi_reg rdi /* pt_regs->di */
pushq_cfi_reg rsi /* pt_regs->si */
pushq_cfi_reg rdx /* pt_regs->dx */
pushq_cfi_reg rcx /* pt_regs->cx */
pushq_cfi_reg rax /* pt_regs->ax */
cld cld
SAVE_ARGS 0,1,0 sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
/* no need to do an access_ok check here because rbp has been CFI_ADJUST_CFA_OFFSET 10*8
32bit zero extended */
/*
* no need to do an access_ok check here because rbp has been
* 32bit zero extended
*/
ASM_STAC ASM_STAC
1: movl (%rbp),%ebp 1: movl (%rbp),%ebp
_ASM_EXTABLE(1b,ia32_badarg) _ASM_EXTABLE(1b,ia32_badarg)
@ -157,42 +161,80 @@ ENTRY(ia32_sysenter_target)
* ourselves. To save a few cycles, we can check whether * ourselves. To save a few cycles, we can check whether
* NT was set instead of doing an unconditional popfq. * NT was set instead of doing an unconditional popfq.
*/ */
testl $X86_EFLAGS_NT,EFLAGS-ARGOFFSET(%rsp) testl $X86_EFLAGS_NT,EFLAGS(%rsp)
jnz sysenter_fix_flags jnz sysenter_fix_flags
sysenter_flags_fixed: sysenter_flags_fixed:
orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
CFI_REMEMBER_STATE CFI_REMEMBER_STATE
jnz sysenter_tracesys jnz sysenter_tracesys
cmpq $(IA32_NR_syscalls-1),%rax cmpq $(IA32_NR_syscalls-1),%rax
ja ia32_badsys ja ia32_badsys
sysenter_do_call: sysenter_do_call:
IA32_ARG_FIXUP /* 32bit syscall -> 64bit C ABI argument conversion */
movl %edi,%r8d /* arg5 */
movl %ebp,%r9d /* arg6 */
xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */
movl %ebx,%edi /* arg1 */
movl %edx,%edx /* arg3 (zero extension) */
sysenter_dispatch: sysenter_dispatch:
call *ia32_sys_call_table(,%rax,8) call *ia32_sys_call_table(,%rax,8)
movq %rax,RAX-ARGOFFSET(%rsp) movq %rax,RAX(%rsp)
DISABLE_INTERRUPTS(CLBR_NONE) DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF TRACE_IRQS_OFF
testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
jnz sysexit_audit jnz sysexit_audit
sysexit_from_sys_call: sysexit_from_sys_call:
andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) /*
/* clear IF, that popfq doesn't enable interrupts early */ * NB: SYSEXIT is not obviously safe for 64-bit kernels -- an
andl $~0x200,EFLAGS-ARGOFFSET(%rsp) * NMI between STI and SYSEXIT has poorly specified behavior,
movl RIP-ARGOFFSET(%rsp),%edx /* User %eip */ * and and NMI followed by an IRQ with usergs is fatal. So
CFI_REGISTER rip,rdx * we just pretend we're using SYSEXIT but we really use
RESTORE_ARGS 0,24,0,0,0,0 * SYSRETL instead.
*
* This code path is still called 'sysexit' because it pairs
* with 'sysenter' and it uses the SYSENTER calling convention.
*/
andl $~TS_COMPAT,ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
movl RIP(%rsp),%ecx /* User %eip */
CFI_REGISTER rip,rcx
RESTORE_RSI_RDI
xorl %edx,%edx /* avoid info leaks */
xorq %r8,%r8 xorq %r8,%r8
xorq %r9,%r9 xorq %r9,%r9
xorq %r10,%r10 xorq %r10,%r10
xorq %r11,%r11 movl EFLAGS(%rsp),%r11d /* User eflags */
popfq_cfi
/*CFI_RESTORE rflags*/ /*CFI_RESTORE rflags*/
popq_cfi %rcx /* User %esp */
CFI_REGISTER rsp,rcx
TRACE_IRQS_ON TRACE_IRQS_ON
ENABLE_INTERRUPTS_SYSEXIT32
/*
* SYSRETL works even on Intel CPUs. Use it in preference to SYSEXIT,
* since it avoids a dicey window with interrupts enabled.
*/
movl RSP(%rsp),%esp
/*
* USERGS_SYSRET32 does:
* gsbase = user's gs base
* eip = ecx
* rflags = r11
* cs = __USER32_CS
* ss = __USER_DS
*
* The prologue set RIP(%rsp) to VDSO32_SYSENTER_RETURN, which does:
*
* pop %ebp
* pop %edx
* pop %ecx
*
* Therefore, we invoke SYSRETL with EDX and R8-R10 zeroed to
* avoid info leaks. R11 ends up with VDSO32_SYSENTER_RETURN's
* address (already known to user code), and R12-R15 are
* callee-saved and therefore don't contain any interesting
* kernel data.
*/
USERGS_SYSRET32
CFI_RESTORE_STATE CFI_RESTORE_STATE
@ -205,18 +247,18 @@ sysexit_from_sys_call:
movl %ebx,%esi /* 2nd arg: 1st syscall arg */ movl %ebx,%esi /* 2nd arg: 1st syscall arg */
movl %eax,%edi /* 1st arg: syscall number */ movl %eax,%edi /* 1st arg: syscall number */
call __audit_syscall_entry call __audit_syscall_entry
movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */ movl RAX(%rsp),%eax /* reload syscall number */
cmpq $(IA32_NR_syscalls-1),%rax cmpq $(IA32_NR_syscalls-1),%rax
ja ia32_badsys ja ia32_badsys
movl %ebx,%edi /* reload 1st syscall arg */ movl %ebx,%edi /* reload 1st syscall arg */
movl RCX-ARGOFFSET(%rsp),%esi /* reload 2nd syscall arg */ movl RCX(%rsp),%esi /* reload 2nd syscall arg */
movl RDX-ARGOFFSET(%rsp),%edx /* reload 3rd syscall arg */ movl RDX(%rsp),%edx /* reload 3rd syscall arg */
movl RSI-ARGOFFSET(%rsp),%ecx /* reload 4th syscall arg */ movl RSI(%rsp),%ecx /* reload 4th syscall arg */
movl RDI-ARGOFFSET(%rsp),%r8d /* reload 5th syscall arg */ movl RDI(%rsp),%r8d /* reload 5th syscall arg */
.endm .endm
.macro auditsys_exit exit .macro auditsys_exit exit
testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
jnz ia32_ret_from_sys_call jnz ia32_ret_from_sys_call
TRACE_IRQS_ON TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE) ENABLE_INTERRUPTS(CLBR_NONE)
@ -227,13 +269,13 @@ sysexit_from_sys_call:
1: setbe %al /* 1 if error, 0 if not */ 1: setbe %al /* 1 if error, 0 if not */
movzbl %al,%edi /* zero-extend that into %edi */ movzbl %al,%edi /* zero-extend that into %edi */
call __audit_syscall_exit call __audit_syscall_exit
movq RAX-ARGOFFSET(%rsp),%rax /* reload syscall return value */ movq RAX(%rsp),%rax /* reload syscall return value */
movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
DISABLE_INTERRUPTS(CLBR_NONE) DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF TRACE_IRQS_OFF
testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) testl %edi, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
jz \exit jz \exit
CLEAR_RREGS -ARGOFFSET CLEAR_RREGS
jmp int_with_check jmp int_with_check
.endm .endm
@ -253,16 +295,16 @@ sysenter_fix_flags:
sysenter_tracesys: sysenter_tracesys:
#ifdef CONFIG_AUDITSYSCALL #ifdef CONFIG_AUDITSYSCALL
testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
jz sysenter_auditsys jz sysenter_auditsys
#endif #endif
SAVE_REST SAVE_EXTRA_REGS
CLEAR_RREGS CLEAR_RREGS
movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */ movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */
movq %rsp,%rdi /* &pt_regs -> arg1 */ movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter call syscall_trace_enter
LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ LOAD_ARGS32 /* reload args from stack in case ptrace changed it */
RESTORE_REST RESTORE_EXTRA_REGS
cmpq $(IA32_NR_syscalls-1),%rax cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */ ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
jmp sysenter_do_call jmp sysenter_do_call
@ -272,94 +314,128 @@ ENDPROC(ia32_sysenter_target)
/* /*
* 32bit SYSCALL instruction entry. * 32bit SYSCALL instruction entry.
* *
* 32bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
* then loads new ss, cs, and rip from previously programmed MSRs.
* rflags gets masked by a value from another MSR (so CLD and CLAC
* are not needed). SYSCALL does not save anything on the stack
* and does not change rsp.
*
* Note: rflags saving+masking-with-MSR happens only in Long mode
* (in legacy 32bit mode, IF, RF and VM bits are cleared and that's it).
* Don't get confused: rflags saving+masking depends on Long Mode Active bit
* (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes
* or target CS descriptor's L bit (SYSCALL does not read segment descriptors).
*
* Arguments: * Arguments:
* %eax System call number. * eax system call number
* %ebx Arg1 * ecx return address
* %ecx return EIP * ebx arg1
* %edx Arg3 * ebp arg2 (note: not saved in the stack frame, should not be touched)
* %esi Arg4 * edx arg3
* %edi Arg5 * esi arg4
* %ebp Arg2 [note: not saved in the stack frame, should not be touched] * edi arg5
* %esp user stack * esp user stack
* 0(%esp) Arg6 * 0(%esp) arg6
* *
* Interrupts off.
*
* This is purely a fast path. For anything complicated we use the int 0x80 * This is purely a fast path. For anything complicated we use the int 0x80
* path below. Set up a complete hardware stack frame to share code * path below. We set up a complete hardware stack frame to share code
* with the int 0x80 path. * with the int 0x80 path.
*/ */
ENTRY(ia32_cstar_target) ENTRY(ia32_cstar_target)
CFI_STARTPROC32 simple CFI_STARTPROC32 simple
CFI_SIGNAL_FRAME CFI_SIGNAL_FRAME
CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET CFI_DEF_CFA rsp,0
CFI_REGISTER rip,rcx CFI_REGISTER rip,rcx
/*CFI_REGISTER rflags,r11*/ /*CFI_REGISTER rflags,r11*/
/*
* Interrupts are off on entry.
* We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
* it is too small to ever cause noticeable irq latency.
*/
SWAPGS_UNSAFE_STACK SWAPGS_UNSAFE_STACK
movl %esp,%r8d movl %esp,%r8d
CFI_REGISTER rsp,r8 CFI_REGISTER rsp,r8
movq PER_CPU_VAR(kernel_stack),%rsp movq PER_CPU_VAR(kernel_stack),%rsp
/*
* No need to follow this irqs on/off section: the syscall
* disabled irqs and here we enable it straight after entry:
*/
ENABLE_INTERRUPTS(CLBR_NONE) ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_ARGS 8,0,0
movl %eax,%eax /* zero extension */ /* Zero-extending 32-bit regs, do not remove */
movq %rax,ORIG_RAX-ARGOFFSET(%rsp) movl %eax,%eax
movq %rcx,RIP-ARGOFFSET(%rsp)
CFI_REL_OFFSET rip,RIP-ARGOFFSET /* Construct struct pt_regs on stack */
movq %rbp,RCX-ARGOFFSET(%rsp) /* this lies slightly to ptrace */ pushq_cfi $__USER32_DS /* pt_regs->ss */
pushq_cfi %r8 /* pt_regs->sp */
CFI_REL_OFFSET rsp,0
pushq_cfi %r11 /* pt_regs->flags */
pushq_cfi $__USER32_CS /* pt_regs->cs */
pushq_cfi %rcx /* pt_regs->ip */
CFI_REL_OFFSET rip,0
pushq_cfi_reg rax /* pt_regs->orig_ax */
pushq_cfi_reg rdi /* pt_regs->di */
pushq_cfi_reg rsi /* pt_regs->si */
pushq_cfi_reg rdx /* pt_regs->dx */
pushq_cfi_reg rbp /* pt_regs->cx */
movl %ebp,%ecx movl %ebp,%ecx
movq $__USER32_CS,CS-ARGOFFSET(%rsp) pushq_cfi_reg rax /* pt_regs->ax */
movq $__USER32_DS,SS-ARGOFFSET(%rsp) sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
movq %r11,EFLAGS-ARGOFFSET(%rsp) CFI_ADJUST_CFA_OFFSET 10*8
/*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
movq %r8,RSP-ARGOFFSET(%rsp) /*
CFI_REL_OFFSET rsp,RSP-ARGOFFSET * no need to do an access_ok check here because r8 has been
/* no need to do an access_ok check here because r8 has been * 32bit zero extended
32bit zero extended */ */
/* hardware stack frame is complete now */
ASM_STAC ASM_STAC
1: movl (%r8),%r9d 1: movl (%r8),%r9d
_ASM_EXTABLE(1b,ia32_badarg) _ASM_EXTABLE(1b,ia32_badarg)
ASM_CLAC ASM_CLAC
orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
CFI_REMEMBER_STATE CFI_REMEMBER_STATE
jnz cstar_tracesys jnz cstar_tracesys
cmpq $IA32_NR_syscalls-1,%rax cmpq $IA32_NR_syscalls-1,%rax
ja ia32_badsys ja ia32_badsys
cstar_do_call: cstar_do_call:
IA32_ARG_FIXUP 1 /* 32bit syscall -> 64bit C ABI argument conversion */
movl %edi,%r8d /* arg5 */
/* r9 already loaded */ /* arg6 */
xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */
movl %ebx,%edi /* arg1 */
movl %edx,%edx /* arg3 (zero extension) */
cstar_dispatch: cstar_dispatch:
call *ia32_sys_call_table(,%rax,8) call *ia32_sys_call_table(,%rax,8)
movq %rax,RAX-ARGOFFSET(%rsp) movq %rax,RAX(%rsp)
DISABLE_INTERRUPTS(CLBR_NONE) DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF TRACE_IRQS_OFF
testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
jnz sysretl_audit jnz sysretl_audit
sysretl_from_sys_call: sysretl_from_sys_call:
andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
RESTORE_ARGS 0,-ARG_SKIP,0,0,0 RESTORE_RSI_RDI_RDX
movl RIP-ARGOFFSET(%rsp),%ecx movl RIP(%rsp),%ecx
CFI_REGISTER rip,rcx CFI_REGISTER rip,rcx
movl EFLAGS-ARGOFFSET(%rsp),%r11d movl EFLAGS(%rsp),%r11d
/*CFI_REGISTER rflags,r11*/ /*CFI_REGISTER rflags,r11*/
xorq %r10,%r10 xorq %r10,%r10
xorq %r9,%r9 xorq %r9,%r9
xorq %r8,%r8 xorq %r8,%r8
TRACE_IRQS_ON TRACE_IRQS_ON
movl RSP-ARGOFFSET(%rsp),%esp movl RSP(%rsp),%esp
CFI_RESTORE rsp CFI_RESTORE rsp
/*
* 64bit->32bit SYSRET restores eip from ecx,
* eflags from r11 (but RF and VM bits are forced to 0),
* cs and ss are loaded from MSRs.
* (Note: 32bit->32bit SYSRET is different: since r11
* does not exist, it merely sets eflags.IF=1).
*/
USERGS_SYSRET32 USERGS_SYSRET32
#ifdef CONFIG_AUDITSYSCALL #ifdef CONFIG_AUDITSYSCALL
cstar_auditsys: cstar_auditsys:
CFI_RESTORE_STATE CFI_RESTORE_STATE
movl %r9d,R9-ARGOFFSET(%rsp) /* register to be clobbered by call */ movl %r9d,R9(%rsp) /* register to be clobbered by call */
auditsys_entry_common auditsys_entry_common
movl R9-ARGOFFSET(%rsp),%r9d /* reload 6th syscall arg */ movl R9(%rsp),%r9d /* reload 6th syscall arg */
jmp cstar_dispatch jmp cstar_dispatch
sysretl_audit: sysretl_audit:
@ -368,17 +444,17 @@ sysretl_audit:
cstar_tracesys: cstar_tracesys:
#ifdef CONFIG_AUDITSYSCALL #ifdef CONFIG_AUDITSYSCALL
testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
jz cstar_auditsys jz cstar_auditsys
#endif #endif
xchgl %r9d,%ebp xchgl %r9d,%ebp
SAVE_REST SAVE_EXTRA_REGS
CLEAR_RREGS 0, r9 CLEAR_RREGS r9
movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
movq %rsp,%rdi /* &pt_regs -> arg1 */ movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter call syscall_trace_enter
LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */ LOAD_ARGS32 1 /* reload args from stack in case ptrace changed it */
RESTORE_REST RESTORE_EXTRA_REGS
xchgl %ebp,%r9d xchgl %ebp,%r9d
cmpq $(IA32_NR_syscalls-1),%rax cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */ ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
@ -391,78 +467,94 @@ ia32_badarg:
jmp ia32_sysret jmp ia32_sysret
CFI_ENDPROC CFI_ENDPROC
/* /*
* Emulated IA32 system calls via int 0x80. * Emulated IA32 system calls via int 0x80.
* *
* Arguments: * Arguments:
* %eax System call number. * eax system call number
* %ebx Arg1 * ebx arg1
* %ecx Arg2 * ecx arg2
* %edx Arg3 * edx arg3
* %esi Arg4 * esi arg4
* %edi Arg5 * edi arg5
* %ebp Arg6 [note: not saved in the stack frame, should not be touched] * ebp arg6 (note: not saved in the stack frame, should not be touched)
* *
* Notes: * Notes:
* Uses the same stack frame as the x86-64 version. * Uses the same stack frame as the x86-64 version.
* All registers except %eax must be saved (but ptrace may violate that) * All registers except eax must be saved (but ptrace may violate that).
* Arguments are zero extended. For system calls that want sign extension and * Arguments are zero extended. For system calls that want sign extension and
* take long arguments a wrapper is needed. Most calls can just be called * take long arguments a wrapper is needed. Most calls can just be called
* directly. * directly.
* Assumes it is only called from user space and entered with interrupts off. * Assumes it is only called from user space and entered with interrupts off.
*/ */
ENTRY(ia32_syscall) ENTRY(ia32_syscall)
CFI_STARTPROC32 simple CFI_STARTPROC32 simple
CFI_SIGNAL_FRAME CFI_SIGNAL_FRAME
CFI_DEF_CFA rsp,SS+8-RIP CFI_DEF_CFA rsp,5*8
/*CFI_REL_OFFSET ss,SS-RIP*/ /*CFI_REL_OFFSET ss,4*8 */
CFI_REL_OFFSET rsp,RSP-RIP CFI_REL_OFFSET rsp,3*8
/*CFI_REL_OFFSET rflags,EFLAGS-RIP*/ /*CFI_REL_OFFSET rflags,2*8 */
/*CFI_REL_OFFSET cs,CS-RIP*/ /*CFI_REL_OFFSET cs,1*8 */
CFI_REL_OFFSET rip,RIP-RIP CFI_REL_OFFSET rip,0*8
/*
* Interrupts are off on entry.
* We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
* it is too small to ever cause noticeable irq latency.
*/
PARAVIRT_ADJUST_EXCEPTION_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME
SWAPGS SWAPGS
/*
* No need to follow this irqs on/off section: the syscall
* disabled irqs and here we enable it straight after entry:
*/
ENABLE_INTERRUPTS(CLBR_NONE) ENABLE_INTERRUPTS(CLBR_NONE)
movl %eax,%eax
pushq_cfi %rax /* Zero-extending 32-bit regs, do not remove */
movl %eax,%eax
/* Construct struct pt_regs on stack (iret frame is already on stack) */
pushq_cfi_reg rax /* pt_regs->orig_ax */
pushq_cfi_reg rdi /* pt_regs->di */
pushq_cfi_reg rsi /* pt_regs->si */
pushq_cfi_reg rdx /* pt_regs->dx */
pushq_cfi_reg rcx /* pt_regs->cx */
pushq_cfi_reg rax /* pt_regs->ax */
cld cld
/* note the registers are not zero extended to the sf. sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
this could be a problem. */ CFI_ADJUST_CFA_OFFSET 10*8
SAVE_ARGS 0,1,0
orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
jnz ia32_tracesys jnz ia32_tracesys
cmpq $(IA32_NR_syscalls-1),%rax cmpq $(IA32_NR_syscalls-1),%rax
ja ia32_badsys ja ia32_badsys
ia32_do_call: ia32_do_call:
IA32_ARG_FIXUP /* 32bit syscall -> 64bit C ABI argument conversion */
movl %edi,%r8d /* arg5 */
movl %ebp,%r9d /* arg6 */
xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */
movl %ebx,%edi /* arg1 */
movl %edx,%edx /* arg3 (zero extension) */
call *ia32_sys_call_table(,%rax,8) # xxx: rip relative call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
ia32_sysret: ia32_sysret:
movq %rax,RAX-ARGOFFSET(%rsp) movq %rax,RAX(%rsp)
ia32_ret_from_sys_call: ia32_ret_from_sys_call:
CLEAR_RREGS -ARGOFFSET CLEAR_RREGS
jmp int_ret_from_sys_call jmp int_ret_from_sys_call
ia32_tracesys: ia32_tracesys:
SAVE_REST SAVE_EXTRA_REGS
CLEAR_RREGS CLEAR_RREGS
movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
movq %rsp,%rdi /* &pt_regs -> arg1 */ movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter call syscall_trace_enter
LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ LOAD_ARGS32 /* reload args from stack in case ptrace changed it */
RESTORE_REST RESTORE_EXTRA_REGS
cmpq $(IA32_NR_syscalls-1),%rax cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */
jmp ia32_do_call jmp ia32_do_call
END(ia32_syscall) END(ia32_syscall)
ia32_badsys: ia32_badsys:
movq $0,ORIG_RAX-ARGOFFSET(%rsp) movq $0,ORIG_RAX(%rsp)
movq $-ENOSYS,%rax movq $-ENOSYS,%rax
jmp ia32_sysret jmp ia32_sysret
@ -479,8 +571,6 @@ GLOBAL(\label)
PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn
PTREGSCALL stub32_sigreturn, sys32_sigreturn PTREGSCALL stub32_sigreturn, sys32_sigreturn
PTREGSCALL stub32_execve, compat_sys_execve
PTREGSCALL stub32_execveat, compat_sys_execveat
PTREGSCALL stub32_fork, sys_fork PTREGSCALL stub32_fork, sys_fork
PTREGSCALL stub32_vfork, sys_vfork PTREGSCALL stub32_vfork, sys_vfork
@ -492,24 +582,23 @@ GLOBAL(stub32_clone)
ALIGN ALIGN
ia32_ptregs_common: ia32_ptregs_common:
popq %r11
CFI_ENDPROC CFI_ENDPROC
CFI_STARTPROC32 simple CFI_STARTPROC32 simple
CFI_SIGNAL_FRAME CFI_SIGNAL_FRAME
CFI_DEF_CFA rsp,SS+8-ARGOFFSET CFI_DEF_CFA rsp,SIZEOF_PTREGS
CFI_REL_OFFSET rax,RAX-ARGOFFSET CFI_REL_OFFSET rax,RAX
CFI_REL_OFFSET rcx,RCX-ARGOFFSET CFI_REL_OFFSET rcx,RCX
CFI_REL_OFFSET rdx,RDX-ARGOFFSET CFI_REL_OFFSET rdx,RDX
CFI_REL_OFFSET rsi,RSI-ARGOFFSET CFI_REL_OFFSET rsi,RSI
CFI_REL_OFFSET rdi,RDI-ARGOFFSET CFI_REL_OFFSET rdi,RDI
CFI_REL_OFFSET rip,RIP-ARGOFFSET CFI_REL_OFFSET rip,RIP
/* CFI_REL_OFFSET cs,CS-ARGOFFSET*/ /* CFI_REL_OFFSET cs,CS*/
/* CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/ /* CFI_REL_OFFSET rflags,EFLAGS*/
CFI_REL_OFFSET rsp,RSP-ARGOFFSET CFI_REL_OFFSET rsp,RSP
/* CFI_REL_OFFSET ss,SS-ARGOFFSET*/ /* CFI_REL_OFFSET ss,SS*/
SAVE_REST SAVE_EXTRA_REGS 8
call *%rax call *%rax
RESTORE_REST RESTORE_EXTRA_REGS 8
jmp ia32_sysret /* misbalances the return cache */ ret
CFI_ENDPROC CFI_ENDPROC
END(ia32_ptregs_common) END(ia32_ptregs_common)

View File

@@ -1,7 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/errno.h>
-
-long compat_ni_syscall(void)
-{
-	return -ENOSYS;
-}

View File

@@ -201,20 +201,6 @@ long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high,
 				 advice);
 }
 
-long sys32_vm86_warning(void)
-{
-	struct task_struct *me = current;
-	static char lastcomm[sizeof(me->comm)];
-
-	if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
-		compat_printk(KERN_INFO
-			      "%s: vm86 mode not supported on 64 bit kernel\n",
-			      me->comm);
-		strncpy(lastcomm, me->comm, sizeof(lastcomm));
-	}
-	return -ENOSYS;
-}
-
 asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi,
 				   size_t count)
 {

View File

@@ -1,25 +0,0 @@
-/* System call table for ia32 emulation. */
-
-#include <linux/linkage.h>
-#include <linux/sys.h>
-#include <linux/cache.h>
-#include <asm/asm-offsets.h>
-
-#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void compat(void) ;
-#include <asm/syscalls_32.h>
-#undef __SYSCALL_I386
-
-#define __SYSCALL_I386(nr, sym, compat) [nr] = compat,
-
-typedef void (*sys_call_ptr_t)(void);
-
-extern void compat_ni_syscall(void);
-
-const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = {
-	/*
-	 * Smells like a compiler bug -- it doesn't work
-	 * when the & below is removed.
-	 */
-	[0 ... __NR_ia32_syscall_max] = &compat_ni_syscall,
-#include <asm/syscalls_32.h>
-};

View File

@@ -18,12 +18,63 @@
 	.endm
 #endif
 
-.macro altinstruction_entry orig alt feature orig_len alt_len
+.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
 	.long \orig - .
 	.long \alt - .
 	.word \feature
 	.byte \orig_len
 	.byte \alt_len
+	.byte \pad_len
+.endm
+
+.macro ALTERNATIVE oldinstr, newinstr, feature
+140:
+	\oldinstr
+141:
+	.skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
+142:
+	.pushsection .altinstructions,"a"
+	altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b
+	.popsection
+
+	.pushsection .altinstr_replacement,"ax"
+143:
+	\newinstr
+144:
+	.popsection
+.endm
+
+#define old_len			141b-140b
+#define new_len1		144f-143f
+#define new_len2		145f-144f
+
+/*
+ * max without conditionals. Idea adapted from:
+ * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+ */
+#define alt_max_short(a, b)	((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
+
+.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
+140:
+	\oldinstr
+141:
+	.skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \
+		(alt_max_short(new_len1, new_len2) - (old_len)),0x90
+142:
+	.pushsection .altinstructions,"a"
+	altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b
+	altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b
+	.popsection
+
+	.pushsection .altinstr_replacement,"ax"
+143:
+	\newinstr1
+144:
+	\newinstr2
+145:
+	.popsection
 .endm
 
 #endif	/* __ASSEMBLY__ */
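
The alt_max_short() helper above computes max(new_len1, new_len2) without a conditional, so the .skip argument stays a plain absolute expression for the assembler. Below is a small C rendering of the same bit trick (not kernel code); the gas form above carries an extra negation to suit assembler expression semantics, but the identity is the same.

#include <assert.h>

/* Branchless max: when a < b the mask is all ones, so a ^ (a ^ b) == b;
 * otherwise the mask is zero and the result stays a. */
static int alt_max_short(int a, int b)
{
	return a ^ ((a ^ b) & -(a < b));
}

int main(void)
{
	assert(alt_max_short(3, 7) == 7);
	assert(alt_max_short(7, 3) == 7);
	assert(alt_max_short(5, 5) == 5);
	return 0;
}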

View File

@@ -48,8 +48,9 @@ struct alt_instr {
 	s32 repl_offset;	/* offset to replacement instruction */
 	u16 cpuid;		/* cpuid bit set for replacement */
 	u8  instrlen;		/* length of original instruction */
-	u8  replacementlen;	/* length of new instruction, <= instrlen */
-};
+	u8  replacementlen;	/* length of new instruction */
+	u8  padlen;		/* length of build-time padding */
+} __packed;
 
 extern void alternative_instructions(void);
 extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
@@ -76,50 +77,69 @@ static inline int alternatives_text_reserved(void *start, void *end)
 }
 #endif	/* CONFIG_SMP */
 
-#define OLDINSTR(oldinstr)	"661:\n\t" oldinstr "\n662:\n"
+#define b_replacement(num)	"664"#num
+#define e_replacement(num)	"665"#num
 
-#define b_replacement(number)	"663"#number
-#define e_replacement(number)	"664"#number
+#define alt_end_marker		"663"
+#define alt_slen		"662b-661b"
+#define alt_pad_len		alt_end_marker"b-662b"
+#define alt_total_slen		alt_end_marker"b-661b"
+#define alt_rlen(num)		e_replacement(num)"f-"b_replacement(num)"f"
 
-#define alt_slen "662b-661b"
-#define alt_rlen(number) e_replacement(number)"f-"b_replacement(number)"f"
+#define __OLDINSTR(oldinstr, num) \
+	"661:\n\t" oldinstr "\n662:\n" \
+	".skip -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * " \
+		"((" alt_rlen(num) ")-(" alt_slen ")),0x90\n"
 
-#define ALTINSTR_ENTRY(feature, number)					      \
+#define OLDINSTR(oldinstr, num) \
+	__OLDINSTR(oldinstr, num) \
+	alt_end_marker ":\n"
+
+/*
+ * max without conditionals. Idea adapted from:
+ * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+ *
+ * The additional "-" is needed because gas works with s32s.
+ */
+#define alt_max_short(a, b)	"((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") < (" b ")))))"
+
+/*
+ * Pad the second replacement alternative with additional NOPs if it is
+ * additionally longer than the first replacement alternative.
+ */
+#define OLDINSTR_2(oldinstr, num1, num2) \
+	"661:\n\t" oldinstr "\n662:\n" \
+	".skip -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \
+		"(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n" \
+	alt_end_marker ":\n"
+
+#define ALTINSTR_ENTRY(feature, num)					      \
 	" .long 661b - .\n"				/* label           */ \
-	" .long " b_replacement(number)"f - .\n"	/* new instruction */ \
+	" .long " b_replacement(num)"f - .\n"		/* new instruction */ \
 	" .word " __stringify(feature) "\n"		/* feature bit     */ \
-	" .byte " alt_slen "\n"				/* source len      */ \
-	" .byte " alt_rlen(number) "\n"			/* replacement len */
+	" .byte " alt_total_slen "\n"			/* source len      */ \
+	" .byte " alt_rlen(num) "\n"			/* replacement len */ \
+	" .byte " alt_pad_len "\n"			/* pad len */
 
-#define DISCARD_ENTRY(number)				/* rlen <= slen */    \
-	" .byte 0xff + (" alt_rlen(number) ") - (" alt_slen ")\n"
-
-#define ALTINSTR_REPLACEMENT(newinstr, feature, number)	/* replacement */     \
-	b_replacement(number)":\n\t" newinstr "\n" e_replacement(number) ":\n\t"
+#define ALTINSTR_REPLACEMENT(newinstr, feature, num)	/* replacement */     \
+	b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n\t"
 
 /* alternative assembly primitive: */
 #define ALTERNATIVE(oldinstr, newinstr, feature)			\
-	OLDINSTR(oldinstr)						\
+	OLDINSTR(oldinstr, 1)						\
 	".pushsection .altinstructions,\"a\"\n"				\
 	ALTINSTR_ENTRY(feature, 1)					\
 	".popsection\n"							\
-	".pushsection .discard,\"aw\",@progbits\n"			\
-	DISCARD_ENTRY(1)						\
-	".popsection\n"							\
 	".pushsection .altinstr_replacement, \"ax\"\n"			\
 	ALTINSTR_REPLACEMENT(newinstr, feature, 1)			\
 	".popsection"
 
 #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
-	OLDINSTR(oldinstr)						\
+	OLDINSTR_2(oldinstr, 1, 2)					\
 	".pushsection .altinstructions,\"a\"\n"				\
 	ALTINSTR_ENTRY(feature1, 1)					\
 	ALTINSTR_ENTRY(feature2, 2)					\
 	".popsection\n"							\
-	".pushsection .discard,\"aw\",@progbits\n"			\
-	DISCARD_ENTRY(1)						\
-	DISCARD_ENTRY(2)						\
-	".popsection\n"							\
 	".pushsection .altinstr_replacement, \"ax\"\n"			\
 	ALTINSTR_REPLACEMENT(newinstr1, feature1, 1)			\
 	ALTINSTR_REPLACEMENT(newinstr2, feature2, 2)			\
@@ -146,6 +166,9 @@ static inline int alternatives_text_reserved(void *start, void *end)
 #define alternative(oldinstr, newinstr, feature)			\
 	asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")
 
+#define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
+	asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory")
+
 /*
  * Alternative inline assembly with input.
  *

View File

@@ -91,7 +91,7 @@ static inline void native_apic_mem_write(u32 reg, u32 v)
 {
 	volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg);
 
-	alternative_io("movl %0, %1", "xchgl %0, %1", X86_BUG_11AP,
+	alternative_io("movl %0, %P1", "xchgl %0, %P1", X86_BUG_11AP,
 		       ASM_OUTPUT2("=r" (v), "=m" (*addr)),
 		       ASM_OUTPUT2("0" (v), "m" (*addr)));
 }

View File

@@ -95,13 +95,11 @@ do {									\
  * Stop RDTSC speculation. This is needed when you need to use RDTSC
  * (or get_cycles or vread that possibly accesses the TSC) in a defined
  * code region.
- *
- * (Could use an alternative three way for this if there was one.)
  */
 static __always_inline void rdtsc_barrier(void)
 {
-	alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
-	alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
+	alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
+		      "lfence", X86_FEATURE_LFENCE_RDTSC);
 }
 
 #endif /* _ASM_X86_BARRIER_H */

View File

@ -55,143 +55,157 @@ For 32-bit we have the following conventions - kernel is built with
* for assembly code: * for assembly code:
*/ */
#define R15 0 /* The layout forms the "struct pt_regs" on the stack: */
#define R14 8 /*
#define R13 16 * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
#define R12 24 * unless syscall needs a complete, fully filled "struct pt_regs".
#define RBP 32 */
#define RBX 40 #define R15 0*8
#define R14 1*8
#define R13 2*8
#define R12 3*8
#define RBP 4*8
#define RBX 5*8
/* These regs are callee-clobbered. Always saved on kernel entry. */
#define R11 6*8
#define R10 7*8
#define R9 8*8
#define R8 9*8
#define RAX 10*8
#define RCX 11*8
#define RDX 12*8
#define RSI 13*8
#define RDI 14*8
/*
* On syscall entry, this is syscall#. On CPU exception, this is error code.
* On hw interrupt, it's IRQ number:
*/
#define ORIG_RAX 15*8
/* Return frame for iretq */
#define RIP 16*8
#define CS 17*8
#define EFLAGS 18*8
#define RSP 19*8
#define SS 20*8
/* arguments: interrupts/non tracing syscalls only save up to here: */ #define SIZEOF_PTREGS 21*8
#define R11 48
#define R10 56
#define R9 64
#define R8 72
#define RAX 80
#define RCX 88
#define RDX 96
#define RSI 104
#define RDI 112
#define ORIG_RAX 120 /* + error_code */
/* end of arguments */
/* cpu exception frame or undefined in case of fast syscall: */
#define RIP 128
#define CS 136
#define EFLAGS 144
#define RSP 152
#define SS 160
#define ARGOFFSET R11
.macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1, rax_enosys=0
subq $9*8+\addskip, %rsp
CFI_ADJUST_CFA_OFFSET 9*8+\addskip
movq_cfi rdi, 8*8
movq_cfi rsi, 7*8
movq_cfi rdx, 6*8
.if \save_rcx
movq_cfi rcx, 5*8
.endif
.if \rax_enosys
movq $-ENOSYS, 4*8(%rsp)
.else
movq_cfi rax, 4*8
.endif
.if \save_r891011
movq_cfi r8, 3*8
movq_cfi r9, 2*8
movq_cfi r10, 1*8
movq_cfi r11, 0*8
.endif
.macro ALLOC_PT_GPREGS_ON_STACK addskip=0
subq $15*8+\addskip, %rsp
CFI_ADJUST_CFA_OFFSET 15*8+\addskip
.endm .endm
#define ARG_SKIP (9*8) .macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1
.if \r11
movq_cfi r11, 6*8+\offset
.endif
.if \r8910
movq_cfi r10, 7*8+\offset
movq_cfi r9, 8*8+\offset
movq_cfi r8, 9*8+\offset
.endif
.if \rax
movq_cfi rax, 10*8+\offset
.endif
.if \rcx
movq_cfi rcx, 11*8+\offset
.endif
movq_cfi rdx, 12*8+\offset
movq_cfi rsi, 13*8+\offset
movq_cfi rdi, 14*8+\offset
.endm
.macro SAVE_C_REGS offset=0
SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1
.endm
.macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0
SAVE_C_REGS_HELPER \offset, 0, 0, 1, 1
.endm
.macro SAVE_C_REGS_EXCEPT_R891011
SAVE_C_REGS_HELPER 0, 1, 1, 0, 0
.endm
.macro SAVE_C_REGS_EXCEPT_RCX_R891011
SAVE_C_REGS_HELPER 0, 1, 0, 0, 0
.endm
.macro SAVE_C_REGS_EXCEPT_RAX_RCX_R11
SAVE_C_REGS_HELPER 0, 0, 0, 1, 0
.endm
.macro RESTORE_ARGS rstor_rax=1, addskip=0, rstor_rcx=1, rstor_r11=1, \ .macro SAVE_EXTRA_REGS offset=0
rstor_r8910=1, rstor_rdx=1 movq_cfi r15, 0*8+\offset
movq_cfi r14, 1*8+\offset
movq_cfi r13, 2*8+\offset
movq_cfi r12, 3*8+\offset
movq_cfi rbp, 4*8+\offset
movq_cfi rbx, 5*8+\offset
.endm
.macro SAVE_EXTRA_REGS_RBP offset=0
movq_cfi rbp, 4*8+\offset
.endm
.macro RESTORE_EXTRA_REGS offset=0
movq_cfi_restore 0*8+\offset, r15
movq_cfi_restore 1*8+\offset, r14
movq_cfi_restore 2*8+\offset, r13
movq_cfi_restore 3*8+\offset, r12
movq_cfi_restore 4*8+\offset, rbp
movq_cfi_restore 5*8+\offset, rbx
.endm
.macro ZERO_EXTRA_REGS
xorl %r15d, %r15d
xorl %r14d, %r14d
xorl %r13d, %r13d
xorl %r12d, %r12d
xorl %ebp, %ebp
xorl %ebx, %ebx
.endm
.macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
.if \rstor_r11 .if \rstor_r11
movq_cfi_restore 0*8, r11 movq_cfi_restore 6*8, r11
.endif .endif
.if \rstor_r8910 .if \rstor_r8910
movq_cfi_restore 1*8, r10 movq_cfi_restore 7*8, r10
movq_cfi_restore 2*8, r9 movq_cfi_restore 8*8, r9
movq_cfi_restore 3*8, r8 movq_cfi_restore 9*8, r8
.endif .endif
.if \rstor_rax .if \rstor_rax
movq_cfi_restore 4*8, rax movq_cfi_restore 10*8, rax
.endif .endif
.if \rstor_rcx .if \rstor_rcx
movq_cfi_restore 5*8, rcx movq_cfi_restore 11*8, rcx
.endif .endif
.if \rstor_rdx .if \rstor_rdx
movq_cfi_restore 6*8, rdx movq_cfi_restore 12*8, rdx
.endif
movq_cfi_restore 7*8, rsi
movq_cfi_restore 8*8, rdi
.if ARG_SKIP+\addskip > 0
addq $ARG_SKIP+\addskip, %rsp
CFI_ADJUST_CFA_OFFSET -(ARG_SKIP+\addskip)
.endif .endif
movq_cfi_restore 13*8, rsi
movq_cfi_restore 14*8, rdi
.endm
.macro RESTORE_C_REGS
RESTORE_C_REGS_HELPER 1,1,1,1,1
.endm
.macro RESTORE_C_REGS_EXCEPT_RAX
RESTORE_C_REGS_HELPER 0,1,1,1,1
.endm
.macro RESTORE_C_REGS_EXCEPT_RCX
RESTORE_C_REGS_HELPER 1,0,1,1,1
.endm
.macro RESTORE_C_REGS_EXCEPT_R11
RESTORE_C_REGS_HELPER 1,1,0,1,1
.endm
.macro RESTORE_C_REGS_EXCEPT_RCX_R11
RESTORE_C_REGS_HELPER 1,0,0,1,1
.endm
.macro RESTORE_RSI_RDI
RESTORE_C_REGS_HELPER 0,0,0,0,0
.endm
.macro RESTORE_RSI_RDI_RDX
RESTORE_C_REGS_HELPER 0,0,0,0,1
.endm .endm
.macro LOAD_ARGS offset, skiprax=0 .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
movq \offset(%rsp), %r11 addq $15*8+\addskip, %rsp
movq \offset+8(%rsp), %r10 CFI_ADJUST_CFA_OFFSET -(15*8+\addskip)
movq \offset+16(%rsp), %r9
movq \offset+24(%rsp), %r8
movq \offset+40(%rsp), %rcx
movq \offset+48(%rsp), %rdx
movq \offset+56(%rsp), %rsi
movq \offset+64(%rsp), %rdi
.if \skiprax
.else
movq \offset+72(%rsp), %rax
.endif
.endm
#define REST_SKIP (6*8)
.macro SAVE_REST
subq $REST_SKIP, %rsp
CFI_ADJUST_CFA_OFFSET REST_SKIP
movq_cfi rbx, 5*8
movq_cfi rbp, 4*8
movq_cfi r12, 3*8
movq_cfi r13, 2*8
movq_cfi r14, 1*8
movq_cfi r15, 0*8
.endm
.macro RESTORE_REST
movq_cfi_restore 0*8, r15
movq_cfi_restore 1*8, r14
movq_cfi_restore 2*8, r13
movq_cfi_restore 3*8, r12
movq_cfi_restore 4*8, rbp
movq_cfi_restore 5*8, rbx
addq $REST_SKIP, %rsp
CFI_ADJUST_CFA_OFFSET -(REST_SKIP)
.endm
.macro SAVE_ALL
SAVE_ARGS
SAVE_REST
.endm
.macro RESTORE_ALL addskip=0
RESTORE_REST
RESTORE_ARGS 1, \addskip
.endm .endm
.macro icebp .macro icebp
@ -210,37 +224,23 @@ For 32-bit we have the following conventions - kernel is built with
*/ */
.macro SAVE_ALL .macro SAVE_ALL
pushl_cfi %eax pushl_cfi_reg eax
CFI_REL_OFFSET eax, 0 pushl_cfi_reg ebp
pushl_cfi %ebp pushl_cfi_reg edi
CFI_REL_OFFSET ebp, 0 pushl_cfi_reg esi
pushl_cfi %edi pushl_cfi_reg edx
CFI_REL_OFFSET edi, 0 pushl_cfi_reg ecx
pushl_cfi %esi pushl_cfi_reg ebx
CFI_REL_OFFSET esi, 0
pushl_cfi %edx
CFI_REL_OFFSET edx, 0
pushl_cfi %ecx
CFI_REL_OFFSET ecx, 0
pushl_cfi %ebx
CFI_REL_OFFSET ebx, 0
.endm .endm
.macro RESTORE_ALL .macro RESTORE_ALL
popl_cfi %ebx popl_cfi_reg ebx
CFI_RESTORE ebx popl_cfi_reg ecx
popl_cfi %ecx popl_cfi_reg edx
CFI_RESTORE ecx popl_cfi_reg esi
popl_cfi %edx popl_cfi_reg edi
CFI_RESTORE edx popl_cfi_reg ebp
popl_cfi %esi popl_cfi_reg eax
CFI_RESTORE esi
popl_cfi %edi
CFI_RESTORE edi
popl_cfi %ebp
CFI_RESTORE ebp
popl_cfi %eax
CFI_RESTORE eax
.endm .endm
#endif /* CONFIG_X86_64 */ #endif /* CONFIG_X86_64 */
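
The R15…SS constants above are simply byte offsets into the struct pt_regs frame that the entry code builds on the kernel stack, with SIZEOF_PTREGS = 21*8 covering the whole frame. The standalone sketch below spells out that correspondence; the mirror struct is written here only for illustration, assuming the usual x86-64 pt_regs field order, and is not the kernel's header.

#include <stddef.h>
#include <stdint.h>

/* Illustrative mirror of the register-frame layout assumed above. */
struct ptregs_mirror {
	uint64_t r15, r14, r13, r12, bp, bx;            /* callee-preserved "extra" regs */
	uint64_t r11, r10, r9, r8, ax, cx, dx, si, di;  /* callee-clobbered "C" regs */
	uint64_t orig_ax;                               /* syscall nr / error code / IRQ */
	uint64_t ip, cs, flags, sp, ss;                 /* hardware iret frame */
};

_Static_assert(offsetof(struct ptregs_mirror, r15)     ==  0 * 8, "R15");
_Static_assert(offsetof(struct ptregs_mirror, bx)      ==  5 * 8, "RBX");
_Static_assert(offsetof(struct ptregs_mirror, r11)     ==  6 * 8, "R11");
_Static_assert(offsetof(struct ptregs_mirror, di)      == 14 * 8, "RDI");
_Static_assert(offsetof(struct ptregs_mirror, orig_ax) == 15 * 8, "ORIG_RAX");
_Static_assert(offsetof(struct ptregs_mirror, ip)      == 16 * 8, "RIP");
_Static_assert(offsetof(struct ptregs_mirror, ss)      == 20 * 8, "SS");
_Static_assert(sizeof(struct ptregs_mirror)            == 21 * 8, "SIZEOF_PTREGS");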

View File

@@ -301,7 +301,7 @@ static inline void __user *arch_compat_alloc_user_space(long len)
 		sp = task_pt_regs(current)->sp;
 	} else {
 		/* -128 for the x32 ABI redzone */
-		sp = this_cpu_read(old_rsp) - 128;
+		sp = task_pt_regs(current)->sp - 128;
 	}
 
 	return (void __user *)round_down(sp - len, 16);

View File

@ -231,7 +231,9 @@
#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ #define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */ #define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */
#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ #define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
#define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */
#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ #define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ #define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ #define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
@ -418,6 +420,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
" .word %P0\n" /* 1: do replace */ " .word %P0\n" /* 1: do replace */
" .byte 2b - 1b\n" /* source len */ " .byte 2b - 1b\n" /* source len */
" .byte 0\n" /* replacement len */ " .byte 0\n" /* replacement len */
" .byte 0\n" /* pad len */
".previous\n" ".previous\n"
/* skipping size check since replacement size = 0 */ /* skipping size check since replacement size = 0 */
: : "i" (X86_FEATURE_ALWAYS) : : t_warn); : : "i" (X86_FEATURE_ALWAYS) : : t_warn);
@ -432,6 +435,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
" .word %P0\n" /* feature bit */ " .word %P0\n" /* feature bit */
" .byte 2b - 1b\n" /* source len */ " .byte 2b - 1b\n" /* source len */
" .byte 0\n" /* replacement len */ " .byte 0\n" /* replacement len */
" .byte 0\n" /* pad len */
".previous\n" ".previous\n"
/* skipping size check since replacement size = 0 */ /* skipping size check since replacement size = 0 */
: : "i" (bit) : : t_no); : : "i" (bit) : : t_no);
@ -457,6 +461,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
" .word %P1\n" /* feature bit */ " .word %P1\n" /* feature bit */
" .byte 2b - 1b\n" /* source len */ " .byte 2b - 1b\n" /* source len */
" .byte 4f - 3f\n" /* replacement len */ " .byte 4f - 3f\n" /* replacement len */
" .byte 0\n" /* pad len */
".previous\n" ".previous\n"
".section .discard,\"aw\",@progbits\n" ".section .discard,\"aw\",@progbits\n"
" .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */ " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
@ -483,31 +488,30 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
static __always_inline __pure bool _static_cpu_has_safe(u16 bit) static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
{ {
#ifdef CC_HAVE_ASM_GOTO #ifdef CC_HAVE_ASM_GOTO
/* asm_volatile_goto("1: jmp %l[t_dynamic]\n"
* We need to spell the jumps to the compiler because, depending on the offset,
* the replacement jump can be bigger than the original jump, and this we cannot
* have. Thus, we force the jump to the widest, 4-byte, signed relative
* offset even though the last would often fit in less bytes.
*/
asm_volatile_goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n"
"2:\n" "2:\n"
".skip -(((5f-4f) - (2b-1b)) > 0) * "
"((5f-4f) - (2b-1b)),0x90\n"
"3:\n"
".section .altinstructions,\"a\"\n" ".section .altinstructions,\"a\"\n"
" .long 1b - .\n" /* src offset */ " .long 1b - .\n" /* src offset */
" .long 3f - .\n" /* repl offset */ " .long 4f - .\n" /* repl offset */
" .word %P1\n" /* always replace */ " .word %P1\n" /* always replace */
" .byte 2b - 1b\n" /* src len */ " .byte 3b - 1b\n" /* src len */
" .byte 4f - 3f\n" /* repl len */ " .byte 5f - 4f\n" /* repl len */
" .byte 3b - 2b\n" /* pad len */
".previous\n" ".previous\n"
".section .altinstr_replacement,\"ax\"\n" ".section .altinstr_replacement,\"ax\"\n"
"3: .byte 0xe9\n .long %l[t_no] - 2b\n" "4: jmp %l[t_no]\n"
"4:\n" "5:\n"
".previous\n" ".previous\n"
".section .altinstructions,\"a\"\n" ".section .altinstructions,\"a\"\n"
" .long 1b - .\n" /* src offset */ " .long 1b - .\n" /* src offset */
" .long 0\n" /* no replacement */ " .long 0\n" /* no replacement */
" .word %P0\n" /* feature bit */ " .word %P0\n" /* feature bit */
" .byte 2b - 1b\n" /* src len */ " .byte 3b - 1b\n" /* src len */
" .byte 0\n" /* repl len */ " .byte 0\n" /* repl len */
" .byte 0\n" /* pad len */
".previous\n" ".previous\n"
: : "i" (bit), "i" (X86_FEATURE_ALWAYS) : : "i" (bit), "i" (X86_FEATURE_ALWAYS)
: : t_dynamic, t_no); : : t_dynamic, t_no);
@ -527,6 +531,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
" .word %P2\n" /* always replace */ " .word %P2\n" /* always replace */
" .byte 2b - 1b\n" /* source len */ " .byte 2b - 1b\n" /* source len */
" .byte 4f - 3f\n" /* replacement len */ " .byte 4f - 3f\n" /* replacement len */
" .byte 0\n" /* pad len */
".previous\n" ".previous\n"
".section .discard,\"aw\",@progbits\n" ".section .discard,\"aw\",@progbits\n"
" .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */ " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
@ -541,6 +546,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
" .word %P1\n" /* feature bit */ " .word %P1\n" /* feature bit */
" .byte 4b - 3b\n" /* src len */ " .byte 4b - 3b\n" /* src len */
" .byte 6f - 5f\n" /* repl len */ " .byte 6f - 5f\n" /* repl len */
" .byte 0\n" /* pad len */
".previous\n" ".previous\n"
".section .discard,\"aw\",@progbits\n" ".section .discard,\"aw\",@progbits\n"
" .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */ " .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */


@ -376,11 +376,16 @@ static inline void _set_gate(int gate, unsigned type, void *addr,
* Pentium F0 0F bugfix can have resulted in the mapped * Pentium F0 0F bugfix can have resulted in the mapped
* IDT being write-protected. * IDT being write-protected.
*/ */
#define set_intr_gate(n, addr) \ #define set_intr_gate_notrace(n, addr) \
do { \ do { \
BUG_ON((unsigned)n > 0xFF); \ BUG_ON((unsigned)n > 0xFF); \
_set_gate(n, GATE_INTERRUPT, (void *)addr, 0, 0, \ _set_gate(n, GATE_INTERRUPT, (void *)addr, 0, 0, \
__KERNEL_CS); \ __KERNEL_CS); \
} while (0)
#define set_intr_gate(n, addr) \
do { \
set_intr_gate_notrace(n, addr); \
_trace_set_gate(n, GATE_INTERRUPT, (void *)trace_##addr,\ _trace_set_gate(n, GATE_INTERRUPT, (void *)trace_##addr,\
0, 0, __KERNEL_CS); \ 0, 0, __KERNEL_CS); \
} while (0) } while (0)
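A minimal usage sketch of the split above (X86_TRAP_DE and divide_error are familiar stand-ins, not taken from this hunk):

	/* installs divide_error and, under CONFIG_TRACING, trace_divide_error too */
	set_intr_gate(X86_TRAP_DE, divide_error);

	/* installs only the non-traced handler, e.g. when the tracing code itself
	 * needs to (re)point a vector without the trace_##addr pairing */
	set_intr_gate_notrace(X86_TRAP_DE, divide_error);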


@ -86,11 +86,23 @@
CFI_ADJUST_CFA_OFFSET 8 CFI_ADJUST_CFA_OFFSET 8
.endm .endm
.macro pushq_cfi_reg reg
pushq %\reg
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET \reg, 0
.endm
.macro popq_cfi reg .macro popq_cfi reg
popq \reg popq \reg
CFI_ADJUST_CFA_OFFSET -8 CFI_ADJUST_CFA_OFFSET -8
.endm .endm
.macro popq_cfi_reg reg
popq %\reg
CFI_ADJUST_CFA_OFFSET -8
CFI_RESTORE \reg
.endm
.macro pushfq_cfi .macro pushfq_cfi
pushfq pushfq
CFI_ADJUST_CFA_OFFSET 8 CFI_ADJUST_CFA_OFFSET 8
@ -116,11 +128,23 @@
CFI_ADJUST_CFA_OFFSET 4 CFI_ADJUST_CFA_OFFSET 4
.endm .endm
.macro pushl_cfi_reg reg
pushl %\reg
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET \reg, 0
.endm
.macro popl_cfi reg .macro popl_cfi reg
popl \reg popl \reg
CFI_ADJUST_CFA_OFFSET -4 CFI_ADJUST_CFA_OFFSET -4
.endm .endm
.macro popl_cfi_reg reg
popl %\reg
CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE \reg
.endm
.macro pushfl_cfi .macro pushfl_cfi
pushfl pushfl
CFI_ADJUST_CFA_OFFSET 4 CFI_ADJUST_CFA_OFFSET 4


@ -171,10 +171,11 @@ do { \
static inline void elf_common_init(struct thread_struct *t, static inline void elf_common_init(struct thread_struct *t,
struct pt_regs *regs, const u16 ds) struct pt_regs *regs, const u16 ds)
{ {
regs->ax = regs->bx = regs->cx = regs->dx = 0; /* Commented-out registers are cleared in stub_execve */
regs->si = regs->di = regs->bp = 0; /*regs->ax = regs->bx =*/ regs->cx = regs->dx = 0;
regs->si = regs->di /*= regs->bp*/ = 0;
regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0; regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0;
regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0; /*regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;*/
t->fs = t->gs = 0; t->fs = t->gs = 0;
t->fsindex = t->gsindex = 0; t->fsindex = t->gsindex = 0;
t->ds = t->es = ds; t->ds = t->es = ds;


@ -181,10 +181,9 @@ extern __visible void smp_call_function_single_interrupt(struct pt_regs *);
extern __visible void smp_invalidate_interrupt(struct pt_regs *); extern __visible void smp_invalidate_interrupt(struct pt_regs *);
#endif #endif
extern void (*__initconst interrupt[FIRST_SYSTEM_VECTOR extern char irq_entries_start[];
- FIRST_EXTERNAL_VECTOR])(void);
#ifdef CONFIG_TRACING #ifdef CONFIG_TRACING
#define trace_interrupt interrupt #define trace_irq_entries_start irq_entries_start
#endif #endif
#define VECTOR_UNDEFINED (-1) #define VECTOR_UNDEFINED (-1)


@ -69,7 +69,7 @@ struct insn {
const insn_byte_t *next_byte; const insn_byte_t *next_byte;
}; };
#define MAX_INSN_SIZE 16 #define MAX_INSN_SIZE 15
#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) #define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6)
#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) #define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3)


@ -136,10 +136,6 @@ static inline notrace unsigned long arch_local_irq_save(void)
#define USERGS_SYSRET32 \ #define USERGS_SYSRET32 \
swapgs; \ swapgs; \
sysretl sysretl
#define ENABLE_INTERRUPTS_SYSEXIT32 \
swapgs; \
sti; \
sysexit
#else #else
#define INTERRUPT_RETURN iret #define INTERRUPT_RETURN iret
@ -163,33 +159,9 @@ static inline int arch_irqs_disabled(void)
return arch_irqs_disabled_flags(flags); return arch_irqs_disabled_flags(flags);
} }
#endif /* !__ASSEMBLY__ */
#else #ifdef __ASSEMBLY__
#ifdef CONFIG_X86_64
#define ARCH_LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk
#define ARCH_LOCKDEP_SYS_EXIT_IRQ \
TRACE_IRQS_ON; \
sti; \
SAVE_REST; \
LOCKDEP_SYS_EXIT; \
RESTORE_REST; \
cli; \
TRACE_IRQS_OFF;
#else
#define ARCH_LOCKDEP_SYS_EXIT \
pushl %eax; \
pushl %ecx; \
pushl %edx; \
call lockdep_sys_exit; \
popl %edx; \
popl %ecx; \
popl %eax;
#define ARCH_LOCKDEP_SYS_EXIT_IRQ
#endif
#ifdef CONFIG_TRACE_IRQFLAGS #ifdef CONFIG_TRACE_IRQFLAGS
# define TRACE_IRQS_ON call trace_hardirqs_on_thunk; # define TRACE_IRQS_ON call trace_hardirqs_on_thunk;
# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk; # define TRACE_IRQS_OFF call trace_hardirqs_off_thunk;
@ -198,12 +170,29 @@ static inline int arch_irqs_disabled(void)
# define TRACE_IRQS_OFF # define TRACE_IRQS_OFF
#endif #endif
#ifdef CONFIG_DEBUG_LOCK_ALLOC #ifdef CONFIG_DEBUG_LOCK_ALLOC
# define LOCKDEP_SYS_EXIT ARCH_LOCKDEP_SYS_EXIT # ifdef CONFIG_X86_64
# define LOCKDEP_SYS_EXIT_IRQ ARCH_LOCKDEP_SYS_EXIT_IRQ # define LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk
# else # define LOCKDEP_SYS_EXIT_IRQ \
TRACE_IRQS_ON; \
sti; \
call lockdep_sys_exit_thunk; \
cli; \
TRACE_IRQS_OFF;
# else
# define LOCKDEP_SYS_EXIT \
pushl %eax; \
pushl %ecx; \
pushl %edx; \
call lockdep_sys_exit; \
popl %edx; \
popl %ecx; \
popl %eax;
# define LOCKDEP_SYS_EXIT_IRQ
# endif
#else
# define LOCKDEP_SYS_EXIT # define LOCKDEP_SYS_EXIT
# define LOCKDEP_SYS_EXIT_IRQ # define LOCKDEP_SYS_EXIT_IRQ
# endif #endif
#endif /* __ASSEMBLY__ */
#endif /* __ASSEMBLY__ */
#endif #endif


@ -976,11 +976,6 @@ extern void default_banner(void);
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
CLBR_NONE, \ CLBR_NONE, \
jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64)) jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
#define ENABLE_INTERRUPTS_SYSEXIT32 \
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \
CLBR_NONE, \
jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
#endif /* CONFIG_X86_32 */ #endif /* CONFIG_X86_32 */
#endif /* __ASSEMBLY__ */ #endif /* __ASSEMBLY__ */


@ -210,8 +210,23 @@ struct x86_hw_tss {
unsigned long sp0; unsigned long sp0;
unsigned short ss0, __ss0h; unsigned short ss0, __ss0h;
unsigned long sp1; unsigned long sp1;
/* ss1 caches MSR_IA32_SYSENTER_CS: */
unsigned short ss1, __ss1h; /*
* We don't use ring 1, so ss1 is a convenient scratch space in
* the same cacheline as sp0. We use ss1 to cache the value in
* MSR_IA32_SYSENTER_CS. When we context switch
* MSR_IA32_SYSENTER_CS, we first check if the new value being
* written matches ss1, and, if it's not, then we wrmsr the new
* value and update ss1.
*
* The only reason we context switch MSR_IA32_SYSENTER_CS is
* that we set it to zero in vm86 tasks to avoid corrupting the
* stack if we were to go through the sysenter path from vm86
* mode.
*/
unsigned short ss1; /* MSR_IA32_SYSENTER_CS */
unsigned short __ss1h;
unsigned long sp2; unsigned long sp2;
unsigned short ss2, __ss2h; unsigned short ss2, __ss2h;
unsigned long __cr3; unsigned long __cr3;
@ -276,13 +291,17 @@ struct tss_struct {
unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
/* /*
* .. and then another 0x100 bytes for the emergency kernel stack: * Space for the temporary SYSENTER stack:
*/ */
unsigned long stack[64]; unsigned long SYSENTER_stack[64];
} ____cacheline_aligned; } ____cacheline_aligned;
DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss); DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
#ifdef CONFIG_X86_32
DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
#endif
/* /*
* Save the original ist values for checking stack pointers during debugging * Save the original ist values for checking stack pointers during debugging
@ -474,7 +493,6 @@ struct thread_struct {
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
unsigned long sysenter_cs; unsigned long sysenter_cs;
#else #else
unsigned long usersp; /* Copy from PDA */
unsigned short es; unsigned short es;
unsigned short ds; unsigned short ds;
unsigned short fsindex; unsigned short fsindex;
@ -564,6 +582,16 @@ static inline void native_swapgs(void)
#endif #endif
} }
static inline unsigned long current_top_of_stack(void)
{
#ifdef CONFIG_X86_64
return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
#else
/* sp0 on x86_32 is special in and around vm86 mode. */
return this_cpu_read_stable(cpu_current_top_of_stack);
#endif
}
#ifdef CONFIG_PARAVIRT #ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h> #include <asm/paravirt.h>
#else #else
@ -761,10 +789,10 @@ extern char ignore_fpu_irq;
#define ARCH_HAS_SPINLOCK_PREFETCH #define ARCH_HAS_SPINLOCK_PREFETCH
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
# define BASE_PREFETCH ASM_NOP4 # define BASE_PREFETCH ""
# define ARCH_HAS_PREFETCH # define ARCH_HAS_PREFETCH
#else #else
# define BASE_PREFETCH "prefetcht0 (%1)" # define BASE_PREFETCH "prefetcht0 %P1"
#endif #endif
/* /*
@ -775,10 +803,9 @@ extern char ignore_fpu_irq;
*/ */
static inline void prefetch(const void *x) static inline void prefetch(const void *x)
{ {
alternative_input(BASE_PREFETCH, alternative_input(BASE_PREFETCH, "prefetchnta %P1",
"prefetchnta (%1)",
X86_FEATURE_XMM, X86_FEATURE_XMM,
"r" (x)); "m" (*(const char *)x));
} }
/* /*
@ -788,10 +815,9 @@ static inline void prefetch(const void *x)
*/ */
static inline void prefetchw(const void *x) static inline void prefetchw(const void *x)
{ {
alternative_input(BASE_PREFETCH, alternative_input(BASE_PREFETCH, "prefetchw %P1",
"prefetchw (%1)", X86_FEATURE_3DNOWPREFETCH,
X86_FEATURE_3DNOW, "m" (*(const char *)x));
"r" (x));
} }
static inline void spin_lock_prefetch(const void *x) static inline void spin_lock_prefetch(const void *x)
@ -799,6 +825,9 @@ static inline void spin_lock_prefetch(const void *x)
prefetchw(x); prefetchw(x);
} }
#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
TOP_OF_KERNEL_STACK_PADDING)
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
/* /*
* User space process size: 3GB (default). * User space process size: 3GB (default).
@ -809,39 +838,16 @@ static inline void spin_lock_prefetch(const void *x)
#define STACK_TOP_MAX STACK_TOP #define STACK_TOP_MAX STACK_TOP
#define INIT_THREAD { \ #define INIT_THREAD { \
.sp0 = sizeof(init_stack) + (long)&init_stack, \ .sp0 = TOP_OF_INIT_STACK, \
.vm86_info = NULL, \ .vm86_info = NULL, \
.sysenter_cs = __KERNEL_CS, \ .sysenter_cs = __KERNEL_CS, \
.io_bitmap_ptr = NULL, \ .io_bitmap_ptr = NULL, \
} }
/*
* Note that the .io_bitmap member must be extra-big. This is because
* the CPU will access an additional byte beyond the end of the IO
* permission bitmap. The extra byte must be all 1 bits, and must
* be within the limit.
*/
#define INIT_TSS { \
.x86_tss = { \
.sp0 = sizeof(init_stack) + (long)&init_stack, \
.ss0 = __KERNEL_DS, \
.ss1 = __KERNEL_CS, \
.io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \
}, \
.io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, \
}
extern unsigned long thread_saved_pc(struct task_struct *tsk); extern unsigned long thread_saved_pc(struct task_struct *tsk);
#define THREAD_SIZE_LONGS (THREAD_SIZE/sizeof(unsigned long))
#define KSTK_TOP(info) \
({ \
unsigned long *__ptr = (unsigned long *)(info); \
(unsigned long)(&__ptr[THREAD_SIZE_LONGS]); \
})
/* /*
* The below -8 is to reserve 8 bytes on top of the ring0 stack. * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack.
* This is necessary to guarantee that the entire "struct pt_regs" * This is necessary to guarantee that the entire "struct pt_regs"
* is accessible even if the CPU haven't stored the SS/ESP registers * is accessible even if the CPU haven't stored the SS/ESP registers
* on the stack (interrupt gate does not save these registers * on the stack (interrupt gate does not save these registers
@ -850,11 +856,11 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
* "struct pt_regs" is possible, but they may contain the * "struct pt_regs" is possible, but they may contain the
* completely wrong values. * completely wrong values.
*/ */
#define task_pt_regs(task) \ #define task_pt_regs(task) \
({ \ ({ \
struct pt_regs *__regs__; \ unsigned long __ptr = (unsigned long)task_stack_page(task); \
__regs__ = (struct pt_regs *)(KSTK_TOP(task_stack_page(task))-8); \ __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \
__regs__ - 1; \ ((struct pt_regs *)__ptr) - 1; \
}) })
#define KSTK_ESP(task) (task_pt_regs(task)->sp) #define KSTK_ESP(task) (task_pt_regs(task)->sp)
@ -886,11 +892,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
#define STACK_TOP_MAX TASK_SIZE_MAX #define STACK_TOP_MAX TASK_SIZE_MAX
#define INIT_THREAD { \ #define INIT_THREAD { \
.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \ .sp0 = TOP_OF_INIT_STACK \
}
#define INIT_TSS { \
.x86_tss.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
} }
/* /*
@ -902,11 +904,6 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) #define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
extern unsigned long KSTK_ESP(struct task_struct *task); extern unsigned long KSTK_ESP(struct task_struct *task);
/*
* User space RSP while inside the SYSCALL fast path
*/
DECLARE_PER_CPU(unsigned long, old_rsp);
#endif /* CONFIG_X86_64 */ #endif /* CONFIG_X86_64 */
extern void start_thread(struct pt_regs *regs, unsigned long new_ip, extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
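To make the new task_pt_regs() arithmetic concrete, a worked example for a 32-bit kernel, assuming THREAD_SIZE is 8192 (the address is made up for illustration):

	__ptr  = task_stack_page(task);                      /* e.g. 0xc1234000 */
	__ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;  /* 0xc1234000 + 0x2000 - 8 = 0xc1235ff8 */
	regs   = (struct pt_regs *)__ptr - 1;                /* pt_regs ends at the padded stack top */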


@ -31,13 +31,17 @@ struct pt_regs {
#else /* __i386__ */ #else /* __i386__ */
struct pt_regs { struct pt_regs {
/*
* C ABI says these regs are callee-preserved. They aren't saved on kernel entry
* unless syscall needs a complete, fully filled "struct pt_regs".
*/
unsigned long r15; unsigned long r15;
unsigned long r14; unsigned long r14;
unsigned long r13; unsigned long r13;
unsigned long r12; unsigned long r12;
unsigned long bp; unsigned long bp;
unsigned long bx; unsigned long bx;
/* arguments: non interrupts/non tracing syscalls only save up to here*/ /* These regs are callee-clobbered. Always saved on kernel entry. */
unsigned long r11; unsigned long r11;
unsigned long r10; unsigned long r10;
unsigned long r9; unsigned long r9;
@ -47,9 +51,12 @@ struct pt_regs {
unsigned long dx; unsigned long dx;
unsigned long si; unsigned long si;
unsigned long di; unsigned long di;
/*
* On syscall entry, this is syscall#. On CPU exception, this is error code.
* On hw interrupt, it's IRQ number:
*/
unsigned long orig_ax; unsigned long orig_ax;
/* end of arguments */ /* Return frame for iretq */
/* cpu exception frame or undefined */
unsigned long ip; unsigned long ip;
unsigned long cs; unsigned long cs;
unsigned long flags; unsigned long flags;
@ -89,11 +96,13 @@ static inline unsigned long regs_return_value(struct pt_regs *regs)
} }
/* /*
* user_mode_vm(regs) determines whether a register set came from user mode. * user_mode(regs) determines whether a register set came from user
* This is true if V8086 mode was enabled OR if the register set was from * mode. On x86_32, this is true if V8086 mode was enabled OR if the
* protected mode with RPL-3 CS value. This tricky test checks that with * register set was from protected mode with RPL-3 CS value. This
* one comparison. Many places in the kernel can bypass this full check * tricky test checks that with one comparison.
* if they have already ruled out V8086 mode, so user_mode(regs) can be used. *
* On x86_64, vm86 mode is mercifully nonexistent, and we don't need
* the extra check.
*/ */
static inline int user_mode(struct pt_regs *regs) static inline int user_mode(struct pt_regs *regs)
{ {
@ -104,16 +113,6 @@ static inline int user_mode(struct pt_regs *regs)
#endif #endif
} }
static inline int user_mode_vm(struct pt_regs *regs)
{
#ifdef CONFIG_X86_32
return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >=
USER_RPL;
#else
return user_mode(regs);
#endif
}
static inline int v8086_mode(struct pt_regs *regs) static inline int v8086_mode(struct pt_regs *regs)
{ {
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
@ -138,12 +137,8 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
#endif #endif
} }
#define current_user_stack_pointer() this_cpu_read(old_rsp) #define current_user_stack_pointer() current_pt_regs()->sp
/* ia32 vs. x32 difference */ #define compat_user_stack_pointer() current_pt_regs()->sp
#define compat_user_stack_pointer() \
(test_thread_flag(TIF_IA32) \
? current_pt_regs()->sp \
: this_cpu_read(old_rsp))
#endif #endif
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
@ -248,7 +243,7 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
*/ */
#define arch_ptrace_stop_needed(code, info) \ #define arch_ptrace_stop_needed(code, info) \
({ \ ({ \
set_thread_flag(TIF_NOTIFY_RESUME); \ force_iret(); \
false; \ false; \
}) })
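The user_mode_vm() removal above turns call sites into a one-line change. A before/after sketch (do_user_thing() is a hypothetical caller; the real conversions, e.g. in poke_int3_handler() later in this merge, look the same):

	/* before: the full RPL|VM86 check needed the _vm variant on 32-bit */
	if (user_mode_vm(regs))
		do_user_thing(regs);

	/* after: user_mode() itself performs the full check on 32-bit too */
	if (user_mode(regs))
		do_user_thing(regs);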


@ -3,8 +3,10 @@
#include <linux/const.h> #include <linux/const.h>
/* Constructor for a conventional segment GDT (or LDT) entry */ /*
/* This is a macro so it can be used in initializers */ * Constructor for a conventional segment GDT (or LDT) entry.
* This is a macro so it can be used in initializers.
*/
#define GDT_ENTRY(flags, base, limit) \ #define GDT_ENTRY(flags, base, limit) \
((((base) & _AC(0xff000000,ULL)) << (56-24)) | \ ((((base) & _AC(0xff000000,ULL)) << (56-24)) | \
(((flags) & _AC(0x0000f0ff,ULL)) << 40) | \ (((flags) & _AC(0x0000f0ff,ULL)) << 40) | \
@ -12,198 +14,228 @@
(((base) & _AC(0x00ffffff,ULL)) << 16) | \ (((base) & _AC(0x00ffffff,ULL)) << 16) | \
(((limit) & _AC(0x0000ffff,ULL)))) (((limit) & _AC(0x0000ffff,ULL))))
/* Simple and small GDT entries for booting only */ /* Simple and small GDT entries for booting only: */
#define GDT_ENTRY_BOOT_CS 2 #define GDT_ENTRY_BOOT_CS 2
#define __BOOT_CS (GDT_ENTRY_BOOT_CS * 8) #define GDT_ENTRY_BOOT_DS 3
#define GDT_ENTRY_BOOT_TSS 4
#define __BOOT_CS (GDT_ENTRY_BOOT_CS*8)
#define __BOOT_DS (GDT_ENTRY_BOOT_DS*8)
#define __BOOT_TSS (GDT_ENTRY_BOOT_TSS*8)
#define GDT_ENTRY_BOOT_DS (GDT_ENTRY_BOOT_CS + 1) /*
#define __BOOT_DS (GDT_ENTRY_BOOT_DS * 8) * Bottom two bits of selector give the ring
* privilege level
*/
#define SEGMENT_RPL_MASK 0x3
#define GDT_ENTRY_BOOT_TSS (GDT_ENTRY_BOOT_CS + 2) /* User mode is privilege level 3: */
#define __BOOT_TSS (GDT_ENTRY_BOOT_TSS * 8) #define USER_RPL 0x3
#define SEGMENT_RPL_MASK 0x3 /* /* Bit 2 is Table Indicator (TI): selects between LDT or GDT */
* Bottom two bits of selector give the ring #define SEGMENT_TI_MASK 0x4
* privilege level /* LDT segment has TI set ... */
*/ #define SEGMENT_LDT 0x4
#define SEGMENT_TI_MASK 0x4 /* Bit 2 is table indicator (LDT/GDT) */ /* ... GDT has it cleared */
#define USER_RPL 0x3 /* User mode is privilege level 3 */ #define SEGMENT_GDT 0x0
#define SEGMENT_LDT 0x4 /* LDT segment has TI set... */
#define SEGMENT_GDT 0x0 /* ... GDT has it cleared */ #define GDT_ENTRY_INVALID_SEG 0
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
/* /*
* The layout of the per-CPU GDT under Linux: * The layout of the per-CPU GDT under Linux:
* *
* 0 - null * 0 - null <=== cacheline #1
* 1 - reserved * 1 - reserved
* 2 - reserved * 2 - reserved
* 3 - reserved * 3 - reserved
* *
* 4 - unused <==== new cacheline * 4 - unused <=== cacheline #2
* 5 - unused * 5 - unused
* *
* ------- start of TLS (Thread-Local Storage) segments: * ------- start of TLS (Thread-Local Storage) segments:
* *
* 6 - TLS segment #1 [ glibc's TLS segment ] * 6 - TLS segment #1 [ glibc's TLS segment ]
* 7 - TLS segment #2 [ Wine's %fs Win32 segment ] * 7 - TLS segment #2 [ Wine's %fs Win32 segment ]
* 8 - TLS segment #3 * 8 - TLS segment #3 <=== cacheline #3
* 9 - reserved * 9 - reserved
* 10 - reserved * 10 - reserved
* 11 - reserved * 11 - reserved
* *
* ------- start of kernel segments: * ------- start of kernel segments:
* *
* 12 - kernel code segment <==== new cacheline * 12 - kernel code segment <=== cacheline #4
* 13 - kernel data segment * 13 - kernel data segment
* 14 - default user CS * 14 - default user CS
* 15 - default user DS * 15 - default user DS
* 16 - TSS * 16 - TSS <=== cacheline #5
* 17 - LDT * 17 - LDT
* 18 - PNPBIOS support (16->32 gate) * 18 - PNPBIOS support (16->32 gate)
* 19 - PNPBIOS support * 19 - PNPBIOS support
* 20 - PNPBIOS support * 20 - PNPBIOS support <=== cacheline #6
* 21 - PNPBIOS support * 21 - PNPBIOS support
* 22 - PNPBIOS support * 22 - PNPBIOS support
* 23 - APM BIOS support * 23 - APM BIOS support
* 24 - APM BIOS support * 24 - APM BIOS support <=== cacheline #7
* 25 - APM BIOS support * 25 - APM BIOS support
* *
* 26 - ESPFIX small SS * 26 - ESPFIX small SS
* 27 - per-cpu [ offset to per-cpu data area ] * 27 - per-cpu [ offset to per-cpu data area ]
* 28 - stack_canary-20 [ for stack protector ] * 28 - stack_canary-20 [ for stack protector ] <=== cacheline #8
* 29 - unused * 29 - unused
* 30 - unused * 30 - unused
* 31 - TSS for double fault handler * 31 - TSS for double fault handler
*/ */
#define GDT_ENTRY_TLS_MIN 6 #define GDT_ENTRY_TLS_MIN 6
#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1) #define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
#define GDT_ENTRY_KERNEL_CS 12
#define GDT_ENTRY_KERNEL_DS 13
#define GDT_ENTRY_DEFAULT_USER_CS 14 #define GDT_ENTRY_DEFAULT_USER_CS 14
#define GDT_ENTRY_DEFAULT_USER_DS 15 #define GDT_ENTRY_DEFAULT_USER_DS 15
#define GDT_ENTRY_TSS 16
#define GDT_ENTRY_LDT 17
#define GDT_ENTRY_PNPBIOS_CS32 18
#define GDT_ENTRY_PNPBIOS_CS16 19
#define GDT_ENTRY_PNPBIOS_DS 20
#define GDT_ENTRY_PNPBIOS_TS1 21
#define GDT_ENTRY_PNPBIOS_TS2 22
#define GDT_ENTRY_APMBIOS_BASE 23
#define GDT_ENTRY_KERNEL_BASE (12) #define GDT_ENTRY_ESPFIX_SS 26
#define GDT_ENTRY_PERCPU 27
#define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE+0) #define GDT_ENTRY_STACK_CANARY 28
#define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE+1)
#define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE+4)
#define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE+5)
#define GDT_ENTRY_PNPBIOS_BASE (GDT_ENTRY_KERNEL_BASE+6)
#define GDT_ENTRY_APMBIOS_BASE (GDT_ENTRY_KERNEL_BASE+11)
#define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE+14)
#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS*8)
#define GDT_ENTRY_PERCPU (GDT_ENTRY_KERNEL_BASE+15)
#ifdef CONFIG_SMP
#define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8)
#else
#define __KERNEL_PERCPU 0
#endif
#define GDT_ENTRY_STACK_CANARY (GDT_ENTRY_KERNEL_BASE+16)
#ifdef CONFIG_CC_STACKPROTECTOR
#define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY*8)
#else
#define __KERNEL_STACK_CANARY 0
#endif
#define GDT_ENTRY_DOUBLEFAULT_TSS 31 #define GDT_ENTRY_DOUBLEFAULT_TSS 31
/* /*
* The GDT has 32 entries * Number of entries in the GDT table:
*/ */
#define GDT_ENTRIES 32 #define GDT_ENTRIES 32
/* The PnP BIOS entries in the GDT */
#define GDT_ENTRY_PNPBIOS_CS32 (GDT_ENTRY_PNPBIOS_BASE + 0)
#define GDT_ENTRY_PNPBIOS_CS16 (GDT_ENTRY_PNPBIOS_BASE + 1)
#define GDT_ENTRY_PNPBIOS_DS (GDT_ENTRY_PNPBIOS_BASE + 2)
#define GDT_ENTRY_PNPBIOS_TS1 (GDT_ENTRY_PNPBIOS_BASE + 3)
#define GDT_ENTRY_PNPBIOS_TS2 (GDT_ENTRY_PNPBIOS_BASE + 4)
/* The PnP BIOS selectors */
#define PNP_CS32 (GDT_ENTRY_PNPBIOS_CS32 * 8) /* segment for calling fn */
#define PNP_CS16 (GDT_ENTRY_PNPBIOS_CS16 * 8) /* code segment for BIOS */
#define PNP_DS (GDT_ENTRY_PNPBIOS_DS * 8) /* data segment for BIOS */
#define PNP_TS1 (GDT_ENTRY_PNPBIOS_TS1 * 8) /* transfer data segment */
#define PNP_TS2 (GDT_ENTRY_PNPBIOS_TS2 * 8) /* another data segment */
/* /*
* Matching rules for certain types of segments. * Segment selector values corresponding to the above entries:
*/ */
/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */ #define __KERNEL_CS (GDT_ENTRY_KERNEL_CS*8)
#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8) #define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8)
#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8 + 3)
#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS*8)
/* segment for calling fn: */
#define PNP_CS32 (GDT_ENTRY_PNPBIOS_CS32*8)
/* code segment for BIOS: */
#define PNP_CS16 (GDT_ENTRY_PNPBIOS_CS16*8)
/* "Is this PNP code selector (PNP_CS32 or PNP_CS16)?" */
#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == PNP_CS32)
/* data segment for BIOS: */
#define PNP_DS (GDT_ENTRY_PNPBIOS_DS*8)
/* transfer data segment: */
#define PNP_TS1 (GDT_ENTRY_PNPBIOS_TS1*8)
/* another data segment: */
#define PNP_TS2 (GDT_ENTRY_PNPBIOS_TS2*8)
#ifdef CONFIG_SMP
# define __KERNEL_PERCPU (GDT_ENTRY_PERCPU*8)
#else #else
# define __KERNEL_PERCPU 0
#endif
#ifdef CONFIG_CC_STACKPROTECTOR
# define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY*8)
#else
# define __KERNEL_STACK_CANARY 0
#endif
#else /* 64-bit: */
#include <asm/cache.h> #include <asm/cache.h>
#define GDT_ENTRY_KERNEL32_CS 1 #define GDT_ENTRY_KERNEL32_CS 1
#define GDT_ENTRY_KERNEL_CS 2 #define GDT_ENTRY_KERNEL_CS 2
#define GDT_ENTRY_KERNEL_DS 3 #define GDT_ENTRY_KERNEL_DS 3
#define __KERNEL32_CS (GDT_ENTRY_KERNEL32_CS * 8)
/* /*
* we cannot use the same code segment descriptor for user and kernel * We cannot use the same code segment descriptor for user and kernel mode,
* -- not even in the long flat mode, because of different DPL /kkeil * not even in long flat mode, because of different DPL.
* The segment offset needs to contain a RPL. Grr. -AK *
* GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets) * GDT layout to get 64-bit SYSCALL/SYSRET support right. SYSRET hardcodes
* selectors:
*
* if returning to 32-bit userspace: cs = STAR.SYSRET_CS,
* if returning to 64-bit userspace: cs = STAR.SYSRET_CS+16,
*
* ss = STAR.SYSRET_CS+8 (in either case)
*
* thus USER_DS should be between 32-bit and 64-bit code selectors:
*/ */
#define GDT_ENTRY_DEFAULT_USER32_CS 4 #define GDT_ENTRY_DEFAULT_USER32_CS 4
#define GDT_ENTRY_DEFAULT_USER_DS 5 #define GDT_ENTRY_DEFAULT_USER_DS 5
#define GDT_ENTRY_DEFAULT_USER_CS 6 #define GDT_ENTRY_DEFAULT_USER_CS 6
#define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS*8+3)
#define __USER32_DS __USER_DS
#define GDT_ENTRY_TSS 8 /* needs two entries */ /* Needs two entries */
#define GDT_ENTRY_LDT 10 /* needs two entries */ #define GDT_ENTRY_TSS 8
#define GDT_ENTRY_TLS_MIN 12 /* Needs two entries */
#define GDT_ENTRY_TLS_MAX 14 #define GDT_ENTRY_LDT 10
#define GDT_ENTRY_PER_CPU 15 /* Abused to load per CPU data from limit */ #define GDT_ENTRY_TLS_MIN 12
#define __PER_CPU_SEG (GDT_ENTRY_PER_CPU * 8 + 3) #define GDT_ENTRY_TLS_MAX 14
/* TLS indexes for 64bit - hardcoded in arch_prctl */ /* Abused to load per CPU data from limit */
#define FS_TLS 0 #define GDT_ENTRY_PER_CPU 15
#define GS_TLS 1
#define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3) /*
#define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3) * Number of entries in the GDT table:
*/
#define GDT_ENTRIES 16
#define GDT_ENTRIES 16 /*
* Segment selector values corresponding to the above entries:
*
* Note, selectors also need to have a correct RPL,
* expressed with the +3 value for user-space selectors:
*/
#define __KERNEL32_CS (GDT_ENTRY_KERNEL32_CS*8)
#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS*8)
#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8)
#define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS*8 + 3)
#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8 + 3)
#define __USER32_DS __USER_DS
#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
#define __PER_CPU_SEG (GDT_ENTRY_PER_CPU*8 + 3)
/* TLS indexes for 64-bit - hardcoded in arch_prctl(): */
#define FS_TLS 0
#define GS_TLS 1
#define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3)
#define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
#endif #endif
#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS*8)
#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8)
#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8+3)
#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8+3)
#ifndef CONFIG_PARAVIRT #ifndef CONFIG_PARAVIRT
#define get_kernel_rpl() 0 # define get_kernel_rpl() 0
#endif #endif
#define IDT_ENTRIES 256 #define IDT_ENTRIES 256
#define NUM_EXCEPTION_VECTORS 32 #define NUM_EXCEPTION_VECTORS 32
/* Bitmask of exception vectors which push an error code on the stack */
#define EXCEPTION_ERRCODE_MASK 0x00027d00 /* Bitmask of exception vectors which push an error code on the stack: */
#define GDT_SIZE (GDT_ENTRIES * 8) #define EXCEPTION_ERRCODE_MASK 0x00027d00
#define GDT_ENTRY_TLS_ENTRIES 3
#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8) #define GDT_SIZE (GDT_ENTRIES*8)
#define GDT_ENTRY_TLS_ENTRIES 3
#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES* 8)
#ifdef __KERNEL__ #ifdef __KERNEL__
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][2+2+5]; extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][2+2+5];
#ifdef CONFIG_TRACING #ifdef CONFIG_TRACING
#define trace_early_idt_handlers early_idt_handlers # define trace_early_idt_handlers early_idt_handlers
#endif #endif
/* /*
@ -228,37 +260,30 @@ do { \
} while (0) } while (0)
/* /*
* Save a segment register away * Save a segment register away:
*/ */
#define savesegment(seg, value) \ #define savesegment(seg, value) \
asm("mov %%" #seg ",%0":"=r" (value) : : "memory") asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
/* /*
* x86_32 user gs accessors. * x86-32 user GS accessors:
*/ */
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
#ifdef CONFIG_X86_32_LAZY_GS # ifdef CONFIG_X86_32_LAZY_GS
#define get_user_gs(regs) (u16)({unsigned long v; savesegment(gs, v); v;}) # define get_user_gs(regs) (u16)({ unsigned long v; savesegment(gs, v); v; })
#define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v)) # define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v))
#define task_user_gs(tsk) ((tsk)->thread.gs) # define task_user_gs(tsk) ((tsk)->thread.gs)
#define lazy_save_gs(v) savesegment(gs, (v)) # define lazy_save_gs(v) savesegment(gs, (v))
#define lazy_load_gs(v) loadsegment(gs, (v)) # define lazy_load_gs(v) loadsegment(gs, (v))
#else /* X86_32_LAZY_GS */ # else /* X86_32_LAZY_GS */
#define get_user_gs(regs) (u16)((regs)->gs) # define get_user_gs(regs) (u16)((regs)->gs)
#define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0) # define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0)
#define task_user_gs(tsk) (task_pt_regs(tsk)->gs) # define task_user_gs(tsk) (task_pt_regs(tsk)->gs)
#define lazy_save_gs(v) do { } while (0) # define lazy_save_gs(v) do { } while (0)
#define lazy_load_gs(v) do { } while (0) # define lazy_load_gs(v) do { } while (0)
#endif /* X86_32_LAZY_GS */ # endif /* X86_32_LAZY_GS */
#endif /* X86_32 */ #endif /* X86_32 */
static inline unsigned long get_limit(unsigned long segment)
{
unsigned long __limit;
asm("lsll %1,%0" : "=r" (__limit) : "r" (segment));
return __limit + 1;
}
#endif /* !__ASSEMBLY__ */ #endif /* !__ASSEMBLY__ */
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
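A worked example of the SYSRET selector constraint spelled out in the 64-bit comment above, using the selector values defined in this hunk (syscall_init() later in this merge programs STAR.SYSRET_CS with __USER32_CS):

	__USER32_CS = 4*8 + 3 = 0x23    /* STAR.SYSRET_CS                       */
	__USER_DS   = 5*8 + 3 = 0x2b    /* STAR.SYSRET_CS +  8: SS either way   */
	__USER_CS   = 6*8 + 3 = 0x33    /* STAR.SYSRET_CS + 16: 64-bit user CS  */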


@ -66,6 +66,11 @@ static inline void x86_ce4100_early_setup(void) { }
*/ */
extern struct boot_params boot_params; extern struct boot_params boot_params;
static inline bool kaslr_enabled(void)
{
return !!(boot_params.hdr.loadflags & KASLR_FLAG);
}
/* /*
* Do NOT EVER look at the BIOS memory size location. * Do NOT EVER look at the BIOS memory size location.
* It does not work on many machines. * It does not work on many machines.
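A hypothetical caller of the new helper, just to show the intended use (the message text is made up):

	if (kaslr_enabled())
		pr_info("kernel base address randomized by KASLR\n");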


@ -57,9 +57,9 @@ struct sigcontext {
unsigned long ip; unsigned long ip;
unsigned long flags; unsigned long flags;
unsigned short cs; unsigned short cs;
unsigned short gs; unsigned short __pad2; /* Was called gs, but was always zero. */
unsigned short fs; unsigned short __pad1; /* Was called fs, but was always zero. */
unsigned short __pad0; unsigned short ss;
unsigned long err; unsigned long err;
unsigned long trapno; unsigned long trapno;
unsigned long oldmask; unsigned long oldmask;


@ -13,9 +13,7 @@
X86_EFLAGS_CF | X86_EFLAGS_RF) X86_EFLAGS_CF | X86_EFLAGS_RF)
void signal_fault(struct pt_regs *regs, void __user *frame, char *where); void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc);
int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
unsigned long *pax);
int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
struct pt_regs *regs, unsigned long mask); struct pt_regs *regs, unsigned long mask);


@ -27,23 +27,11 @@
#ifdef CONFIG_X86_SMAP #ifdef CONFIG_X86_SMAP
#define ASM_CLAC \ #define ASM_CLAC \
661: ASM_NOP3 ; \ ALTERNATIVE "", __stringify(__ASM_CLAC), X86_FEATURE_SMAP
.pushsection .altinstr_replacement, "ax" ; \
662: __ASM_CLAC ; \
.popsection ; \
.pushsection .altinstructions, "a" ; \
altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \
.popsection
#define ASM_STAC \ #define ASM_STAC \
661: ASM_NOP3 ; \ ALTERNATIVE "", __stringify(__ASM_STAC), X86_FEATURE_SMAP
.pushsection .altinstr_replacement, "ax" ; \
662: __ASM_STAC ; \
.popsection ; \
.pushsection .altinstructions, "a" ; \
altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \
.popsection
#else /* CONFIG_X86_SMAP */ #else /* CONFIG_X86_SMAP */
@ -61,20 +49,20 @@
static __always_inline void clac(void) static __always_inline void clac(void)
{ {
/* Note: a barrier is implicit in alternative() */ /* Note: a barrier is implicit in alternative() */
alternative(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP); alternative("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP);
} }
static __always_inline void stac(void) static __always_inline void stac(void)
{ {
/* Note: a barrier is implicit in alternative() */ /* Note: a barrier is implicit in alternative() */
alternative(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP); alternative("", __stringify(__ASM_STAC), X86_FEATURE_SMAP);
} }
/* These macros can be used in asm() statements */ /* These macros can be used in asm() statements */
#define ASM_CLAC \ #define ASM_CLAC \
ALTERNATIVE(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP) ALTERNATIVE("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP)
#define ASM_STAC \ #define ASM_STAC \
ALTERNATIVE(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP) ALTERNATIVE("", __stringify(__ASM_STAC), X86_FEATURE_SMAP)
#else /* CONFIG_X86_SMAP */ #else /* CONFIG_X86_SMAP */
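Sketch of the usual pattern these primitives serve, assuming SMAP-capable hardware; with the change above, the feature-off case now patches to nothing instead of a 3-byte NOP (dst, usrc and len are placeholders):

	stac();                                          /* open the user-access window */
	err = __copy_from_user_inatomic(dst, usrc, len);
	clac();                                          /* close it again              */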


@ -154,6 +154,7 @@ void cpu_die_common(unsigned int cpu);
void native_smp_prepare_boot_cpu(void); void native_smp_prepare_boot_cpu(void);
void native_smp_prepare_cpus(unsigned int max_cpus); void native_smp_prepare_cpus(unsigned int max_cpus);
void native_smp_cpus_done(unsigned int max_cpus); void native_smp_cpus_done(unsigned int max_cpus);
void common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
int native_cpu_up(unsigned int cpunum, struct task_struct *tidle); int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
int native_cpu_disable(void); int native_cpu_disable(void);
void native_cpu_die(unsigned int cpu); void native_cpu_die(unsigned int cpu);


@ -4,6 +4,8 @@
#ifdef __KERNEL__ #ifdef __KERNEL__
#include <asm/nops.h>
static inline void native_clts(void) static inline void native_clts(void)
{ {
asm volatile("clts"); asm volatile("clts");
@ -199,6 +201,28 @@ static inline void clflushopt(volatile void *__p)
"+m" (*(volatile char __force *)__p)); "+m" (*(volatile char __force *)__p));
} }
static inline void clwb(volatile void *__p)
{
volatile struct { char x[64]; } *p = __p;
asm volatile(ALTERNATIVE_2(
".byte " __stringify(NOP_DS_PREFIX) "; clflush (%[pax])",
".byte 0x66; clflush (%[pax])", /* clflushopt (%%rax) */
X86_FEATURE_CLFLUSHOPT,
".byte 0x66, 0x0f, 0xae, 0x30", /* clwb (%%rax) */
X86_FEATURE_CLWB)
: [p] "+m" (*p)
: [pax] "a" (p));
}
static inline void pcommit_sfence(void)
{
alternative(ASM_NOP7,
".byte 0x66, 0x0f, 0xae, 0xf8\n\t" /* pcommit */
"sfence",
X86_FEATURE_PCOMMIT);
}
#define nop() asm volatile ("nop") #define nop() asm volatile ("nop")
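A minimal sketch of how the two new helpers above might be combined, assuming 64-byte cache lines (flush_buf_for_persistence() is an invented name, not part of this patch):

static void flush_buf_for_persistence(void *addr, size_t len)
{
	char *p;

	for (p = (char *)addr; p < (char *)addr + len; p += 64)
		clwb(p);          /* write back each line; may leave it cached   */
	pcommit_sfence();         /* PCOMMIT + SFENCE when the CPU supports it,
				   * otherwise patched to NOPs                    */
}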


@ -12,6 +12,33 @@
#include <asm/percpu.h> #include <asm/percpu.h>
#include <asm/types.h> #include <asm/types.h>
/*
* TOP_OF_KERNEL_STACK_PADDING is a number of unused bytes that we
* reserve at the top of the kernel stack. We do it because of a nasty
* 32-bit corner case. On x86_32, the hardware stack frame is
* variable-length. Except for vm86 mode, struct pt_regs assumes a
* maximum-length frame. If we enter from CPL 0, the top 8 bytes of
* pt_regs don't actually exist. Ordinarily this doesn't matter, but it
* does in at least one case:
*
* If we take an NMI early enough in SYSENTER, then we can end up with
* pt_regs that extends above sp0. On the way out, in the espfix code,
* we can read the saved SS value, but that value will be above sp0.
* Without this offset, that can result in a page fault. (We are
* careful that, in this case, the value we read doesn't matter.)
*
* In vm86 mode, the hardware frame is much longer still, but we neither
* access the extra members from NMI context, nor do we write such a
* frame at sp0 at all.
*
* x86_64 has a fixed-length stack frame.
*/
#ifdef CONFIG_X86_32
# define TOP_OF_KERNEL_STACK_PADDING 8
#else
# define TOP_OF_KERNEL_STACK_PADDING 0
#endif
/* /*
* low level task data that entry.S needs immediate access to * low level task data that entry.S needs immediate access to
* - this struct should fit entirely inside of one cache line * - this struct should fit entirely inside of one cache line
@ -145,7 +172,6 @@ struct thread_info {
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
#define STACK_WARN (THREAD_SIZE/8) #define STACK_WARN (THREAD_SIZE/8)
#define KERNEL_STACK_OFFSET (5*(BITS_PER_LONG/8))
/* /*
* macros/functions for gaining access to the thread information structure * macros/functions for gaining access to the thread information structure
@ -158,10 +184,7 @@ DECLARE_PER_CPU(unsigned long, kernel_stack);
static inline struct thread_info *current_thread_info(void) static inline struct thread_info *current_thread_info(void)
{ {
struct thread_info *ti; return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE);
ti = (void *)(this_cpu_read_stable(kernel_stack) +
KERNEL_STACK_OFFSET - THREAD_SIZE);
return ti;
} }
static inline unsigned long current_stack_pointer(void) static inline unsigned long current_stack_pointer(void)
@ -177,16 +200,37 @@ static inline unsigned long current_stack_pointer(void)
#else /* !__ASSEMBLY__ */ #else /* !__ASSEMBLY__ */
/* how to get the thread information struct from ASM */ /* Load thread_info address into "reg" */
#define GET_THREAD_INFO(reg) \ #define GET_THREAD_INFO(reg) \
_ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \ _ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \
_ASM_SUB $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg ; _ASM_SUB $(THREAD_SIZE),reg ;
/* /*
* Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in * ASM operand which evaluates to a 'thread_info' address of
* a certain register (to be used in assembler memory operands). * the current task, if it is known that "reg" is exactly "off"
* bytes below the top of the stack currently.
*
* ( The kernel stack's size is known at build time, it is usually
* 2 or 4 pages, and the bottom of the kernel stack contains
* the thread_info structure. So to access the thread_info very
* quickly from assembly code we can calculate down from the
* top of the kernel stack to the bottom, using constant,
* build-time calculations only. )
*
* For example, to fetch the current thread_info->flags value into %eax
* on x86-64 defconfig kernels, in syscall entry code where RSP is
* currently at exactly SIZEOF_PTREGS bytes away from the top of the
* stack:
*
* mov ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS), %eax
*
* will translate to:
*
* 8b 84 24 b8 c0 ff ff mov -0x3f48(%rsp), %eax
*
* which is below the current RSP by almost 16K.
*/ */
#define THREAD_INFO(reg, off) KERNEL_STACK_OFFSET+(off)-THREAD_SIZE(reg) #define ASM_THREAD_INFO(field, reg, off) ((field)+(off)-THREAD_SIZE)(reg)
#endif #endif
@ -236,6 +280,16 @@ static inline bool is_ia32_task(void)
#endif #endif
return false; return false;
} }
/*
* Force syscall return via IRET by making it look as if there was
* some work pending. IRET is our most capable (but slowest) syscall
* return path, which is able to restore modified SS, CS and certain
* EFLAGS values that other (fast) syscall return instructions
* are not able to restore properly.
*/
#define force_iret() set_thread_flag(TIF_NOTIFY_RESUME)
#endif /* !__ASSEMBLY__ */ #endif /* !__ASSEMBLY__ */
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
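Sketch of the kind of path force_iret() is meant for: a sigreturn-style exit that has just rewritten segment state in pt_regs and therefore must not go out through the fast SYSRET/SYSEXIT path (sc_cs and sc_ss stand in for values restored from a saved sigcontext):

	regs->cs = sc_cs;
	regs->ss = sc_ss;
	force_iret();        /* flag the task so the return happens via IRET */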


@ -15,6 +15,7 @@
/* loadflags */ /* loadflags */
#define LOADED_HIGH (1<<0) #define LOADED_HIGH (1<<0)
#define KASLR_FLAG (1<<1)
#define QUIET_FLAG (1<<5) #define QUIET_FLAG (1<<5)
#define KEEP_SEGMENTS (1<<6) #define KEEP_SEGMENTS (1<<6)
#define CAN_USE_HEAP (1<<7) #define CAN_USE_HEAP (1<<7)


@ -25,13 +25,17 @@
#else /* __i386__ */ #else /* __i386__ */
#if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS) #if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS)
/*
* C ABI says these regs are callee-preserved. They aren't saved on kernel entry
* unless syscall needs a complete, fully filled "struct pt_regs".
*/
#define R15 0 #define R15 0
#define R14 8 #define R14 8
#define R13 16 #define R13 16
#define R12 24 #define R12 24
#define RBP 32 #define RBP 32
#define RBX 40 #define RBX 40
/* arguments: interrupts/non tracing syscalls only save up to here*/ /* These regs are callee-clobbered. Always saved on kernel entry. */
#define R11 48 #define R11 48
#define R10 56 #define R10 56
#define R9 64 #define R9 64
@ -41,15 +45,17 @@
#define RDX 96 #define RDX 96
#define RSI 104 #define RSI 104
#define RDI 112 #define RDI 112
#define ORIG_RAX 120 /* = ERROR */ /*
/* end of arguments */ * On syscall entry, this is syscall#. On CPU exception, this is error code.
/* cpu exception frame or undefined in case of fast syscall. */ * On hw interrupt, it's IRQ number:
*/
#define ORIG_RAX 120
/* Return frame for iretq */
#define RIP 128 #define RIP 128
#define CS 136 #define CS 136
#define EFLAGS 144 #define EFLAGS 144
#define RSP 152 #define RSP 152
#define SS 160 #define SS 160
#define ARGOFFSET R11
#endif /* __ASSEMBLY__ */ #endif /* __ASSEMBLY__ */
/* top of stack page */ /* top of stack page */


@ -41,13 +41,17 @@ struct pt_regs {
#ifndef __KERNEL__ #ifndef __KERNEL__
struct pt_regs { struct pt_regs {
/*
* C ABI says these regs are callee-preserved. They aren't saved on kernel entry
* unless syscall needs a complete, fully filled "struct pt_regs".
*/
unsigned long r15; unsigned long r15;
unsigned long r14; unsigned long r14;
unsigned long r13; unsigned long r13;
unsigned long r12; unsigned long r12;
unsigned long rbp; unsigned long rbp;
unsigned long rbx; unsigned long rbx;
/* arguments: non interrupts/non tracing syscalls only save up to here*/ /* These regs are callee-clobbered. Always saved on kernel entry. */
unsigned long r11; unsigned long r11;
unsigned long r10; unsigned long r10;
unsigned long r9; unsigned long r9;
@ -57,9 +61,12 @@ struct pt_regs {
unsigned long rdx; unsigned long rdx;
unsigned long rsi; unsigned long rsi;
unsigned long rdi; unsigned long rdi;
/*
* On syscall entry, this is syscall#. On CPU exception, this is error code.
* On hw interrupt, it's IRQ number:
*/
unsigned long orig_rax; unsigned long orig_rax;
/* end of arguments */ /* Return frame for iretq */
/* cpu exception frame or undefined */
unsigned long rip; unsigned long rip;
unsigned long cs; unsigned long cs;
unsigned long eflags; unsigned long eflags;


@ -177,9 +177,24 @@ struct sigcontext {
__u64 rip; __u64 rip;
__u64 eflags; /* RFLAGS */ __u64 eflags; /* RFLAGS */
__u16 cs; __u16 cs;
__u16 gs;
__u16 fs; /*
__u16 __pad0; * Prior to 2.5.64 ("[PATCH] x86-64 updates for 2.5.64-bk3"),
* Linux saved and restored fs and gs in these slots. This
* was counterproductive, as fsbase and gsbase were never
* saved, so arch_prctl was presumably unreliable.
*
* If these slots are ever needed for any other purpose, there
* is some risk that very old 64-bit binaries could get
* confused. I doubt that many such binaries still work,
* though, since the same patch in 2.5.64 also removed the
* 64-bit set_thread_area syscall, so it appears that there is
* no TLS API that works in both pre- and post-2.5.64 kernels.
*/
__u16 __pad2; /* Was gs. */
__u16 __pad1; /* Was fs. */
__u16 ss;
__u64 err; __u64 err;
__u64 trapno; __u64 trapno;
__u64 oldmask; __u64 oldmask;


@ -32,6 +32,7 @@ obj-$(CONFIG_X86_32) += i386_ksyms_32.o
obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
obj-$(CONFIG_X86_64) += mcount_64.o obj-$(CONFIG_X86_64) += mcount_64.o
obj-y += syscall_$(BITS).o vsyscall_gtod.o obj-y += syscall_$(BITS).o vsyscall_gtod.o
obj-$(CONFIG_IA32_EMULATION) += syscall_32.o
obj-$(CONFIG_X86_VSYSCALL_EMULATION) += vsyscall_64.o vsyscall_emu_64.o obj-$(CONFIG_X86_VSYSCALL_EMULATION) += vsyscall_64.o vsyscall_emu_64.o
obj-$(CONFIG_X86_ESPFIX64) += espfix_64.o obj-$(CONFIG_X86_ESPFIX64) += espfix_64.o
obj-$(CONFIG_SYSFS) += ksysfs.o obj-$(CONFIG_SYSFS) += ksysfs.o


@ -52,10 +52,25 @@ static int __init setup_noreplace_paravirt(char *str)
__setup("noreplace-paravirt", setup_noreplace_paravirt); __setup("noreplace-paravirt", setup_noreplace_paravirt);
#endif #endif
#define DPRINTK(fmt, ...) \ #define DPRINTK(fmt, args...) \
do { \ do { \
if (debug_alternative) \ if (debug_alternative) \
printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ printk(KERN_DEBUG "%s: " fmt "\n", __func__, ##args); \
} while (0)
#define DUMP_BYTES(buf, len, fmt, args...) \
do { \
if (unlikely(debug_alternative)) { \
int j; \
\
if (!(len)) \
break; \
\
printk(KERN_DEBUG fmt, ##args); \
for (j = 0; j < (len) - 1; j++) \
printk(KERN_CONT "%02hhx ", buf[j]); \
printk(KERN_CONT "%02hhx\n", buf[j]); \
} \
} while (0) } while (0)
/* /*
@ -243,12 +258,89 @@ extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern s32 __smp_locks[], __smp_locks_end[]; extern s32 __smp_locks[], __smp_locks_end[];
void *text_poke_early(void *addr, const void *opcode, size_t len); void *text_poke_early(void *addr, const void *opcode, size_t len);
/* Replace instructions with better alternatives for this CPU type. /*
This runs before SMP is initialized to avoid SMP problems with * Are we looking at a near JMP with a 1 or 4-byte displacement.
self modifying code. This implies that asymmetric systems where */
APs have less capabilities than the boot processor are not handled. static inline bool is_jmp(const u8 opcode)
Tough. Make sure you disable such features by hand. */ {
return opcode == 0xeb || opcode == 0xe9;
}
static void __init_or_module
recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
{
u8 *next_rip, *tgt_rip;
s32 n_dspl, o_dspl;
int repl_len;
if (a->replacementlen != 5)
return;
o_dspl = *(s32 *)(insnbuf + 1);
/* next_rip of the replacement JMP */
next_rip = repl_insn + a->replacementlen;
/* target rip of the replacement JMP */
tgt_rip = next_rip + o_dspl;
n_dspl = tgt_rip - orig_insn;
DPRINTK("target RIP: %p, new_displ: 0x%x", tgt_rip, n_dspl);
if (tgt_rip - orig_insn >= 0) {
if (n_dspl - 2 <= 127)
goto two_byte_jmp;
else
goto five_byte_jmp;
/* negative offset */
} else {
if (((n_dspl - 2) & 0xff) == (n_dspl - 2))
goto two_byte_jmp;
else
goto five_byte_jmp;
}
two_byte_jmp:
n_dspl -= 2;
insnbuf[0] = 0xeb;
insnbuf[1] = (s8)n_dspl;
add_nops(insnbuf + 2, 3);
repl_len = 2;
goto done;
five_byte_jmp:
n_dspl -= 5;
insnbuf[0] = 0xe9;
*(s32 *)&insnbuf[1] = n_dspl;
repl_len = 5;
done:
DPRINTK("final displ: 0x%08x, JMP 0x%lx",
n_dspl, (unsigned long)orig_insn + n_dspl + repl_len);
}
static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr)
{
if (instr[0] != 0x90)
return;
add_nops(instr + (a->instrlen - a->padlen), a->padlen);
DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ",
instr, a->instrlen - a->padlen, a->padlen);
}
/*
* Replace instructions with better alternatives for this CPU type. This runs
* before SMP is initialized to avoid SMP problems with self modifying code.
* This implies that asymmetric systems where APs have less capabilities than
* the boot processor are not handled. Tough. Make sure you disable such
* features by hand.
*/
void __init_or_module apply_alternatives(struct alt_instr *start, void __init_or_module apply_alternatives(struct alt_instr *start,
struct alt_instr *end) struct alt_instr *end)
{ {
@ -256,10 +348,10 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
u8 *instr, *replacement; u8 *instr, *replacement;
u8 insnbuf[MAX_PATCH_LEN]; u8 insnbuf[MAX_PATCH_LEN];
DPRINTK("%s: alt table %p -> %p\n", __func__, start, end); DPRINTK("alt table %p -> %p", start, end);
/* /*
* The scan order should be from start to end. A later scanned * The scan order should be from start to end. A later scanned
* alternative code can overwrite a previous scanned alternative code. * alternative code can overwrite previously scanned alternative code.
* Some kernel functions (e.g. memcpy, memset, etc) use this order to * Some kernel functions (e.g. memcpy, memset, etc) use this order to
* patch code. * patch code.
* *
@ -267,29 +359,54 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
* order. * order.
*/ */
for (a = start; a < end; a++) { for (a = start; a < end; a++) {
int insnbuf_sz = 0;
instr = (u8 *)&a->instr_offset + a->instr_offset; instr = (u8 *)&a->instr_offset + a->instr_offset;
replacement = (u8 *)&a->repl_offset + a->repl_offset; replacement = (u8 *)&a->repl_offset + a->repl_offset;
BUG_ON(a->replacementlen > a->instrlen);
BUG_ON(a->instrlen > sizeof(insnbuf)); BUG_ON(a->instrlen > sizeof(insnbuf));
BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32); BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);
if (!boot_cpu_has(a->cpuid)) if (!boot_cpu_has(a->cpuid)) {
if (a->padlen > 1)
optimize_nops(a, instr);
continue; continue;
}
DPRINTK("feat: %d*32+%d, old: (%p, len: %d), repl: (%p, len: %d), pad: %d",
a->cpuid >> 5,
a->cpuid & 0x1f,
instr, a->instrlen,
replacement, a->replacementlen, a->padlen);
DUMP_BYTES(instr, a->instrlen, "%p: old_insn: ", instr);
DUMP_BYTES(replacement, a->replacementlen, "%p: rpl_insn: ", replacement);
memcpy(insnbuf, replacement, a->replacementlen); memcpy(insnbuf, replacement, a->replacementlen);
insnbuf_sz = a->replacementlen;
/* 0xe8 is a relative jump; fix the offset. */ /* 0xe8 is a relative jump; fix the offset. */
if (*insnbuf == 0xe8 && a->replacementlen == 5) if (*insnbuf == 0xe8 && a->replacementlen == 5) {
*(s32 *)(insnbuf + 1) += replacement - instr; *(s32 *)(insnbuf + 1) += replacement - instr;
DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx",
*(s32 *)(insnbuf + 1),
(unsigned long)instr + *(s32 *)(insnbuf + 1) + 5);
}
add_nops(insnbuf + a->replacementlen, if (a->replacementlen && is_jmp(replacement[0]))
a->instrlen - a->replacementlen); recompute_jump(a, instr, replacement, insnbuf);
text_poke_early(instr, insnbuf, a->instrlen); if (a->instrlen > a->replacementlen) {
add_nops(insnbuf + a->replacementlen,
a->instrlen - a->replacementlen);
insnbuf_sz += a->instrlen - a->replacementlen;
}
DUMP_BYTES(insnbuf, insnbuf_sz, "%p: final_insn: ", instr);
text_poke_early(instr, insnbuf, insnbuf_sz);
} }
} }
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
static void alternatives_smp_lock(const s32 *start, const s32 *end, static void alternatives_smp_lock(const s32 *start, const s32 *end,
u8 *text, u8 *text_end) u8 *text, u8 *text_end)
{ {
@ -371,8 +488,8 @@ void __init_or_module alternatives_smp_module_add(struct module *mod,
smp->locks_end = locks_end; smp->locks_end = locks_end;
smp->text = text; smp->text = text;
smp->text_end = text_end; smp->text_end = text_end;
DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n", DPRINTK("locks %p -> %p, text %p -> %p, name %s\n",
__func__, smp->locks, smp->locks_end, smp->locks, smp->locks_end,
smp->text, smp->text_end, smp->name); smp->text, smp->text_end, smp->name);
list_add_tail(&smp->next, &smp_alt_modules); list_add_tail(&smp->next, &smp_alt_modules);
@ -440,7 +557,7 @@ int alternatives_text_reserved(void *start, void *end)
return 0; return 0;
} }
#endif #endif /* CONFIG_SMP */
#ifdef CONFIG_PARAVIRT #ifdef CONFIG_PARAVIRT
void __init_or_module apply_paravirt(struct paravirt_patch_site *start, void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
@ -601,7 +718,7 @@ int poke_int3_handler(struct pt_regs *regs)
if (likely(!bp_patching_in_progress)) if (likely(!bp_patching_in_progress))
return 0; return 0;
if (user_mode_vm(regs) || regs->ip != (unsigned long)bp_int3_addr) if (user_mode(regs) || regs->ip != (unsigned long)bp_int3_addr)
return 0; return 0;
/* set up the specified breakpoint handler */ /* set up the specified breakpoint handler */
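A worked example of the displacement math in recompute_jump() above. Suppose the 5-byte replacement is a near JMP (0xe9 + rel32) and, once relocated to the original site, its target ends up 16 bytes past orig_insn:

	n_dspl = tgt_rip - orig_insn = 16
	16 - 2 = 14 <= 127, so the 2-byte form wins:
		insnbuf[0] = 0xeb; insnbuf[1] = 14; plus 3 NOPs (repl_len = 2)

Had the target been 300 bytes away instead, 300 - 2 > 127 forces the 5-byte 0xe9 form with a 32-bit displacement of 300 - 5 = 295.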


@ -68,7 +68,7 @@ void foo(void)
/* Offset from the sysenter stack to tss.sp0 */ /* Offset from the sysenter stack to tss.sp0 */
DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) - DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
sizeof(struct tss_struct)); offsetofend(struct tss_struct, SYSENTER_stack));
#if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE) #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
BLANK(); BLANK();


@ -81,6 +81,7 @@ int main(void)
#undef ENTRY #undef ENTRY
OFFSET(TSS_ist, tss_struct, x86_tss.ist); OFFSET(TSS_ist, tss_struct, x86_tss.ist);
OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
BLANK(); BLANK();
DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1); DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1);


@ -711,6 +711,11 @@ static void init_amd(struct cpuinfo_x86 *c)
set_cpu_bug(c, X86_BUG_AMD_APIC_C1E); set_cpu_bug(c, X86_BUG_AMD_APIC_C1E);
rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy); rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
/* 3DNow or LM implies PREFETCHW */
if (!cpu_has(c, X86_FEATURE_3DNOWPREFETCH))
if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM))
set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH);
} }
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32


@ -959,38 +959,37 @@ static void identify_cpu(struct cpuinfo_x86 *c)
#endif #endif
} }
#ifdef CONFIG_X86_64 /*
#ifdef CONFIG_IA32_EMULATION * Set up the CPU state needed to execute SYSENTER/SYSEXIT instructions
/* May not be __init: called during resume */ * on 32-bit kernels:
static void syscall32_cpu_init(void) */
{
/* Load these always in case some future AMD CPU supports
SYSENTER from compat mode too. */
wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
wrmsrl(MSR_CSTAR, ia32_cstar_target);
}
#endif /* CONFIG_IA32_EMULATION */
#endif /* CONFIG_X86_64 */
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
void enable_sep_cpu(void) void enable_sep_cpu(void)
{ {
int cpu = get_cpu(); struct tss_struct *tss;
struct tss_struct *tss = &per_cpu(init_tss, cpu); int cpu;
if (!boot_cpu_has(X86_FEATURE_SEP)) { cpu = get_cpu();
put_cpu(); tss = &per_cpu(cpu_tss, cpu);
return;
} if (!boot_cpu_has(X86_FEATURE_SEP))
goto out;
/*
* We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
* see the big comment in struct x86_hw_tss's definition.
*/
tss->x86_tss.ss1 = __KERNEL_CS; tss->x86_tss.ss1 = __KERNEL_CS;
tss->x86_tss.sp1 = sizeof(struct tss_struct) + (unsigned long) tss; wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.sp1, 0); wrmsr(MSR_IA32_SYSENTER_ESP,
wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) ia32_sysenter_target, 0); (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
0);
wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)ia32_sysenter_target, 0);
out:
put_cpu(); put_cpu();
} }
#endif #endif
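The three-argument wrmsr(msr, low, high) calls above pass the value as separate halves because the underlying WRMSR instruction consumes it as an EDX:EAX pair; the wrmsrl()-style helpers used elsewhere in this series just split a 64-bit value the same way. A standalone sketch of the split only (it deliberately does not execute the privileged instruction, and the MSR number shown is just an example):

#include <stdint.h>
#include <stdio.h>

/* Compute the EDX:EAX halves that a wrmsrl()-style helper would load
 * before executing WRMSR with the MSR index in ECX. */
static void wrmsr_halves(uint32_t msr, uint64_t val,
			 uint32_t *eax, uint32_t *edx)
{
	(void)msr;                       /* would go in ECX */
	*eax = (uint32_t)val;            /* low 32 bits */
	*edx = (uint32_t)(val >> 32);    /* high 32 bits */
}

int main(void)
{
	uint32_t lo, hi;

	wrmsr_halves(0x176 /* e.g. MSR_IA32_SYSENTER_EIP */,
		     0xffffffff81000000ULL, &lo, &hi);
	printf("EAX=0x%08x EDX=0x%08x\n", lo, hi);
	return 0;
}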
@ -1118,7 +1117,7 @@ static __init int setup_disablecpuid(char *arg)
__setup("clearcpuid=", setup_disablecpuid); __setup("clearcpuid=", setup_disablecpuid);
DEFINE_PER_CPU(unsigned long, kernel_stack) = DEFINE_PER_CPU(unsigned long, kernel_stack) =
(unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; (unsigned long)&init_thread_union + THREAD_SIZE;
EXPORT_PER_CPU_SYMBOL(kernel_stack); EXPORT_PER_CPU_SYMBOL(kernel_stack);
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
@ -1130,8 +1129,8 @@ DEFINE_PER_CPU_FIRST(union irq_stack_union,
irq_stack_union) __aligned(PAGE_SIZE) __visible; irq_stack_union) __aligned(PAGE_SIZE) __visible;
/* /*
* The following four percpu variables are hot. Align current_task to * The following percpu variables are hot. Align current_task to
* cacheline size such that all four fall in the same cacheline. * cacheline size such that they fall in the same cacheline.
*/ */
DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
&init_task; &init_task;
@ -1171,10 +1170,23 @@ void syscall_init(void)
*/ */
wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
wrmsrl(MSR_LSTAR, system_call); wrmsrl(MSR_LSTAR, system_call);
wrmsrl(MSR_CSTAR, ignore_sysret);
#ifdef CONFIG_IA32_EMULATION #ifdef CONFIG_IA32_EMULATION
syscall32_cpu_init(); wrmsrl(MSR_CSTAR, ia32_cstar_target);
/*
* This only works on Intel CPUs.
* On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP.
* This does not cause SYSENTER to jump to the wrong location, because
* AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
*/
wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
#else
wrmsrl(MSR_CSTAR, ignore_sysret);
wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG);
wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL);
#endif #endif
/* Flags to clear on syscall */ /* Flags to clear on syscall */
@ -1226,6 +1238,15 @@ DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
EXPORT_PER_CPU_SYMBOL(__preempt_count); EXPORT_PER_CPU_SYMBOL(__preempt_count);
DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
/*
* On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find
* the top of the kernel stack. Use an extra percpu variable to track the
* top of the kernel stack directly.
*/
DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) =
(unsigned long)&init_thread_union + THREAD_SIZE;
EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack);
#ifdef CONFIG_CC_STACKPROTECTOR #ifdef CONFIG_CC_STACKPROTECTOR
DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
#endif #endif
@ -1307,7 +1328,7 @@ void cpu_init(void)
*/ */
load_ucode_ap(); load_ucode_ap();
t = &per_cpu(init_tss, cpu); t = &per_cpu(cpu_tss, cpu);
oist = &per_cpu(orig_ist, cpu); oist = &per_cpu(orig_ist, cpu);
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
@ -1391,7 +1412,7 @@ void cpu_init(void)
{ {
int cpu = smp_processor_id(); int cpu = smp_processor_id();
struct task_struct *curr = current; struct task_struct *curr = current;
struct tss_struct *t = &per_cpu(init_tss, cpu); struct tss_struct *t = &per_cpu(cpu_tss, cpu);
struct thread_struct *thread = &curr->thread; struct thread_struct *thread = &curr->thread;
wait_for_master_cpu(cpu); wait_for_master_cpu(cpu);


@ -2146,6 +2146,12 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
*/ */
static unsigned long code_segment_base(struct pt_regs *regs) static unsigned long code_segment_base(struct pt_regs *regs)
{ {
/*
* For IA32 we look at the GDT/LDT segment base to convert the
* effective IP to a linear address.
*/
#ifdef CONFIG_X86_32
/* /*
* If we are in VM86 mode, add the segment offset to convert to a * If we are in VM86 mode, add the segment offset to convert to a
* linear address. * linear address.
@ -2153,18 +2159,12 @@ static unsigned long code_segment_base(struct pt_regs *regs)
if (regs->flags & X86_VM_MASK) if (regs->flags & X86_VM_MASK)
return 0x10 * regs->cs; return 0x10 * regs->cs;
/*
* For IA32 we look at the GDT/LDT segment base to convert the
* effective IP to a linear address.
*/
#ifdef CONFIG_X86_32
if (user_mode(regs) && regs->cs != __USER_CS) if (user_mode(regs) && regs->cs != __USER_CS)
return get_segment_base(regs->cs); return get_segment_base(regs->cs);
#else #else
if (test_thread_flag(TIF_IA32)) { if (user_mode(regs) && !user_64bit_mode(regs) &&
if (user_mode(regs) && regs->cs != __USER32_CS) regs->cs != __USER32_CS)
return get_segment_base(regs->cs); return get_segment_base(regs->cs);
}
#endif #endif
return 0; return 0;
} }
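The 0x10 * regs->cs term above is the usual real/vm86-mode segment arithmetic: the segment value contributes itself shifted left by four bits. A self-contained illustration (the addresses are arbitrary):

#include <stdint.h>
#include <stdio.h>

/* segment:offset -> linear address, as used for vm86 frames above */
static uint32_t vm86_linear(uint16_t seg, uint16_t off)
{
	return ((uint32_t)seg << 4) + off;   /* same as 0x10 * seg + off */
}

int main(void)
{
	printf("1234:0010 -> 0x%05x\n", vm86_linear(0x1234, 0x0010));
	return 0;
}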


@ -105,7 +105,7 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
struct pt_regs fixed_regs; struct pt_regs fixed_regs;
if (!user_mode_vm(regs)) { if (!user_mode(regs)) {
crash_fixup_ss_esp(&fixed_regs, regs); crash_fixup_ss_esp(&fixed_regs, regs);
regs = &fixed_regs; regs = &fixed_regs;
} }


@ -278,7 +278,7 @@ int __die(const char *str, struct pt_regs *regs, long err)
print_modules(); print_modules();
show_regs(regs); show_regs(regs);
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
if (user_mode_vm(regs)) { if (user_mode(regs)) {
sp = regs->sp; sp = regs->sp;
ss = regs->ss & 0xffff; ss = regs->ss & 0xffff;
} else { } else {
@ -307,7 +307,7 @@ void die(const char *str, struct pt_regs *regs, long err)
unsigned long flags = oops_begin(); unsigned long flags = oops_begin();
int sig = SIGSEGV; int sig = SIGSEGV;
if (!user_mode_vm(regs)) if (!user_mode(regs))
report_bug(regs->ip, regs); report_bug(regs->ip, regs);
if (__die(str, regs, err)) if (__die(str, regs, err))


@ -123,13 +123,13 @@ void show_regs(struct pt_regs *regs)
int i; int i;
show_regs_print_info(KERN_EMERG); show_regs_print_info(KERN_EMERG);
__show_regs(regs, !user_mode_vm(regs)); __show_regs(regs, !user_mode(regs));
/* /*
* When in-kernel, we also print out the stack and code at the * When in-kernel, we also print out the stack and code at the
* time of the fault.. * time of the fault..
*/ */
if (!user_mode_vm(regs)) { if (!user_mode(regs)) {
unsigned int code_prologue = code_bytes * 43 / 64; unsigned int code_prologue = code_bytes * 43 / 64;
unsigned int code_len = code_bytes; unsigned int code_len = code_bytes;
unsigned char c; unsigned char c;


@ -395,10 +395,13 @@ sysenter_past_esp:
/*CFI_REL_OFFSET cs, 0*/ /*CFI_REL_OFFSET cs, 0*/
/* /*
* Push current_thread_info()->sysenter_return to the stack. * Push current_thread_info()->sysenter_return to the stack.
* A tiny bit of offset fixup is necessary - 4*4 means the 4 words * A tiny bit of offset fixup is necessary: TI_sysenter_return
* pushed above; +8 corresponds to copy_thread's esp0 setting. * is relative to thread_info, which is at the bottom of the
* kernel stack page. 4*4 means the 4 words pushed above;
* TOP_OF_KERNEL_STACK_PADDING takes us to the top of the stack;
* and THREAD_SIZE takes us to the bottom.
*/ */
pushl_cfi ((TI_sysenter_return)-THREAD_SIZE+8+4*4)(%esp) pushl_cfi ((TI_sysenter_return) - THREAD_SIZE + TOP_OF_KERNEL_STACK_PADDING + 4*4)(%esp)
CFI_REL_OFFSET eip, 0 CFI_REL_OFFSET eip, 0
pushl_cfi %eax pushl_cfi %eax
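The operand of that pushl is plain offset arithmetic; a standalone sketch with invented constants (the real TI_sysenter_return, THREAD_SIZE and TOP_OF_KERNEL_STACK_PADDING values come from asm-offsets and the headers) shows how the pieces in the new comment combine:

#include <stdio.h>

/* All values below are assumptions chosen only to make the sketch run. */
#define THREAD_SIZE                 8192
#define TOP_OF_KERNEL_STACK_PADDING 8
#define TI_sysenter_return          40    /* offset within thread_info */
#define WORDS_PUSHED                4     /* the four words pushed just above */

int main(void)
{
	/* Adding 4*WORDS_PUSHED undoes the pushes, adding the padding
	 * reaches the true top of the stack, subtracting THREAD_SIZE
	 * reaches thread_info at the bottom, and TI_sysenter_return
	 * then indexes the field. */
	long off = (long)TI_sysenter_return - THREAD_SIZE +
		   TOP_OF_KERNEL_STACK_PADDING + 4 * WORDS_PUSHED;

	printf("pushl operand: %ld(%%esp)\n", off);
	return 0;
}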
@ -432,7 +435,7 @@ sysenter_after_call:
TRACE_IRQS_OFF TRACE_IRQS_OFF
movl TI_flags(%ebp), %ecx movl TI_flags(%ebp), %ecx
testl $_TIF_ALLWORK_MASK, %ecx testl $_TIF_ALLWORK_MASK, %ecx
jne sysexit_audit jnz sysexit_audit
sysenter_exit: sysenter_exit:
/* if something modifies registers it must also disable sysexit */ /* if something modifies registers it must also disable sysexit */
movl PT_EIP(%esp), %edx movl PT_EIP(%esp), %edx
@ -460,7 +463,7 @@ sysenter_audit:
sysexit_audit: sysexit_audit:
testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
jne syscall_exit_work jnz syscall_exit_work
TRACE_IRQS_ON TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_ANY) ENABLE_INTERRUPTS(CLBR_ANY)
movl %eax,%edx /* second arg, syscall return value */ movl %eax,%edx /* second arg, syscall return value */
@ -472,7 +475,7 @@ sysexit_audit:
TRACE_IRQS_OFF TRACE_IRQS_OFF
movl TI_flags(%ebp), %ecx movl TI_flags(%ebp), %ecx
testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
jne syscall_exit_work jnz syscall_exit_work
movl PT_EAX(%esp),%eax /* reload syscall return value */ movl PT_EAX(%esp),%eax /* reload syscall return value */
jmp sysenter_exit jmp sysenter_exit
#endif #endif
@ -510,7 +513,7 @@ syscall_exit:
TRACE_IRQS_OFF TRACE_IRQS_OFF
movl TI_flags(%ebp), %ecx movl TI_flags(%ebp), %ecx
testl $_TIF_ALLWORK_MASK, %ecx # current->work testl $_TIF_ALLWORK_MASK, %ecx # current->work
jne syscall_exit_work jnz syscall_exit_work
restore_all: restore_all:
TRACE_IRQS_IRET TRACE_IRQS_IRET
@ -612,7 +615,7 @@ work_notifysig: # deal with pending signals and
#ifdef CONFIG_VM86 #ifdef CONFIG_VM86
testl $X86_EFLAGS_VM, PT_EFLAGS(%esp) testl $X86_EFLAGS_VM, PT_EFLAGS(%esp)
movl %esp, %eax movl %esp, %eax
jne work_notifysig_v86 # returning to kernel-space or jnz work_notifysig_v86 # returning to kernel-space or
# vm86-space # vm86-space
1: 1:
#else #else
@ -720,43 +723,22 @@ END(sysenter_badsys)
.endm .endm
/* /*
* Build the entry stubs and pointer table with some assembler magic. * Build the entry stubs with some assembler magic.
* We pack 7 stubs into a single 32-byte chunk, which will fit in a * We pack 1 stub into every 8-byte block.
* single cache line on all modern x86 implementations.
*/ */
.section .init.rodata,"a" .align 8
ENTRY(interrupt)
.section .entry.text, "ax"
.p2align 5
.p2align CONFIG_X86_L1_CACHE_SHIFT
ENTRY(irq_entries_start) ENTRY(irq_entries_start)
RING0_INT_FRAME RING0_INT_FRAME
vector=FIRST_EXTERNAL_VECTOR vector=FIRST_EXTERNAL_VECTOR
.rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7 .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
.balign 32 pushl_cfi $(~vector+0x80) /* Note: always in signed byte range */
.rept 7 vector=vector+1
.if vector < FIRST_SYSTEM_VECTOR jmp common_interrupt
.if vector <> FIRST_EXTERNAL_VECTOR
CFI_ADJUST_CFA_OFFSET -4 CFI_ADJUST_CFA_OFFSET -4
.endif .align 8
1: pushl_cfi $(~vector+0x80) /* Note: always in signed byte range */ .endr
.if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
jmp 2f
.endif
.previous
.long 1b
.section .entry.text, "ax"
vector=vector+1
.endif
.endr
2: jmp common_interrupt
.endr
END(irq_entries_start) END(irq_entries_start)
.previous
END(interrupt)
.previous
/* /*
* the CPU automatically disables interrupts when executing an IRQ vector, * the CPU automatically disables interrupts when executing an IRQ vector,
* so IRQ-flags tracing has to follow that: * so IRQ-flags tracing has to follow that:
@ -816,15 +798,9 @@ ENTRY(simd_coprocessor_error)
pushl_cfi $0 pushl_cfi $0
#ifdef CONFIG_X86_INVD_BUG #ifdef CONFIG_X86_INVD_BUG
/* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */ /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
661: pushl_cfi $do_general_protection ALTERNATIVE "pushl_cfi $do_general_protection", \
662: "pushl $do_simd_coprocessor_error", \
.section .altinstructions,"a" X86_FEATURE_XMM
altinstruction_entry 661b, 663f, X86_FEATURE_XMM, 662b-661b, 664f-663f
.previous
.section .altinstr_replacement,"ax"
663: pushl $do_simd_coprocessor_error
664:
.previous
#else #else
pushl_cfi $do_simd_coprocessor_error pushl_cfi $do_simd_coprocessor_error
#endif #endif
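The ALTERNATIVE macro replaces the open-coded .altinstructions bookkeeping with a single statement: it records the original bytes, the replacement bytes and the gating feature bit, and the boot-time patcher rewrites the original in place when the CPU reports the feature. A toy model of that decision follows; it models only the record-and-select logic, not the actual text patching or the real struct alt_instr layout.

#include <stdbool.h>
#include <stdio.h>

struct alt_entry {
	void (*orig)(void);     /* what was assembled in place */
	void (*repl)(void);     /* what to use when the feature exists */
	int feature;
};

static void general_protection_path(void) { puts("push do_general_protection"); }
static void simd_error_path(void)         { puts("push do_simd_coprocessor_error"); }

enum { FEATURE_XMM = 1 };

static bool cpu_has(int feature)
{
	return feature == FEATURE_XMM;   /* pretend the CPU has SSE */
}

int main(void)
{
	struct alt_entry e = {
		.orig    = general_protection_path,
		.repl    = simd_error_path,
		.feature = FEATURE_XMM,
	};

	/* boot-time "patching", reduced here to a selection */
	(cpu_has(e.feature) ? e.repl : e.orig)();
	return 0;
}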
@ -1240,20 +1216,13 @@ error_code:
/*CFI_REL_OFFSET es, 0*/ /*CFI_REL_OFFSET es, 0*/
pushl_cfi %ds pushl_cfi %ds
/*CFI_REL_OFFSET ds, 0*/ /*CFI_REL_OFFSET ds, 0*/
pushl_cfi %eax pushl_cfi_reg eax
CFI_REL_OFFSET eax, 0 pushl_cfi_reg ebp
pushl_cfi %ebp pushl_cfi_reg edi
CFI_REL_OFFSET ebp, 0 pushl_cfi_reg esi
pushl_cfi %edi pushl_cfi_reg edx
CFI_REL_OFFSET edi, 0 pushl_cfi_reg ecx
pushl_cfi %esi pushl_cfi_reg ebx
CFI_REL_OFFSET esi, 0
pushl_cfi %edx
CFI_REL_OFFSET edx, 0
pushl_cfi %ecx
CFI_REL_OFFSET ecx, 0
pushl_cfi %ebx
CFI_REL_OFFSET ebx, 0
cld cld
movl $(__KERNEL_PERCPU), %ecx movl $(__KERNEL_PERCPU), %ecx
movl %ecx, %fs movl %ecx, %fs

File diff suppressed because it is too large.


@ -22,6 +22,7 @@
#include <asm/cpufeature.h> #include <asm/cpufeature.h>
#include <asm/percpu.h> #include <asm/percpu.h>
#include <asm/nops.h> #include <asm/nops.h>
#include <asm/bootparam.h>
/* Physical address */ /* Physical address */
#define pa(X) ((X) - __PAGE_OFFSET) #define pa(X) ((X) - __PAGE_OFFSET)
@ -90,7 +91,7 @@ ENTRY(startup_32)
/* test KEEP_SEGMENTS flag to see if the bootloader is asking /* test KEEP_SEGMENTS flag to see if the bootloader is asking
us to not reload segments */ us to not reload segments */
testb $(1<<6), BP_loadflags(%esi) testb $KEEP_SEGMENTS, BP_loadflags(%esi)
jnz 2f jnz 2f
/* /*


@ -1,5 +1,5 @@
/* /*
* linux/arch/x86_64/kernel/head.S -- start in 32bit and switch to 64bit * linux/arch/x86/kernel/head_64.S -- start in 32bit and switch to 64bit
* *
* Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
* Copyright (C) 2000 Pavel Machek <pavel@suse.cz> * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
@ -56,7 +56,7 @@ startup_64:
* %rsi holds a physical pointer to real_mode_data. * %rsi holds a physical pointer to real_mode_data.
* *
* We come here either directly from a 64bit bootloader, or from * We come here either directly from a 64bit bootloader, or from
* arch/x86_64/boot/compressed/head.S. * arch/x86/boot/compressed/head_64.S.
* *
* We only come here initially at boot nothing else comes here. * We only come here initially at boot nothing else comes here.
* *
@ -146,7 +146,7 @@ startup_64:
leaq level2_kernel_pgt(%rip), %rdi leaq level2_kernel_pgt(%rip), %rdi
leaq 4096(%rdi), %r8 leaq 4096(%rdi), %r8
/* See if it is a valid page table entry */ /* See if it is a valid page table entry */
1: testq $1, 0(%rdi) 1: testb $1, 0(%rdi)
jz 2f jz 2f
addq %rbp, 0(%rdi) addq %rbp, 0(%rdi)
/* Go to the next page */ /* Go to the next page */


@ -68,7 +68,7 @@ static inline bool interrupted_kernel_fpu_idle(void)
static inline bool interrupted_user_mode(void) static inline bool interrupted_user_mode(void)
{ {
struct pt_regs *regs = get_irq_regs(); struct pt_regs *regs = get_irq_regs();
return regs && user_mode_vm(regs); return regs && user_mode(regs);
} }
/* /*


@ -54,7 +54,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
* because the ->io_bitmap_max value must match the bitmap * because the ->io_bitmap_max value must match the bitmap
* contents: * contents:
*/ */
tss = &per_cpu(init_tss, get_cpu()); tss = &per_cpu(cpu_tss, get_cpu());
if (turn_on) if (turn_on)
bitmap_clear(t->io_bitmap_ptr, from, num); bitmap_clear(t->io_bitmap_ptr, from, num);


@ -165,7 +165,7 @@ bool handle_irq(unsigned irq, struct pt_regs *regs)
if (unlikely(!desc)) if (unlikely(!desc))
return false; return false;
if (user_mode_vm(regs) || !execute_on_irq_stack(overflow, desc, irq)) { if (user_mode(regs) || !execute_on_irq_stack(overflow, desc, irq)) {
if (unlikely(overflow)) if (unlikely(overflow))
print_stack_overflow(); print_stack_overflow();
desc->handle_irq(irq, desc); desc->handle_irq(irq, desc);


@ -44,7 +44,7 @@ static inline void stack_overflow_check(struct pt_regs *regs)
u64 estack_top, estack_bottom; u64 estack_top, estack_bottom;
u64 curbase = (u64)task_stack_page(current); u64 curbase = (u64)task_stack_page(current);
if (user_mode_vm(regs)) if (user_mode(regs))
return; return;
if (regs->sp >= curbase + sizeof(struct thread_info) + if (regs->sp >= curbase + sizeof(struct thread_info) +


@ -178,7 +178,8 @@ void __init native_init_IRQ(void)
#endif #endif
for_each_clear_bit_from(i, used_vectors, first_system_vector) { for_each_clear_bit_from(i, used_vectors, first_system_vector) {
/* IA32_SYSCALL_VECTOR could be used in trap_init already. */ /* IA32_SYSCALL_VECTOR could be used in trap_init already. */
set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]); set_intr_gate(i, irq_entries_start +
8 * (i - FIRST_EXTERNAL_VECTOR));
} }
#ifdef CONFIG_X86_LOCAL_APIC #ifdef CONFIG_X86_LOCAL_APIC
for_each_clear_bit_from(i, used_vectors, NR_VECTORS) for_each_clear_bit_from(i, used_vectors, NR_VECTORS)
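With one 8-byte stub per vector (the .align 8 packing in the new irq_entries_start), the old interrupt[] pointer table becomes unnecessary: each gate address is a fixed stride from irq_entries_start, which is exactly what the set_intr_gate() call above computes. A small arithmetic-only sketch (the base address and the FIRST_EXTERNAL_VECTOR value are assumptions for the demo):

#include <stdio.h>

#define FIRST_EXTERNAL_VECTOR 0x20   /* assumed for the example */
#define IRQ_STUB_SIZE         8      /* matches the .align 8 stub packing */

static unsigned long stub_address(unsigned long irq_entries_start,
				  unsigned int vector)
{
	return irq_entries_start +
	       IRQ_STUB_SIZE * (vector - FIRST_EXTERNAL_VECTOR);
}

int main(void)
{
	unsigned long base = 0xffffffff81800000UL;   /* invented base */
	unsigned int v;

	for (v = FIRST_EXTERNAL_VECTOR; v < FIRST_EXTERNAL_VECTOR + 4; v++)
		printf("vector 0x%02x -> stub at 0x%lx\n",
		       v, stub_address(base, v));
	return 0;
}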


@ -126,11 +126,11 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
switch (regno) { switch (regno) {
case GDB_SS: case GDB_SS:
if (!user_mode_vm(regs)) if (!user_mode(regs))
*(unsigned long *)mem = __KERNEL_DS; *(unsigned long *)mem = __KERNEL_DS;
break; break;
case GDB_SP: case GDB_SP:
if (!user_mode_vm(regs)) if (!user_mode(regs))
*(unsigned long *)mem = kernel_stack_pointer(regs); *(unsigned long *)mem = kernel_stack_pointer(regs);
break; break;
case GDB_GS: case GDB_GS:


@ -602,7 +602,7 @@ int kprobe_int3_handler(struct pt_regs *regs)
struct kprobe *p; struct kprobe *p;
struct kprobe_ctlblk *kcb; struct kprobe_ctlblk *kcb;
if (user_mode_vm(regs)) if (user_mode(regs))
return 0; return 0;
addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t)); addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t));
@ -1007,7 +1007,7 @@ int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val,
struct die_args *args = data; struct die_args *args = data;
int ret = NOTIFY_DONE; int ret = NOTIFY_DONE;
if (args->regs && user_mode_vm(args->regs)) if (args->regs && user_mode(args->regs))
return ret; return ret;
if (val == DIE_GPF) { if (val == DIE_GPF) {


@ -33,6 +33,7 @@
#include <asm/page.h> #include <asm/page.h>
#include <asm/pgtable.h> #include <asm/pgtable.h>
#include <asm/setup.h>
#if 0 #if 0
#define DEBUGP(fmt, ...) \ #define DEBUGP(fmt, ...) \
@ -47,21 +48,13 @@ do { \
#ifdef CONFIG_RANDOMIZE_BASE #ifdef CONFIG_RANDOMIZE_BASE
static unsigned long module_load_offset; static unsigned long module_load_offset;
static int randomize_modules = 1;
/* Mutex protects the module_load_offset. */ /* Mutex protects the module_load_offset. */
static DEFINE_MUTEX(module_kaslr_mutex); static DEFINE_MUTEX(module_kaslr_mutex);
static int __init parse_nokaslr(char *p)
{
randomize_modules = 0;
return 0;
}
early_param("nokaslr", parse_nokaslr);
static unsigned long int get_module_load_offset(void) static unsigned long int get_module_load_offset(void)
{ {
if (randomize_modules) { if (kaslr_enabled()) {
mutex_lock(&module_kaslr_mutex); mutex_lock(&module_kaslr_mutex);
/* /*
* Calculate the module_load_offset the first time this * Calculate the module_load_offset the first time this


@ -131,10 +131,11 @@ void perf_get_regs_user(struct perf_regs *regs_user,
} }
/* /*
* RIP, flags, and the argument registers are usually saved. * These registers are always saved on 64-bit syscall entry.
* orig_ax is probably okay, too. * On 32-bit entry points, they are saved too except r8..r11.
*/ */
regs_user_copy->ip = user_regs->ip; regs_user_copy->ip = user_regs->ip;
regs_user_copy->ax = user_regs->ax;
regs_user_copy->cx = user_regs->cx; regs_user_copy->cx = user_regs->cx;
regs_user_copy->dx = user_regs->dx; regs_user_copy->dx = user_regs->dx;
regs_user_copy->si = user_regs->si; regs_user_copy->si = user_regs->si;
@ -145,9 +146,12 @@ void perf_get_regs_user(struct perf_regs *regs_user,
regs_user_copy->r11 = user_regs->r11; regs_user_copy->r11 = user_regs->r11;
regs_user_copy->orig_ax = user_regs->orig_ax; regs_user_copy->orig_ax = user_regs->orig_ax;
regs_user_copy->flags = user_regs->flags; regs_user_copy->flags = user_regs->flags;
regs_user_copy->sp = user_regs->sp;
regs_user_copy->cs = user_regs->cs;
regs_user_copy->ss = user_regs->ss;
/* /*
* Don't even try to report the "rest" regs. * Most system calls don't save these registers, don't report them.
*/ */
regs_user_copy->bx = -1; regs_user_copy->bx = -1;
regs_user_copy->bp = -1; regs_user_copy->bp = -1;
@ -158,37 +162,13 @@ void perf_get_regs_user(struct perf_regs *regs_user,
/* /*
* For this to be at all useful, we need a reasonable guess for * For this to be at all useful, we need a reasonable guess for
* sp and the ABI. Be careful: we're in NMI context, and we're * the ABI. Be careful: we're in NMI context, and we're
* considering current to be the current task, so we should * considering current to be the current task, so we should
* be careful not to look at any other percpu variables that might * be careful not to look at any other percpu variables that might
* change during context switches. * change during context switches.
*/ */
if (IS_ENABLED(CONFIG_IA32_EMULATION) && regs_user->abi = user_64bit_mode(user_regs) ?
task_thread_info(current)->status & TS_COMPAT) { PERF_SAMPLE_REGS_ABI_64 : PERF_SAMPLE_REGS_ABI_32;
/* Easy case: we're in a compat syscall. */
regs_user->abi = PERF_SAMPLE_REGS_ABI_32;
regs_user_copy->sp = user_regs->sp;
regs_user_copy->cs = user_regs->cs;
regs_user_copy->ss = user_regs->ss;
} else if (user_regs->orig_ax != -1) {
/*
* We're probably in a 64-bit syscall.
* Warning: this code is severely racy. At least it's better
* than just blindly copying user_regs.
*/
regs_user->abi = PERF_SAMPLE_REGS_ABI_64;
regs_user_copy->sp = this_cpu_read(old_rsp);
regs_user_copy->cs = __USER_CS;
regs_user_copy->ss = __USER_DS;
regs_user_copy->cx = -1; /* usually contains garbage */
} else {
/* We're probably in an interrupt or exception. */
regs_user->abi = user_64bit_mode(user_regs) ?
PERF_SAMPLE_REGS_ABI_64 : PERF_SAMPLE_REGS_ABI_32;
regs_user_copy->sp = user_regs->sp;
regs_user_copy->cs = user_regs->cs;
regs_user_copy->ss = user_regs->ss;
}
regs_user->regs = regs_user_copy; regs_user->regs = regs_user_copy;
} }


@ -38,7 +38,26 @@
* section. Since TSS's are completely CPU-local, we want them * section. Since TSS's are completely CPU-local, we want them
* on exact cacheline boundaries, to eliminate cacheline ping-pong. * on exact cacheline boundaries, to eliminate cacheline ping-pong.
*/ */
__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS; __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
.x86_tss = {
.sp0 = TOP_OF_INIT_STACK,
#ifdef CONFIG_X86_32
.ss0 = __KERNEL_DS,
.ss1 = __KERNEL_CS,
.io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
#endif
},
#ifdef CONFIG_X86_32
/*
* Note that the .io_bitmap member must be extra-big. This is because
* the CPU will access an additional byte beyond the end of the IO
* permission bitmap. The extra byte must be all 1 bits, and must
* be within the limit.
*/
.io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 },
#endif
};
EXPORT_PER_CPU_SYMBOL_GPL(cpu_tss);
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
static DEFINE_PER_CPU(unsigned char, is_idle); static DEFINE_PER_CPU(unsigned char, is_idle);
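The [0 ... IO_BITMAP_LONGS] = ~0 initializer above uses GNU C range designators and intentionally produces IO_BITMAP_LONGS + 1 entries, i.e. one extra all-ones long past the bitmap proper, which is what the comment about the CPU reading a byte past the end of the bitmap requires. A standalone demonstration with a tiny bitmap:

#include <stdio.h>

#define IO_BITMAP_LONGS 4   /* tiny value, demo only */

/* GNU C range designator: indices 0..IO_BITMAP_LONGS inclusive. */
static unsigned long io_bitmap[] = { [0 ... IO_BITMAP_LONGS] = ~0UL };

int main(void)
{
	printf("entries: %zu (IO_BITMAP_LONGS + 1)\n",
	       sizeof(io_bitmap) / sizeof(io_bitmap[0]));
	return 0;
}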
@ -110,7 +129,7 @@ void exit_thread(void)
unsigned long *bp = t->io_bitmap_ptr; unsigned long *bp = t->io_bitmap_ptr;
if (bp) { if (bp) {
struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu());
t->io_bitmap_ptr = NULL; t->io_bitmap_ptr = NULL;
clear_thread_flag(TIF_IO_BITMAP); clear_thread_flag(TIF_IO_BITMAP);


@ -73,7 +73,7 @@ void __show_regs(struct pt_regs *regs, int all)
unsigned long sp; unsigned long sp;
unsigned short ss, gs; unsigned short ss, gs;
if (user_mode_vm(regs)) { if (user_mode(regs)) {
sp = regs->sp; sp = regs->sp;
ss = regs->ss & 0xffff; ss = regs->ss & 0xffff;
gs = get_user_gs(regs); gs = get_user_gs(regs);
@ -206,11 +206,7 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
regs->ip = new_ip; regs->ip = new_ip;
regs->sp = new_sp; regs->sp = new_sp;
regs->flags = X86_EFLAGS_IF; regs->flags = X86_EFLAGS_IF;
/* force_iret();
* force it to the iret return path by making it look as if there was
* some work pending.
*/
set_thread_flag(TIF_NOTIFY_RESUME);
} }
EXPORT_SYMBOL_GPL(start_thread); EXPORT_SYMBOL_GPL(start_thread);
@ -248,18 +244,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
struct thread_struct *prev = &prev_p->thread, struct thread_struct *prev = &prev_p->thread,
*next = &next_p->thread; *next = &next_p->thread;
int cpu = smp_processor_id(); int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(init_tss, cpu); struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
fpu_switch_t fpu; fpu_switch_t fpu;
/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
fpu = switch_fpu_prepare(prev_p, next_p, cpu); fpu = switch_fpu_prepare(prev_p, next_p, cpu);
/*
* Reload esp0.
*/
load_sp0(tss, next);
/* /*
* Save away %gs. No need to save %fs, as it was saved on the * Save away %gs. No need to save %fs, as it was saved on the
* stack on entry. No need to save %es and %ds, as those are * stack on entry. No need to save %es and %ds, as those are
@ -310,9 +301,17 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/ */
arch_end_context_switch(next_p); arch_end_context_switch(next_p);
/*
* Reload esp0, kernel_stack, and current_top_of_stack. This changes
* current_thread_info().
*/
load_sp0(tss, next);
this_cpu_write(kernel_stack, this_cpu_write(kernel_stack,
(unsigned long)task_stack_page(next_p) + (unsigned long)task_stack_page(next_p) +
THREAD_SIZE - KERNEL_STACK_OFFSET); THREAD_SIZE);
this_cpu_write(cpu_current_top_of_stack,
(unsigned long)task_stack_page(next_p) +
THREAD_SIZE);
/* /*
* Restore %gs if needed (which is common) * Restore %gs if needed (which is common)


@ -52,7 +52,7 @@
asmlinkage extern void ret_from_fork(void); asmlinkage extern void ret_from_fork(void);
__visible DEFINE_PER_CPU(unsigned long, old_rsp); __visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
/* Prints also some state that isn't saved in the pt_regs */ /* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs, int all) void __show_regs(struct pt_regs *regs, int all)
@ -161,7 +161,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE; p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
childregs = task_pt_regs(p); childregs = task_pt_regs(p);
p->thread.sp = (unsigned long) childregs; p->thread.sp = (unsigned long) childregs;
p->thread.usersp = me->thread.usersp;
set_tsk_thread_flag(p, TIF_FORK); set_tsk_thread_flag(p, TIF_FORK);
p->thread.io_bitmap_ptr = NULL; p->thread.io_bitmap_ptr = NULL;
@ -207,7 +206,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
*/ */
if (clone_flags & CLONE_SETTLS) { if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION #ifdef CONFIG_IA32_EMULATION
if (test_thread_flag(TIF_IA32)) if (is_ia32_task())
err = do_set_thread_area(p, -1, err = do_set_thread_area(p, -1,
(struct user_desc __user *)childregs->si, 0); (struct user_desc __user *)childregs->si, 0);
else else
@ -235,13 +234,12 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
loadsegment(es, _ds); loadsegment(es, _ds);
loadsegment(ds, _ds); loadsegment(ds, _ds);
load_gs_index(0); load_gs_index(0);
current->thread.usersp = new_sp;
regs->ip = new_ip; regs->ip = new_ip;
regs->sp = new_sp; regs->sp = new_sp;
this_cpu_write(old_rsp, new_sp);
regs->cs = _cs; regs->cs = _cs;
regs->ss = _ss; regs->ss = _ss;
regs->flags = X86_EFLAGS_IF; regs->flags = X86_EFLAGS_IF;
force_iret();
} }
void void
@ -277,15 +275,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
struct thread_struct *prev = &prev_p->thread; struct thread_struct *prev = &prev_p->thread;
struct thread_struct *next = &next_p->thread; struct thread_struct *next = &next_p->thread;
int cpu = smp_processor_id(); int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(init_tss, cpu); struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
unsigned fsindex, gsindex; unsigned fsindex, gsindex;
fpu_switch_t fpu; fpu_switch_t fpu;
fpu = switch_fpu_prepare(prev_p, next_p, cpu); fpu = switch_fpu_prepare(prev_p, next_p, cpu);
/* Reload esp0 and ss1. */
load_sp0(tss, next);
/* We must save %fs and %gs before load_TLS() because /* We must save %fs and %gs before load_TLS() because
* %fs and %gs may be cleared by load_TLS(). * %fs and %gs may be cleared by load_TLS().
* *
@ -401,8 +396,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/* /*
* Switch the PDA and FPU contexts. * Switch the PDA and FPU contexts.
*/ */
prev->usersp = this_cpu_read(old_rsp);
this_cpu_write(old_rsp, next->usersp);
this_cpu_write(current_task, next_p); this_cpu_write(current_task, next_p);
/* /*
@ -413,9 +406,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count); task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count);
this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count); this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count);
/* Reload esp0 and ss1. This changes current_thread_info(). */
load_sp0(tss, next);
this_cpu_write(kernel_stack, this_cpu_write(kernel_stack,
(unsigned long)task_stack_page(next_p) + (unsigned long)task_stack_page(next_p) + THREAD_SIZE);
THREAD_SIZE - KERNEL_STACK_OFFSET);
/* /*
* Now maybe reload the debug registers and handle I/O bitmaps * Now maybe reload the debug registers and handle I/O bitmaps
@ -602,6 +597,5 @@ long sys_arch_prctl(int code, unsigned long addr)
unsigned long KSTK_ESP(struct task_struct *task) unsigned long KSTK_ESP(struct task_struct *task)
{ {
return (test_tsk_thread_flag(task, TIF_IA32)) ? return task_pt_regs(task)->sp;
(task_pt_regs(task)->sp) : ((task)->thread.usersp);
} }
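KSTK_ESP can now simply read task_pt_regs(task)->sp because, with the usersp/old_rsp bookkeeping removed above, the user register frame always sits at the top of the task's kernel stack. A userspace sketch of that layout (the stack size and frame shape are illustrative, not the kernel's):

#include <stdio.h>

#define THREAD_SIZE 16384                 /* assumed stack size */

struct pt_regs_sketch { unsigned long regs[21]; };

/* The frame lives just below the top of the stack area. */
static struct pt_regs_sketch *task_pt_regs_sketch(void *stack_page)
{
	return (struct pt_regs_sketch *)((char *)stack_page + THREAD_SIZE) - 1;
}

int main(void)
{
	static char stack[THREAD_SIZE];
	struct pt_regs_sketch *regs = task_pt_regs_sketch(stack);

	printf("stack %p..%p, pt_regs at %p\n",
	       (void *)stack, (void *)(stack + THREAD_SIZE), (void *)regs);
	return 0;
}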


@ -364,18 +364,12 @@ static int set_segment_reg(struct task_struct *task,
case offsetof(struct user_regs_struct,cs): case offsetof(struct user_regs_struct,cs):
if (unlikely(value == 0)) if (unlikely(value == 0))
return -EIO; return -EIO;
#ifdef CONFIG_IA32_EMULATION task_pt_regs(task)->cs = value;
if (test_tsk_thread_flag(task, TIF_IA32))
task_pt_regs(task)->cs = value;
#endif
break; break;
case offsetof(struct user_regs_struct,ss): case offsetof(struct user_regs_struct,ss):
if (unlikely(value == 0)) if (unlikely(value == 0))
return -EIO; return -EIO;
#ifdef CONFIG_IA32_EMULATION task_pt_regs(task)->ss = value;
if (test_tsk_thread_flag(task, TIF_IA32))
task_pt_regs(task)->ss = value;
#endif
break; break;
} }
@ -1421,7 +1415,7 @@ static void fill_sigtrap_info(struct task_struct *tsk,
memset(info, 0, sizeof(*info)); memset(info, 0, sizeof(*info));
info->si_signo = SIGTRAP; info->si_signo = SIGTRAP;
info->si_code = si_code; info->si_code = si_code;
info->si_addr = user_mode_vm(regs) ? (void __user *)regs->ip : NULL; info->si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL;
} }
void user_single_step_siginfo(struct task_struct *tsk, void user_single_step_siginfo(struct task_struct *tsk,


@ -226,23 +226,23 @@ swap_pages:
movl (%ebx), %ecx movl (%ebx), %ecx
addl $4, %ebx addl $4, %ebx
1: 1:
testl $0x1, %ecx /* is it a destination page */ testb $0x1, %cl /* is it a destination page */
jz 2f jz 2f
movl %ecx, %edi movl %ecx, %edi
andl $0xfffff000, %edi andl $0xfffff000, %edi
jmp 0b jmp 0b
2: 2:
testl $0x2, %ecx /* is it an indirection page */ testb $0x2, %cl /* is it an indirection page */
jz 2f jz 2f
movl %ecx, %ebx movl %ecx, %ebx
andl $0xfffff000, %ebx andl $0xfffff000, %ebx
jmp 0b jmp 0b
2: 2:
testl $0x4, %ecx /* is it the done indicator */ testb $0x4, %cl /* is it the done indicator */
jz 2f jz 2f
jmp 3f jmp 3f
2: 2:
testl $0x8, %ecx /* is it the source indicator */ testb $0x8, %cl /* is it the source indicator */
jz 0b /* Ignore it otherwise */ jz 0b /* Ignore it otherwise */
movl %ecx, %esi /* For every source page do a copy */ movl %ecx, %esi /* For every source page do a copy */
andl $0xfffff000, %esi andl $0xfffff000, %esi


@ -123,7 +123,7 @@ identity_mapped:
* Set cr4 to a known state: * Set cr4 to a known state:
* - physical address extension enabled * - physical address extension enabled
*/ */
movq $X86_CR4_PAE, %rax movl $X86_CR4_PAE, %eax
movq %rax, %cr4 movq %rax, %cr4
jmp 1f jmp 1f
@ -221,23 +221,23 @@ swap_pages:
movq (%rbx), %rcx movq (%rbx), %rcx
addq $8, %rbx addq $8, %rbx
1: 1:
testq $0x1, %rcx /* is it a destination page? */ testb $0x1, %cl /* is it a destination page? */
jz 2f jz 2f
movq %rcx, %rdi movq %rcx, %rdi
andq $0xfffffffffffff000, %rdi andq $0xfffffffffffff000, %rdi
jmp 0b jmp 0b
2: 2:
testq $0x2, %rcx /* is it an indirection page? */ testb $0x2, %cl /* is it an indirection page? */
jz 2f jz 2f
movq %rcx, %rbx movq %rcx, %rbx
andq $0xfffffffffffff000, %rbx andq $0xfffffffffffff000, %rbx
jmp 0b jmp 0b
2: 2:
testq $0x4, %rcx /* is it the done indicator? */ testb $0x4, %cl /* is it the done indicator? */
jz 2f jz 2f
jmp 3f jmp 3f
2: 2:
testq $0x8, %rcx /* is it the source indicator? */ testb $0x8, %cl /* is it the source indicator? */
jz 0b /* Ignore it otherwise */ jz 0b /* Ignore it otherwise */
movq %rcx, %rsi /* For ever source page do a copy */ movq %rcx, %rsi /* For ever source page do a copy */
andq $0xfffffffffffff000, %rsi andq $0xfffffffffffff000, %rsi
@ -246,17 +246,17 @@ swap_pages:
movq %rsi, %rax movq %rsi, %rax
movq %r10, %rdi movq %r10, %rdi
movq $512, %rcx movl $512, %ecx
rep ; movsq rep ; movsq
movq %rax, %rdi movq %rax, %rdi
movq %rdx, %rsi movq %rdx, %rsi
movq $512, %rcx movl $512, %ecx
rep ; movsq rep ; movsq
movq %rdx, %rdi movq %rdx, %rdi
movq %r10, %rsi movq %r10, %rsi
movq $512, %rcx movl $512, %ecx
rep ; movsq rep ; movsq
lea PAGE_SIZE(%rax), %rsi lea PAGE_SIZE(%rax), %rsi


@ -832,10 +832,15 @@ static void __init trim_low_memory_range(void)
static int static int
dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p)
{ {
pr_emerg("Kernel Offset: 0x%lx from 0x%lx " if (kaslr_enabled()) {
"(relocation range: 0x%lx-0x%lx)\n", pr_emerg("Kernel Offset: 0x%lx from 0x%lx (relocation range: 0x%lx-0x%lx)\n",
(unsigned long)&_text - __START_KERNEL, __START_KERNEL, (unsigned long)&_text - __START_KERNEL,
__START_KERNEL_map, MODULES_VADDR-1); __START_KERNEL,
__START_KERNEL_map,
MODULES_VADDR-1);
} else {
pr_emerg("Kernel Offset: disabled\n");
}
return 0; return 0;
} }


@ -61,8 +61,7 @@
regs->seg = GET_SEG(seg) | 3; \ regs->seg = GET_SEG(seg) | 3; \
} while (0) } while (0)
int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
unsigned long *pax)
{ {
void __user *buf; void __user *buf;
unsigned int tmpflags; unsigned int tmpflags;
@ -81,7 +80,7 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
#endif /* CONFIG_X86_32 */ #endif /* CONFIG_X86_32 */
COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
COPY(dx); COPY(cx); COPY(ip); COPY(dx); COPY(cx); COPY(ip); COPY(ax);
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
COPY(r8); COPY(r8);
@ -94,27 +93,20 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
COPY(r15); COPY(r15);
#endif /* CONFIG_X86_64 */ #endif /* CONFIG_X86_64 */
#ifdef CONFIG_X86_32
COPY_SEG_CPL3(cs); COPY_SEG_CPL3(cs);
COPY_SEG_CPL3(ss); COPY_SEG_CPL3(ss);
#else /* !CONFIG_X86_32 */
/* Kernel saves and restores only the CS segment register on signals,
* which is the bare minimum needed to allow mixed 32/64-bit code.
* App's signal handler can save/restore other segments if needed. */
COPY_SEG_CPL3(cs);
#endif /* CONFIG_X86_32 */
get_user_ex(tmpflags, &sc->flags); get_user_ex(tmpflags, &sc->flags);
regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
regs->orig_ax = -1; /* disable syscall checks */ regs->orig_ax = -1; /* disable syscall checks */
get_user_ex(buf, &sc->fpstate); get_user_ex(buf, &sc->fpstate);
get_user_ex(*pax, &sc->ax);
} get_user_catch(err); } get_user_catch(err);
err |= restore_xstate_sig(buf, config_enabled(CONFIG_X86_32)); err |= restore_xstate_sig(buf, config_enabled(CONFIG_X86_32));
force_iret();
return err; return err;
} }
@ -162,8 +154,9 @@ int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
#else /* !CONFIG_X86_32 */ #else /* !CONFIG_X86_32 */
put_user_ex(regs->flags, &sc->flags); put_user_ex(regs->flags, &sc->flags);
put_user_ex(regs->cs, &sc->cs); put_user_ex(regs->cs, &sc->cs);
put_user_ex(0, &sc->gs); put_user_ex(0, &sc->__pad2);
put_user_ex(0, &sc->fs); put_user_ex(0, &sc->__pad1);
put_user_ex(regs->ss, &sc->ss);
#endif /* CONFIG_X86_32 */ #endif /* CONFIG_X86_32 */
put_user_ex(fpstate, &sc->fpstate); put_user_ex(fpstate, &sc->fpstate);
@ -457,9 +450,19 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
regs->sp = (unsigned long)frame; regs->sp = (unsigned long)frame;
/* Set up the CS register to run signal handlers in 64-bit mode, /*
even if the handler happens to be interrupting 32-bit code. */ * Set up the CS and SS registers to run signal handlers in
* 64-bit mode, even if the handler happens to be interrupting
* 32-bit or 16-bit code.
*
* SS is subtle. In 64-bit mode, we don't need any particular
* SS descriptor, but we do need SS to be valid. It's possible
* that the old SS is entirely bogus -- this can happen if the
* signal we're trying to deliver is #GP or #SS caused by a bad
* SS value.
*/
regs->cs = __USER_CS; regs->cs = __USER_CS;
regs->ss = __USER_DS;
return 0; return 0;
} }
@ -539,7 +542,6 @@ asmlinkage unsigned long sys_sigreturn(void)
{ {
struct pt_regs *regs = current_pt_regs(); struct pt_regs *regs = current_pt_regs();
struct sigframe __user *frame; struct sigframe __user *frame;
unsigned long ax;
sigset_t set; sigset_t set;
frame = (struct sigframe __user *)(regs->sp - 8); frame = (struct sigframe __user *)(regs->sp - 8);
@ -553,9 +555,9 @@ asmlinkage unsigned long sys_sigreturn(void)
set_current_blocked(&set); set_current_blocked(&set);
if (restore_sigcontext(regs, &frame->sc, &ax)) if (restore_sigcontext(regs, &frame->sc))
goto badframe; goto badframe;
return ax; return regs->ax;
badframe: badframe:
signal_fault(regs, frame, "sigreturn"); signal_fault(regs, frame, "sigreturn");
@ -568,7 +570,6 @@ asmlinkage long sys_rt_sigreturn(void)
{ {
struct pt_regs *regs = current_pt_regs(); struct pt_regs *regs = current_pt_regs();
struct rt_sigframe __user *frame; struct rt_sigframe __user *frame;
unsigned long ax;
sigset_t set; sigset_t set;
frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
@ -579,13 +580,13 @@ asmlinkage long sys_rt_sigreturn(void)
set_current_blocked(&set); set_current_blocked(&set);
if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
goto badframe; goto badframe;
if (restore_altstack(&frame->uc.uc_stack)) if (restore_altstack(&frame->uc.uc_stack))
goto badframe; goto badframe;
return ax; return regs->ax;
badframe: badframe:
signal_fault(regs, frame, "rt_sigreturn"); signal_fault(regs, frame, "rt_sigreturn");
@ -780,7 +781,6 @@ asmlinkage long sys32_x32_rt_sigreturn(void)
struct pt_regs *regs = current_pt_regs(); struct pt_regs *regs = current_pt_regs();
struct rt_sigframe_x32 __user *frame; struct rt_sigframe_x32 __user *frame;
sigset_t set; sigset_t set;
unsigned long ax;
frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8); frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8);
@ -791,13 +791,13 @@ asmlinkage long sys32_x32_rt_sigreturn(void)
set_current_blocked(&set); set_current_blocked(&set);
if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
goto badframe; goto badframe;
if (compat_restore_altstack(&frame->uc.uc_stack)) if (compat_restore_altstack(&frame->uc.uc_stack))
goto badframe; goto badframe;
return ax; return regs->ax;
badframe: badframe:
signal_fault(regs, frame, "x32 rt_sigreturn"); signal_fault(regs, frame, "x32 rt_sigreturn");


@ -779,6 +779,26 @@ out:
return boot_error; return boot_error;
} }
void common_cpu_up(unsigned int cpu, struct task_struct *idle)
{
/* Just in case we booted with a single CPU. */
alternatives_enable_smp();
per_cpu(current_task, cpu) = idle;
#ifdef CONFIG_X86_32
/* Stack for startup_32 can be just as for start_secondary onwards */
irq_ctx_init(cpu);
per_cpu(cpu_current_top_of_stack, cpu) =
(unsigned long)task_stack_page(idle) + THREAD_SIZE;
#else
clear_tsk_thread_flag(idle, TIF_FORK);
initial_gs = per_cpu_offset(cpu);
#endif
per_cpu(kernel_stack, cpu) =
(unsigned long)task_stack_page(idle) + THREAD_SIZE;
}
/* /*
* NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
* (ie clustered apic addressing mode), this is a LOGICAL apic ID. * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
@ -796,23 +816,9 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
int cpu0_nmi_registered = 0; int cpu0_nmi_registered = 0;
unsigned long timeout; unsigned long timeout;
/* Just in case we booted with a single CPU. */
alternatives_enable_smp();
idle->thread.sp = (unsigned long) (((struct pt_regs *) idle->thread.sp = (unsigned long) (((struct pt_regs *)
(THREAD_SIZE + task_stack_page(idle))) - 1); (THREAD_SIZE + task_stack_page(idle))) - 1);
per_cpu(current_task, cpu) = idle;
#ifdef CONFIG_X86_32
/* Stack for startup_32 can be just as for start_secondary onwards */
irq_ctx_init(cpu);
#else
clear_tsk_thread_flag(idle, TIF_FORK);
initial_gs = per_cpu_offset(cpu);
#endif
per_cpu(kernel_stack, cpu) =
(unsigned long)task_stack_page(idle) -
KERNEL_STACK_OFFSET + THREAD_SIZE;
early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
initial_code = (unsigned long)start_secondary; initial_code = (unsigned long)start_secondary;
stack_start = idle->thread.sp; stack_start = idle->thread.sp;
@ -953,6 +959,8 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
/* the FPU context is blank, nobody can own it */ /* the FPU context is blank, nobody can own it */
__cpu_disable_lazy_restore(cpu); __cpu_disable_lazy_restore(cpu);
common_cpu_up(cpu, tidle);
err = do_boot_cpu(apicid, cpu, tidle); err = do_boot_cpu(apicid, cpu, tidle);
if (err) { if (err) {
pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu); pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);


@ -5,21 +5,29 @@
#include <linux/cache.h> #include <linux/cache.h>
#include <asm/asm-offsets.h> #include <asm/asm-offsets.h>
#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ; #ifdef CONFIG_IA32_EMULATION
#define SYM(sym, compat) compat
#else
#define SYM(sym, compat) sym
#define ia32_sys_call_table sys_call_table
#define __NR_ia32_syscall_max __NR_syscall_max
#endif
#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void SYM(sym, compat)(void) ;
#include <asm/syscalls_32.h> #include <asm/syscalls_32.h>
#undef __SYSCALL_I386 #undef __SYSCALL_I386
#define __SYSCALL_I386(nr, sym, compat) [nr] = sym, #define __SYSCALL_I386(nr, sym, compat) [nr] = SYM(sym, compat),
typedef asmlinkage void (*sys_call_ptr_t)(void); typedef asmlinkage void (*sys_call_ptr_t)(void);
extern asmlinkage void sys_ni_syscall(void); extern asmlinkage void sys_ni_syscall(void);
__visible const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { __visible const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = {
/* /*
* Smells like a compiler bug -- it doesn't work * Smells like a compiler bug -- it doesn't work
* when the & below is removed. * when the & below is removed.
*/ */
[0 ... __NR_syscall_max] = &sys_ni_syscall, [0 ... __NR_ia32_syscall_max] = &sys_ni_syscall,
#include <asm/syscalls_32.h> #include <asm/syscalls_32.h>
}; };
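The table above is built with two layers of designated initializers: a range designator fills every slot with sys_ni_syscall, and the entries generated from asm/syscalls_32.h then override individual slots, with the later initializer winning. A toy version of the same pattern (the names and numbers are invented):

#include <stdio.h>

typedef void (*sys_call_ptr_sketch_t)(void);

static void sys_ni(void)     { puts("ENOSYS"); }
static void sys_read_(void)  { puts("read");   }
static void sys_write_(void) { puts("write");  }

#define NR_SYSCALL_MAX 5

/* Later designators override the range default; GCC accepts this
 * (possibly warning under -Woverride-init), which is what the kernel
 * table relies on. */
static const sys_call_ptr_sketch_t table[NR_SYSCALL_MAX + 1] = {
	[0 ... NR_SYSCALL_MAX] = &sys_ni,   /* default for every slot */
	[3] = &sys_read_,                   /* would come from the header */
	[4] = &sys_write_,
};

int main(void)
{
	int nr;

	for (nr = 0; nr <= NR_SYSCALL_MAX; nr++)
		table[nr]();
	return 0;
}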


@ -30,7 +30,7 @@ unsigned long profile_pc(struct pt_regs *regs)
{ {
unsigned long pc = instruction_pointer(regs); unsigned long pc = instruction_pointer(regs);
if (!user_mode_vm(regs) && in_lock_functions(pc)) { if (!user_mode(regs) && in_lock_functions(pc)) {
#ifdef CONFIG_FRAME_POINTER #ifdef CONFIG_FRAME_POINTER
return *(unsigned long *)(regs->bp + sizeof(long)); return *(unsigned long *)(regs->bp + sizeof(long));
#else #else


@ -112,7 +112,7 @@ enum ctx_state ist_enter(struct pt_regs *regs)
{ {
enum ctx_state prev_state; enum ctx_state prev_state;
if (user_mode_vm(regs)) { if (user_mode(regs)) {
/* Other than that, we're just an exception. */ /* Other than that, we're just an exception. */
prev_state = exception_enter(); prev_state = exception_enter();
} else { } else {
@ -146,7 +146,7 @@ void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
/* Must be before exception_exit. */ /* Must be before exception_exit. */
preempt_count_sub(HARDIRQ_OFFSET); preempt_count_sub(HARDIRQ_OFFSET);
if (user_mode_vm(regs)) if (user_mode(regs))
return exception_exit(prev_state); return exception_exit(prev_state);
else else
rcu_nmi_exit(); rcu_nmi_exit();
@ -158,7 +158,7 @@ void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
* *
* IST exception handlers normally cannot schedule. As a special * IST exception handlers normally cannot schedule. As a special
* exception, if the exception interrupted userspace code (i.e. * exception, if the exception interrupted userspace code (i.e.
* user_mode_vm(regs) would return true) and the exception was not * user_mode(regs) would return true) and the exception was not
* a double fault, it can be safe to schedule. ist_begin_non_atomic() * a double fault, it can be safe to schedule. ist_begin_non_atomic()
* begins a non-atomic section within an ist_enter()/ist_exit() region. * begins a non-atomic section within an ist_enter()/ist_exit() region.
* Callers are responsible for enabling interrupts themselves inside * Callers are responsible for enabling interrupts themselves inside
@ -167,15 +167,15 @@ void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
*/ */
void ist_begin_non_atomic(struct pt_regs *regs) void ist_begin_non_atomic(struct pt_regs *regs)
{ {
BUG_ON(!user_mode_vm(regs)); BUG_ON(!user_mode(regs));
/* /*
* Sanity check: we need to be on the normal thread stack. This * Sanity check: we need to be on the normal thread stack. This
* will catch asm bugs and any attempt to use ist_preempt_enable * will catch asm bugs and any attempt to use ist_preempt_enable
* from double_fault. * from double_fault.
*/ */
BUG_ON(((current_stack_pointer() ^ this_cpu_read_stable(kernel_stack)) BUG_ON((unsigned long)(current_top_of_stack() -
& ~(THREAD_SIZE - 1)) != 0); current_stack_pointer()) >= THREAD_SIZE);
preempt_count_sub(HARDIRQ_OFFSET); preempt_count_sub(HARDIRQ_OFFSET);
} }
@ -194,8 +194,7 @@ static nokprobe_inline int
do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
struct pt_regs *regs, long error_code) struct pt_regs *regs, long error_code)
{ {
#ifdef CONFIG_X86_32 if (v8086_mode(regs)) {
if (regs->flags & X86_VM_MASK) {
/* /*
* Traps 0, 1, 3, 4, and 5 should be forwarded to vm86. * Traps 0, 1, 3, 4, and 5 should be forwarded to vm86.
* On nmi (interrupt 2), do_trap should not be called. * On nmi (interrupt 2), do_trap should not be called.
@ -207,7 +206,7 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
} }
return -1; return -1;
} }
#endif
if (!user_mode(regs)) { if (!user_mode(regs)) {
if (!fixup_exception(regs)) { if (!fixup_exception(regs)) {
tsk->thread.error_code = error_code; tsk->thread.error_code = error_code;
@ -384,7 +383,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
goto exit; goto exit;
conditional_sti(regs); conditional_sti(regs);
if (!user_mode_vm(regs)) if (!user_mode(regs))
die("bounds", regs, error_code); die("bounds", regs, error_code);
if (!cpu_feature_enabled(X86_FEATURE_MPX)) { if (!cpu_feature_enabled(X86_FEATURE_MPX)) {
@ -462,13 +461,11 @@ do_general_protection(struct pt_regs *regs, long error_code)
prev_state = exception_enter(); prev_state = exception_enter();
conditional_sti(regs); conditional_sti(regs);
#ifdef CONFIG_X86_32 if (v8086_mode(regs)) {
if (regs->flags & X86_VM_MASK) {
local_irq_enable(); local_irq_enable();
handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
goto exit; goto exit;
} }
#endif
tsk = current; tsk = current;
if (!user_mode(regs)) { if (!user_mode(regs)) {
@ -587,7 +584,7 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
/* Copy the remainder of the stack from the current stack. */ /* Copy the remainder of the stack from the current stack. */
memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip)); memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip));
BUG_ON(!user_mode_vm(&new_stack->regs)); BUG_ON(!user_mode(&new_stack->regs));
return new_stack; return new_stack;
} }
NOKPROBE_SYMBOL(fixup_bad_iret); NOKPROBE_SYMBOL(fixup_bad_iret);
@ -637,7 +634,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
* then it's very likely the result of an icebp/int01 trap. * then it's very likely the result of an icebp/int01 trap.
* User wants a sigtrap for that. * User wants a sigtrap for that.
*/ */
if (!dr6 && user_mode_vm(regs)) if (!dr6 && user_mode(regs))
user_icebp = 1; user_icebp = 1;
/* Catch kmemcheck conditions first of all! */ /* Catch kmemcheck conditions first of all! */
@ -673,7 +670,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
/* It's safe to allow irq's after DR6 has been saved */ /* It's safe to allow irq's after DR6 has been saved */
preempt_conditional_sti(regs); preempt_conditional_sti(regs);
if (regs->flags & X86_VM_MASK) { if (v8086_mode(regs)) {
handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code,
X86_TRAP_DB); X86_TRAP_DB);
preempt_conditional_cli(regs); preempt_conditional_cli(regs);
@ -721,7 +718,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
return; return;
conditional_sti(regs); conditional_sti(regs);
if (!user_mode_vm(regs)) if (!user_mode(regs))
{ {
if (!fixup_exception(regs)) { if (!fixup_exception(regs)) {
task->thread.error_code = error_code; task->thread.error_code = error_code;
@ -925,9 +922,21 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
/* Set of traps needed for early debugging. */ /* Set of traps needed for early debugging. */
void __init early_trap_init(void) void __init early_trap_init(void)
{ {
set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK); /*
* Don't use IST to set DEBUG_STACK as it doesn't work until TSS
* is ready in cpu_init() <-- trap_init(). Before trap_init(),
* CPU runs at ring 0 so it is impossible to hit an invalid
* stack. Using the original stack works well enough at this
* early stage. DEBUG_STACK will be equipped after cpu_init() in
* trap_init().
*
* We don't need to set trace_idt_table like set_intr_gate(),
* since we don't have trace_debug and it will be reset to
* 'debug' in trap_init() by set_intr_gate_ist().
*/
set_intr_gate_notrace(X86_TRAP_DB, debug);
/* int3 can be called from all */ /* int3 can be called from all */
set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK); set_system_intr_gate(X86_TRAP_BP, &int3);
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
set_intr_gate(X86_TRAP_PF, page_fault); set_intr_gate(X86_TRAP_PF, page_fault);
#endif #endif
@ -1005,6 +1014,15 @@ void __init trap_init(void)
*/ */
cpu_init(); cpu_init();
/*
* X86_TRAP_DB and X86_TRAP_BP have been set
* in early_trap_init(). However, ITS works only after
* cpu_init() loads TSS. See comments in early_trap_init().
*/
set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK);
/* int3 can be called from all */
set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK);
x86_init.irqs.trap_init(); x86_init.irqs.trap_init();
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64


@ -912,7 +912,7 @@ int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val,
int ret = NOTIFY_DONE; int ret = NOTIFY_DONE;
/* We are only interested in userspace traps */ /* We are only interested in userspace traps */
if (regs && !user_mode_vm(regs)) if (regs && !user_mode(regs))
return NOTIFY_DONE; return NOTIFY_DONE;
switch (val) { switch (val) {


@ -150,7 +150,7 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs)
do_exit(SIGSEGV); do_exit(SIGSEGV);
} }
tss = &per_cpu(init_tss, get_cpu()); tss = &per_cpu(cpu_tss, get_cpu());
current->thread.sp0 = current->thread.saved_sp0; current->thread.sp0 = current->thread.saved_sp0;
current->thread.sysenter_cs = __KERNEL_CS; current->thread.sysenter_cs = __KERNEL_CS;
load_sp0(tss, &current->thread); load_sp0(tss, &current->thread);
@ -318,7 +318,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
tsk->thread.saved_fs = info->regs32->fs; tsk->thread.saved_fs = info->regs32->fs;
tsk->thread.saved_gs = get_user_gs(info->regs32); tsk->thread.saved_gs = get_user_gs(info->regs32);
tss = &per_cpu(init_tss, get_cpu()); tss = &per_cpu(cpu_tss, get_cpu());
tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0; tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0;
if (cpu_has_sep) if (cpu_has_sep)
tsk->thread.sysenter_cs = 0; tsk->thread.sysenter_cs = 0;


@ -868,7 +868,8 @@ static void __init lguest_init_IRQ(void)
/* Some systems map "vectors" to interrupts weirdly. Not us! */ /* Some systems map "vectors" to interrupts weirdly. Not us! */
__this_cpu_write(vector_irq[i], i - FIRST_EXTERNAL_VECTOR); __this_cpu_write(vector_irq[i], i - FIRST_EXTERNAL_VECTOR);
if (i != SYSCALL_VECTOR) if (i != SYSCALL_VECTOR)
set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]); set_intr_gate(i, irq_entries_start +
8 * (i - FIRST_EXTERNAL_VECTOR));
} }
/* /*
@ -1076,6 +1077,7 @@ static void lguest_load_sp0(struct tss_struct *tss,
{ {
lazy_hcall3(LHCALL_SET_STACK, __KERNEL_DS | 0x1, thread->sp0, lazy_hcall3(LHCALL_SET_STACK, __KERNEL_DS | 0x1, thread->sp0,
THREAD_SIZE / PAGE_SIZE); THREAD_SIZE / PAGE_SIZE);
tss->x86_tss.sp0 = thread->sp0;
} }
/* Let's just say, I wouldn't do debugging under a Guest. */ /* Let's just say, I wouldn't do debugging under a Guest. */


@ -13,16 +13,6 @@
#include <asm/alternative-asm.h> #include <asm/alternative-asm.h>
#include <asm/dwarf2.h> #include <asm/dwarf2.h>
.macro SAVE reg
pushl_cfi %\reg
CFI_REL_OFFSET \reg, 0
.endm
.macro RESTORE reg
popl_cfi %\reg
CFI_RESTORE \reg
.endm
.macro read64 reg .macro read64 reg
movl %ebx, %eax movl %ebx, %eax
movl %ecx, %edx movl %ecx, %edx
@ -67,10 +57,10 @@ ENDPROC(atomic64_xchg_cx8)
.macro addsub_return func ins insc .macro addsub_return func ins insc
ENTRY(atomic64_\func\()_return_cx8) ENTRY(atomic64_\func\()_return_cx8)
CFI_STARTPROC CFI_STARTPROC
SAVE ebp pushl_cfi_reg ebp
SAVE ebx pushl_cfi_reg ebx
SAVE esi pushl_cfi_reg esi
SAVE edi pushl_cfi_reg edi
movl %eax, %esi movl %eax, %esi
movl %edx, %edi movl %edx, %edi
@ -89,10 +79,10 @@ ENTRY(atomic64_\func\()_return_cx8)
10: 10:
movl %ebx, %eax movl %ebx, %eax
movl %ecx, %edx movl %ecx, %edx
RESTORE edi popl_cfi_reg edi
RESTORE esi popl_cfi_reg esi
RESTORE ebx popl_cfi_reg ebx
RESTORE ebp popl_cfi_reg ebp
ret ret
CFI_ENDPROC CFI_ENDPROC
ENDPROC(atomic64_\func\()_return_cx8) ENDPROC(atomic64_\func\()_return_cx8)
@ -104,7 +94,7 @@ addsub_return sub sub sbb
.macro incdec_return func ins insc .macro incdec_return func ins insc
ENTRY(atomic64_\func\()_return_cx8) ENTRY(atomic64_\func\()_return_cx8)
CFI_STARTPROC CFI_STARTPROC
SAVE ebx pushl_cfi_reg ebx
read64 %esi read64 %esi
1: 1:
@ -119,7 +109,7 @@ ENTRY(atomic64_\func\()_return_cx8)
10: 10:
movl %ebx, %eax movl %ebx, %eax
movl %ecx, %edx movl %ecx, %edx
RESTORE ebx popl_cfi_reg ebx
ret ret
CFI_ENDPROC CFI_ENDPROC
ENDPROC(atomic64_\func\()_return_cx8) ENDPROC(atomic64_\func\()_return_cx8)
@ -130,7 +120,7 @@ incdec_return dec sub sbb
ENTRY(atomic64_dec_if_positive_cx8) ENTRY(atomic64_dec_if_positive_cx8)
CFI_STARTPROC CFI_STARTPROC
SAVE ebx pushl_cfi_reg ebx
read64 %esi read64 %esi
1: 1:
@ -146,18 +136,18 @@ ENTRY(atomic64_dec_if_positive_cx8)
2: 2:
movl %ebx, %eax movl %ebx, %eax
movl %ecx, %edx movl %ecx, %edx
RESTORE ebx popl_cfi_reg ebx
ret ret
CFI_ENDPROC CFI_ENDPROC
ENDPROC(atomic64_dec_if_positive_cx8) ENDPROC(atomic64_dec_if_positive_cx8)
ENTRY(atomic64_add_unless_cx8) ENTRY(atomic64_add_unless_cx8)
CFI_STARTPROC CFI_STARTPROC
SAVE ebp pushl_cfi_reg ebp
SAVE ebx pushl_cfi_reg ebx
/* these just push these two parameters on the stack */ /* these just push these two parameters on the stack */
SAVE edi pushl_cfi_reg edi
SAVE ecx pushl_cfi_reg ecx
movl %eax, %ebp movl %eax, %ebp
movl %edx, %edi movl %edx, %edi
@ -179,8 +169,8 @@ ENTRY(atomic64_add_unless_cx8)
3: 3:
addl $8, %esp addl $8, %esp
CFI_ADJUST_CFA_OFFSET -8 CFI_ADJUST_CFA_OFFSET -8
RESTORE ebx popl_cfi_reg ebx
RESTORE ebp popl_cfi_reg ebp
ret ret
4: 4:
cmpl %edx, 4(%esp) cmpl %edx, 4(%esp)
@ -192,7 +182,7 @@ ENDPROC(atomic64_add_unless_cx8)
ENTRY(atomic64_inc_not_zero_cx8) ENTRY(atomic64_inc_not_zero_cx8)
CFI_STARTPROC CFI_STARTPROC
SAVE ebx pushl_cfi_reg ebx
read64 %esi read64 %esi
1: 1:
@ -209,7 +199,7 @@ ENTRY(atomic64_inc_not_zero_cx8)
movl $1, %eax movl $1, %eax
3: 3:
RESTORE ebx popl_cfi_reg ebx
ret ret
CFI_ENDPROC CFI_ENDPROC
ENDPROC(atomic64_inc_not_zero_cx8) ENDPROC(atomic64_inc_not_zero_cx8)


@ -51,10 +51,8 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
*/ */
ENTRY(csum_partial) ENTRY(csum_partial)
CFI_STARTPROC CFI_STARTPROC
pushl_cfi %esi pushl_cfi_reg esi
CFI_REL_OFFSET esi, 0 pushl_cfi_reg ebx
pushl_cfi %ebx
CFI_REL_OFFSET ebx, 0
movl 20(%esp),%eax # Function arg: unsigned int sum movl 20(%esp),%eax # Function arg: unsigned int sum
movl 16(%esp),%ecx # Function arg: int len movl 16(%esp),%ecx # Function arg: int len
movl 12(%esp),%esi # Function arg: unsigned char *buff movl 12(%esp),%esi # Function arg: unsigned char *buff
@ -127,14 +125,12 @@ ENTRY(csum_partial)
6: addl %ecx,%eax 6: addl %ecx,%eax
adcl $0, %eax adcl $0, %eax
7: 7:
testl $1, 12(%esp) testb $1, 12(%esp)
jz 8f jz 8f
roll $8, %eax roll $8, %eax
8: 8:
popl_cfi %ebx popl_cfi_reg ebx
CFI_RESTORE ebx popl_cfi_reg esi
popl_cfi %esi
CFI_RESTORE esi
ret ret
CFI_ENDPROC CFI_ENDPROC
ENDPROC(csum_partial) ENDPROC(csum_partial)
@ -145,10 +141,8 @@ ENDPROC(csum_partial)
ENTRY(csum_partial) ENTRY(csum_partial)
CFI_STARTPROC CFI_STARTPROC
pushl_cfi %esi pushl_cfi_reg esi
CFI_REL_OFFSET esi, 0 pushl_cfi_reg ebx
pushl_cfi %ebx
CFI_REL_OFFSET ebx, 0
movl 20(%esp),%eax # Function arg: unsigned int sum movl 20(%esp),%eax # Function arg: unsigned int sum
movl 16(%esp),%ecx # Function arg: int len movl 16(%esp),%ecx # Function arg: int len
movl 12(%esp),%esi # Function arg: const unsigned char *buf movl 12(%esp),%esi # Function arg: const unsigned char *buf
@ -251,14 +245,12 @@ ENTRY(csum_partial)
addl %ebx,%eax addl %ebx,%eax
adcl $0,%eax adcl $0,%eax
80: 80:
testl $1, 12(%esp) testb $1, 12(%esp)
jz 90f jz 90f
roll $8, %eax roll $8, %eax
90: 90:
popl_cfi %ebx popl_cfi_reg ebx
CFI_RESTORE ebx popl_cfi_reg esi
popl_cfi %esi
CFI_RESTORE esi
ret ret
CFI_ENDPROC CFI_ENDPROC
ENDPROC(csum_partial) ENDPROC(csum_partial)
@ -298,12 +290,9 @@ ENTRY(csum_partial_copy_generic)
CFI_STARTPROC CFI_STARTPROC
subl $4,%esp subl $4,%esp
CFI_ADJUST_CFA_OFFSET 4 CFI_ADJUST_CFA_OFFSET 4
pushl_cfi %edi pushl_cfi_reg edi
CFI_REL_OFFSET edi, 0 pushl_cfi_reg esi
pushl_cfi %esi pushl_cfi_reg ebx
CFI_REL_OFFSET esi, 0
pushl_cfi %ebx
CFI_REL_OFFSET ebx, 0
movl ARGBASE+16(%esp),%eax # sum movl ARGBASE+16(%esp),%eax # sum
movl ARGBASE+12(%esp),%ecx # len movl ARGBASE+12(%esp),%ecx # len
movl ARGBASE+4(%esp),%esi # src movl ARGBASE+4(%esp),%esi # src
@ -412,12 +401,9 @@ DST( movb %cl, (%edi) )
.previous .previous
popl_cfi %ebx popl_cfi_reg ebx
CFI_RESTORE ebx popl_cfi_reg esi
popl_cfi %esi popl_cfi_reg edi
CFI_RESTORE esi
popl_cfi %edi
CFI_RESTORE edi
popl_cfi %ecx # equivalent to addl $4,%esp popl_cfi %ecx # equivalent to addl $4,%esp
ret ret
CFI_ENDPROC CFI_ENDPROC
@ -441,12 +427,9 @@ ENDPROC(csum_partial_copy_generic)
ENTRY(csum_partial_copy_generic) ENTRY(csum_partial_copy_generic)
CFI_STARTPROC CFI_STARTPROC
pushl_cfi %ebx pushl_cfi_reg ebx
CFI_REL_OFFSET ebx, 0 pushl_cfi_reg edi
pushl_cfi %edi pushl_cfi_reg esi
CFI_REL_OFFSET edi, 0
pushl_cfi %esi
CFI_REL_OFFSET esi, 0
movl ARGBASE+4(%esp),%esi #src movl ARGBASE+4(%esp),%esi #src
movl ARGBASE+8(%esp),%edi #dst movl ARGBASE+8(%esp),%edi #dst
movl ARGBASE+12(%esp),%ecx #len movl ARGBASE+12(%esp),%ecx #len
@ -506,12 +489,9 @@ DST( movb %dl, (%edi) )
jmp 7b jmp 7b
.previous .previous
popl_cfi %esi popl_cfi_reg esi
CFI_RESTORE esi popl_cfi_reg edi
popl_cfi %edi popl_cfi_reg ebx
CFI_RESTORE edi
popl_cfi %ebx
CFI_RESTORE ebx
ret ret
CFI_ENDPROC CFI_ENDPROC
ENDPROC(csum_partial_copy_generic) ENDPROC(csum_partial_copy_generic)


@ -1,31 +1,35 @@
#include <linux/linkage.h> #include <linux/linkage.h>
#include <asm/dwarf2.h> #include <asm/dwarf2.h>
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h> #include <asm/alternative-asm.h>
/* /*
* Zero a page. * Most CPUs support enhanced REP MOVSB/STOSB instructions. It is
* rdi page * recommended to use this when possible and we do use them by default.
*/ * If enhanced REP MOVSB/STOSB is not available, try to use fast string.
ENTRY(clear_page_c) * Otherwise, use original.
*/
/*
* Zero a page.
* %rdi - page
*/
ENTRY(clear_page)
CFI_STARTPROC CFI_STARTPROC
ALTERNATIVE_2 "jmp clear_page_orig", "", X86_FEATURE_REP_GOOD, \
"jmp clear_page_c_e", X86_FEATURE_ERMS
movl $4096/8,%ecx movl $4096/8,%ecx
xorl %eax,%eax xorl %eax,%eax
rep stosq rep stosq
ret ret
CFI_ENDPROC CFI_ENDPROC
ENDPROC(clear_page_c) ENDPROC(clear_page)
ENTRY(clear_page_c_e) ENTRY(clear_page_orig)
CFI_STARTPROC CFI_STARTPROC
movl $4096,%ecx
xorl %eax,%eax
rep stosb
ret
CFI_ENDPROC
ENDPROC(clear_page_c_e)
ENTRY(clear_page)
CFI_STARTPROC
xorl %eax,%eax xorl %eax,%eax
movl $4096/64,%ecx movl $4096/64,%ecx
.p2align 4 .p2align 4
@ -45,29 +49,13 @@ ENTRY(clear_page)
nop nop
ret ret
CFI_ENDPROC CFI_ENDPROC
.Lclear_page_end: ENDPROC(clear_page_orig)
ENDPROC(clear_page)
/* ENTRY(clear_page_c_e)
* Some CPUs support enhanced REP MOVSB/STOSB instructions. CFI_STARTPROC
* It is recommended to use this when possible. movl $4096,%ecx
* If enhanced REP MOVSB/STOSB is not available, try to use fast string. xorl %eax,%eax
* Otherwise, use original function. rep stosb
* ret
*/ CFI_ENDPROC
ENDPROC(clear_page_c_e)
#include <asm/cpufeature.h>
.section .altinstr_replacement,"ax"
1: .byte 0xeb /* jmp <disp8> */
.byte (clear_page_c - clear_page) - (2f - 1b) /* offset */
2: .byte 0xeb /* jmp <disp8> */
.byte (clear_page_c_e - clear_page) - (3f - 2b) /* offset */
3:
.previous
.section .altinstructions,"a"
altinstruction_entry clear_page,1b,X86_FEATURE_REP_GOOD,\
.Lclear_page_end-clear_page, 2b-1b
altinstruction_entry clear_page,2b,X86_FEATURE_ERMS, \
.Lclear_page_end-clear_page,3b-2b
.previous
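
The ALTERNATIVE_2 line above folds the old hand-rolled .altinstructions entries into a single boot-time choice between three page-clearing variants. A minimal C sketch of that selection order, using only the function names from this hunk; the clear_page_pick() wrapper is hypothetical and not part of the patch:

#include <asm/cpufeature.h>               /* boot_cpu_has(), X86_FEATURE_*  */

extern void clear_page(void *page);       /* default body: REP STOSQ        */
extern void clear_page_orig(void *page);  /* unrolled loop, no REP_GOOD     */
extern void clear_page_c_e(void *page);   /* REP STOSB, needs ERMS          */

/* Hypothetical wrapper, only to spell out which variant wins. */
static void clear_page_pick(void *page)
{
        if (boot_cpu_has(X86_FEATURE_ERMS))
                clear_page_c_e(page);
        else if (boot_cpu_has(X86_FEATURE_REP_GOOD))
                clear_page(page);         /* falls through to REP STOSQ */
        else
                clear_page_orig(page);
}

ERMS is listed after REP_GOOD in ALTERNATIVE_2, so it takes precedence when a CPU advertises both features.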


@ -2,23 +2,26 @@
#include <linux/linkage.h> #include <linux/linkage.h>
#include <asm/dwarf2.h> #include <asm/dwarf2.h>
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h> #include <asm/alternative-asm.h>
/*
* Some CPUs run faster using the string copy instructions (sane microcode).
* It is also a lot simpler. Use this when possible. But, don't use streaming
* copy unless the CPU indicates X86_FEATURE_REP_GOOD. Could vary the
* prefetch distance based on SMP/UP.
*/
ALIGN ALIGN
copy_page_rep: ENTRY(copy_page)
CFI_STARTPROC CFI_STARTPROC
ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
movl $4096/8, %ecx movl $4096/8, %ecx
rep movsq rep movsq
ret ret
CFI_ENDPROC CFI_ENDPROC
ENDPROC(copy_page_rep) ENDPROC(copy_page)
/* ENTRY(copy_page_regs)
* Don't use streaming copy unless the CPU indicates X86_FEATURE_REP_GOOD.
* Could vary the prefetch distance based on SMP/UP.
*/
ENTRY(copy_page)
CFI_STARTPROC CFI_STARTPROC
subq $2*8, %rsp subq $2*8, %rsp
CFI_ADJUST_CFA_OFFSET 2*8 CFI_ADJUST_CFA_OFFSET 2*8
@ -90,21 +93,5 @@ ENTRY(copy_page)
addq $2*8, %rsp addq $2*8, %rsp
CFI_ADJUST_CFA_OFFSET -2*8 CFI_ADJUST_CFA_OFFSET -2*8
ret ret
.Lcopy_page_end:
CFI_ENDPROC CFI_ENDPROC
ENDPROC(copy_page) ENDPROC(copy_page_regs)
/* Some CPUs run faster using the string copy instructions.
It is also a lot simpler. Use this when possible */
#include <asm/cpufeature.h>
.section .altinstr_replacement,"ax"
1: .byte 0xeb /* jmp <disp8> */
.byte (copy_page_rep - copy_page) - (2f - 1b) /* offset */
2:
.previous
.section .altinstructions,"a"
altinstruction_entry copy_page, 1b, X86_FEATURE_REP_GOOD, \
.Lcopy_page_end-copy_page, 2b-1b
.previous


@ -8,9 +8,6 @@
#include <linux/linkage.h> #include <linux/linkage.h>
#include <asm/dwarf2.h> #include <asm/dwarf2.h>
#define FIX_ALIGNMENT 1
#include <asm/current.h> #include <asm/current.h>
#include <asm/asm-offsets.h> #include <asm/asm-offsets.h>
#include <asm/thread_info.h> #include <asm/thread_info.h>
@ -19,33 +16,7 @@
#include <asm/asm.h> #include <asm/asm.h>
#include <asm/smap.h> #include <asm/smap.h>
/*
* By placing feature2 after feature1 in altinstructions section, we logically
* implement:
* If CPU has feature2, jmp to alt2 is used
* else if CPU has feature1, jmp to alt1 is used
* else jmp to orig is used.
*/
.macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
0:
.byte 0xe9 /* 32bit jump */
.long \orig-1f /* by default jump to orig */
1:
.section .altinstr_replacement,"ax"
2: .byte 0xe9 /* near jump with 32bit immediate */
.long \alt1-1b /* offset */ /* or alternatively to alt1 */
3: .byte 0xe9 /* near jump with 32bit immediate */
.long \alt2-1b /* offset */ /* or alternatively to alt2 */
.previous
.section .altinstructions,"a"
altinstruction_entry 0b,2b,\feature1,5,5
altinstruction_entry 0b,3b,\feature2,5,5
.previous
.endm
.macro ALIGN_DESTINATION .macro ALIGN_DESTINATION
#ifdef FIX_ALIGNMENT
/* check for bad alignment of destination */ /* check for bad alignment of destination */
movl %edi,%ecx movl %edi,%ecx
andl $7,%ecx andl $7,%ecx
@ -67,7 +38,6 @@
_ASM_EXTABLE(100b,103b) _ASM_EXTABLE(100b,103b)
_ASM_EXTABLE(101b,103b) _ASM_EXTABLE(101b,103b)
#endif
.endm .endm
/* Standard copy_to_user with segment limit checking */ /* Standard copy_to_user with segment limit checking */
@ -79,9 +49,11 @@ ENTRY(_copy_to_user)
jc bad_to_user jc bad_to_user
cmpq TI_addr_limit(%rax),%rcx cmpq TI_addr_limit(%rax),%rcx
ja bad_to_user ja bad_to_user
ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \ ALTERNATIVE_2 "jmp copy_user_generic_unrolled", \
copy_user_generic_unrolled,copy_user_generic_string, \ "jmp copy_user_generic_string", \
copy_user_enhanced_fast_string X86_FEATURE_REP_GOOD, \
"jmp copy_user_enhanced_fast_string", \
X86_FEATURE_ERMS
CFI_ENDPROC CFI_ENDPROC
ENDPROC(_copy_to_user) ENDPROC(_copy_to_user)
@ -94,9 +66,11 @@ ENTRY(_copy_from_user)
jc bad_from_user jc bad_from_user
cmpq TI_addr_limit(%rax),%rcx cmpq TI_addr_limit(%rax),%rcx
ja bad_from_user ja bad_from_user
ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \ ALTERNATIVE_2 "jmp copy_user_generic_unrolled", \
copy_user_generic_unrolled,copy_user_generic_string, \ "jmp copy_user_generic_string", \
copy_user_enhanced_fast_string X86_FEATURE_REP_GOOD, \
"jmp copy_user_enhanced_fast_string", \
X86_FEATURE_ERMS
CFI_ENDPROC CFI_ENDPROC
ENDPROC(_copy_from_user) ENDPROC(_copy_from_user)


@ -196,7 +196,7 @@ ENTRY(csum_partial_copy_generic)
/* handle last odd byte */ /* handle last odd byte */
.Lhandle_1: .Lhandle_1:
testl $1, %r10d testb $1, %r10b
jz .Lende jz .Lende
xorl %ebx, %ebx xorl %ebx, %ebx
source source


@ -52,6 +52,13 @@
*/ */
void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64) void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
{ {
/*
* Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid
* even if the input buffer is long enough to hold them.
*/
if (buf_len > MAX_INSN_SIZE)
buf_len = MAX_INSN_SIZE;
memset(insn, 0, sizeof(*insn)); memset(insn, 0, sizeof(*insn));
insn->kaddr = kaddr; insn->kaddr = kaddr;
insn->end_kaddr = kaddr + buf_len; insn->end_kaddr = kaddr + buf_len;
@ -164,6 +171,12 @@ found:
/* VEX.W overrides opnd_size */ /* VEX.W overrides opnd_size */
insn->opnd_bytes = 8; insn->opnd_bytes = 8;
} else { } else {
/*
* For VEX2, fake VEX3-like byte#2.
* Makes it easier to decode vex.W, vex.vvvv,
* vex.L and vex.pp. Masking with 0x7f sets vex.W == 0.
*/
insn->vex_prefix.bytes[2] = b2 & 0x7f;
insn->vex_prefix.nbytes = 2; insn->vex_prefix.nbytes = 2;
insn->next_byte += 2; insn->next_byte += 2;
} }
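
The two insertions above harden the decoder: input buffers longer than MAX_INSN_SIZE are clamped, and a 2-byte VEX prefix gets a faked VEX3-style byte 2 so vex.W, vex.vvvv, vex.L and vex.pp can be read the same way for both encodings. A minimal usage sketch of the entry points named here; the decoded_length() helper is made up for illustration:

#include <asm/insn.h>           /* struct insn, insn_init(), insn_get_length() */

/* Hypothetical helper: decode one instruction from a possibly oversized buffer. */
static int decoded_length(const void *kaddr, int buf_len)
{
        struct insn insn;

        /* buf_len may exceed MAX_INSN_SIZE; insn_init() now clamps it. */
        insn_init(&insn, kaddr, buf_len, 1 /* 64-bit mode */);
        insn_get_length(&insn);

        return insn.length;     /* at most MAX_INSN_SIZE for a valid decode */
}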


@ -1,11 +1,19 @@
/* Copyright 2002 Andi Kleen */ /* Copyright 2002 Andi Kleen */
#include <linux/linkage.h> #include <linux/linkage.h>
#include <asm/cpufeature.h> #include <asm/cpufeature.h>
#include <asm/dwarf2.h> #include <asm/dwarf2.h>
#include <asm/alternative-asm.h> #include <asm/alternative-asm.h>
/*
* We build a jump to memcpy_orig by default which gets NOPped out on
* the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
* have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs
* to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
*/
.weak memcpy
/* /*
* memcpy - Copy a memory block. * memcpy - Copy a memory block.
* *
@ -17,15 +25,11 @@
* Output: * Output:
* rax original destination * rax original destination
*/ */
ENTRY(__memcpy)
ENTRY(memcpy)
ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
"jmp memcpy_erms", X86_FEATURE_ERMS
/*
* memcpy_c() - fast string ops (REP MOVSQ) based variant.
*
* This gets patched over the unrolled variant (below) via the
* alternative instructions framework:
*/
.section .altinstr_replacement, "ax", @progbits
.Lmemcpy_c:
movq %rdi, %rax movq %rdi, %rax
movq %rdx, %rcx movq %rdx, %rcx
shrq $3, %rcx shrq $3, %rcx
@ -34,29 +38,21 @@
movl %edx, %ecx movl %edx, %ecx
rep movsb rep movsb
ret ret
.Lmemcpy_e: ENDPROC(memcpy)
.previous ENDPROC(__memcpy)
/* /*
* memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than * memcpy_erms() - enhanced fast string memcpy. This is faster and
* memcpy_c. Use memcpy_c_e when possible. * simpler than memcpy. Use memcpy_erms when possible.
*
* This gets patched over the unrolled variant (below) via the
* alternative instructions framework:
*/ */
.section .altinstr_replacement, "ax", @progbits ENTRY(memcpy_erms)
.Lmemcpy_c_e:
movq %rdi, %rax movq %rdi, %rax
movq %rdx, %rcx movq %rdx, %rcx
rep movsb rep movsb
ret ret
.Lmemcpy_e_e: ENDPROC(memcpy_erms)
.previous
.weak memcpy ENTRY(memcpy_orig)
ENTRY(__memcpy)
ENTRY(memcpy)
CFI_STARTPROC CFI_STARTPROC
movq %rdi, %rax movq %rdi, %rax
@ -183,26 +179,4 @@ ENTRY(memcpy)
.Lend: .Lend:
retq retq
CFI_ENDPROC CFI_ENDPROC
ENDPROC(memcpy) ENDPROC(memcpy_orig)
ENDPROC(__memcpy)
/*
* Some CPUs are adding enhanced REP MOVSB/STOSB feature
* If the feature is supported, memcpy_c_e() is the first choice.
* If enhanced rep movsb copy is not available, use fast string copy
* memcpy_c() when possible. This is faster and code is simpler than
* original memcpy().
* Otherwise, original memcpy() is used.
* In .altinstructions section, ERMS feature is placed after REG_GOOD
* feature to implement the right patch order.
*
* Replace only beginning, memcpy is used to apply alternatives,
* so it is silly to overwrite itself with nops - reboot is the
* only outcome...
*/
.section .altinstructions, "a"
altinstruction_entry __memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\
.Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c
altinstruction_entry __memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \
.Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e
.previous


@ -5,7 +5,6 @@
* This assembly file is re-written from memmove_64.c file. * This assembly file is re-written from memmove_64.c file.
* - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com> * - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
*/ */
#define _STRING_C
#include <linux/linkage.h> #include <linux/linkage.h>
#include <asm/dwarf2.h> #include <asm/dwarf2.h>
#include <asm/cpufeature.h> #include <asm/cpufeature.h>
@ -44,6 +43,8 @@ ENTRY(__memmove)
jg 2f jg 2f
.Lmemmove_begin_forward: .Lmemmove_begin_forward:
ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; retq", X86_FEATURE_ERMS
/* /*
* movsq instruction have many startup latency * movsq instruction have many startup latency
* so we handle small size by general register. * so we handle small size by general register.
@ -207,21 +208,5 @@ ENTRY(__memmove)
13: 13:
retq retq
CFI_ENDPROC CFI_ENDPROC
.section .altinstr_replacement,"ax"
.Lmemmove_begin_forward_efs:
/* Forward moving data. */
movq %rdx, %rcx
rep movsb
retq
.Lmemmove_end_forward_efs:
.previous
.section .altinstructions,"a"
altinstruction_entry .Lmemmove_begin_forward, \
.Lmemmove_begin_forward_efs,X86_FEATURE_ERMS, \
.Lmemmove_end_forward-.Lmemmove_begin_forward, \
.Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs
.previous
ENDPROC(__memmove) ENDPROC(__memmove)
ENDPROC(memmove) ENDPROC(memmove)


@ -5,19 +5,30 @@
#include <asm/cpufeature.h> #include <asm/cpufeature.h>
#include <asm/alternative-asm.h> #include <asm/alternative-asm.h>
.weak memset
/* /*
* ISO C memset - set a memory block to a byte value. This function uses fast * ISO C memset - set a memory block to a byte value. This function uses fast
* string to get better performance than the original function. The code is * string to get better performance than the original function. The code is
* simpler and shorter than the orignal function as well. * simpler and shorter than the orignal function as well.
* *
* rdi destination * rdi destination
* rsi value (char) * rsi value (char)
* rdx count (bytes) * rdx count (bytes)
* *
* rax original destination * rax original destination
*/ */
.section .altinstr_replacement, "ax", @progbits ENTRY(memset)
.Lmemset_c: ENTRY(__memset)
/*
* Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
* to use it when possible. If not available, use fast string instructions.
*
* Otherwise, use original memset function.
*/
ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
"jmp memset_erms", X86_FEATURE_ERMS
movq %rdi,%r9 movq %rdi,%r9
movq %rdx,%rcx movq %rdx,%rcx
andl $7,%edx andl $7,%edx
@ -31,8 +42,8 @@
rep stosb rep stosb
movq %r9,%rax movq %r9,%rax
ret ret
.Lmemset_e: ENDPROC(memset)
.previous ENDPROC(__memset)
/* /*
* ISO C memset - set a memory block to a byte value. This function uses * ISO C memset - set a memory block to a byte value. This function uses
@ -45,21 +56,16 @@
* *
* rax original destination * rax original destination
*/ */
.section .altinstr_replacement, "ax", @progbits ENTRY(memset_erms)
.Lmemset_c_e:
movq %rdi,%r9 movq %rdi,%r9
movb %sil,%al movb %sil,%al
movq %rdx,%rcx movq %rdx,%rcx
rep stosb rep stosb
movq %r9,%rax movq %r9,%rax
ret ret
.Lmemset_e_e: ENDPROC(memset_erms)
.previous
.weak memset ENTRY(memset_orig)
ENTRY(memset)
ENTRY(__memset)
CFI_STARTPROC CFI_STARTPROC
movq %rdi,%r10 movq %rdi,%r10
@ -134,23 +140,4 @@ ENTRY(__memset)
jmp .Lafter_bad_alignment jmp .Lafter_bad_alignment
.Lfinal: .Lfinal:
CFI_ENDPROC CFI_ENDPROC
ENDPROC(memset) ENDPROC(memset_orig)
ENDPROC(__memset)
/* Some CPUs support enhanced REP MOVSB/STOSB feature.
* It is recommended to use this when possible.
*
* If enhanced REP MOVSB/STOSB feature is not available, use fast string
* instructions.
*
* Otherwise, use original memset function.
*
* In .altinstructions section, ERMS feature is placed after REG_GOOD
* feature to implement the right patch order.
*/
.section .altinstructions,"a"
altinstruction_entry __memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\
.Lfinal-__memset,.Lmemset_e-.Lmemset_c
altinstruction_entry __memset,.Lmemset_c_e,X86_FEATURE_ERMS, \
.Lfinal-__memset,.Lmemset_e_e-.Lmemset_c_e
.previous


@ -14,8 +14,8 @@
.macro op_safe_regs op .macro op_safe_regs op
ENTRY(\op\()_safe_regs) ENTRY(\op\()_safe_regs)
CFI_STARTPROC CFI_STARTPROC
pushq_cfi %rbx pushq_cfi_reg rbx
pushq_cfi %rbp pushq_cfi_reg rbp
movq %rdi, %r10 /* Save pointer */ movq %rdi, %r10 /* Save pointer */
xorl %r11d, %r11d /* Return value */ xorl %r11d, %r11d /* Return value */
movl (%rdi), %eax movl (%rdi), %eax
@ -35,8 +35,8 @@ ENTRY(\op\()_safe_regs)
movl %ebp, 20(%r10) movl %ebp, 20(%r10)
movl %esi, 24(%r10) movl %esi, 24(%r10)
movl %edi, 28(%r10) movl %edi, 28(%r10)
popq_cfi %rbp popq_cfi_reg rbp
popq_cfi %rbx popq_cfi_reg rbx
ret ret
3: 3:
CFI_RESTORE_STATE CFI_RESTORE_STATE
@ -53,10 +53,10 @@ ENDPROC(\op\()_safe_regs)
.macro op_safe_regs op .macro op_safe_regs op
ENTRY(\op\()_safe_regs) ENTRY(\op\()_safe_regs)
CFI_STARTPROC CFI_STARTPROC
pushl_cfi %ebx pushl_cfi_reg ebx
pushl_cfi %ebp pushl_cfi_reg ebp
pushl_cfi %esi pushl_cfi_reg esi
pushl_cfi %edi pushl_cfi_reg edi
pushl_cfi $0 /* Return value */ pushl_cfi $0 /* Return value */
pushl_cfi %eax pushl_cfi %eax
movl 4(%eax), %ecx movl 4(%eax), %ecx
@ -80,10 +80,10 @@ ENTRY(\op\()_safe_regs)
movl %esi, 24(%eax) movl %esi, 24(%eax)
movl %edi, 28(%eax) movl %edi, 28(%eax)
popl_cfi %eax popl_cfi %eax
popl_cfi %edi popl_cfi_reg edi
popl_cfi %esi popl_cfi_reg esi
popl_cfi %ebp popl_cfi_reg ebp
popl_cfi %ebx popl_cfi_reg ebx
ret ret
3: 3:
CFI_RESTORE_STATE CFI_RESTORE_STATE


@ -34,10 +34,10 @@
*/ */
#define save_common_regs \ #define save_common_regs \
pushl_cfi %ecx; CFI_REL_OFFSET ecx, 0 pushl_cfi_reg ecx
#define restore_common_regs \ #define restore_common_regs \
popl_cfi %ecx; CFI_RESTORE ecx popl_cfi_reg ecx
/* Avoid uglifying the argument copying x86-64 needs to do. */ /* Avoid uglifying the argument copying x86-64 needs to do. */
.macro movq src, dst .macro movq src, dst
@ -64,22 +64,22 @@
*/ */
#define save_common_regs \ #define save_common_regs \
pushq_cfi %rdi; CFI_REL_OFFSET rdi, 0; \ pushq_cfi_reg rdi; \
pushq_cfi %rsi; CFI_REL_OFFSET rsi, 0; \ pushq_cfi_reg rsi; \
pushq_cfi %rcx; CFI_REL_OFFSET rcx, 0; \ pushq_cfi_reg rcx; \
pushq_cfi %r8; CFI_REL_OFFSET r8, 0; \ pushq_cfi_reg r8; \
pushq_cfi %r9; CFI_REL_OFFSET r9, 0; \ pushq_cfi_reg r9; \
pushq_cfi %r10; CFI_REL_OFFSET r10, 0; \ pushq_cfi_reg r10; \
pushq_cfi %r11; CFI_REL_OFFSET r11, 0 pushq_cfi_reg r11
#define restore_common_regs \ #define restore_common_regs \
popq_cfi %r11; CFI_RESTORE r11; \ popq_cfi_reg r11; \
popq_cfi %r10; CFI_RESTORE r10; \ popq_cfi_reg r10; \
popq_cfi %r9; CFI_RESTORE r9; \ popq_cfi_reg r9; \
popq_cfi %r8; CFI_RESTORE r8; \ popq_cfi_reg r8; \
popq_cfi %rcx; CFI_RESTORE rcx; \ popq_cfi_reg rcx; \
popq_cfi %rsi; CFI_RESTORE rsi; \ popq_cfi_reg rsi; \
popq_cfi %rdi; CFI_RESTORE rdi popq_cfi_reg rdi
#endif #endif
@ -87,12 +87,10 @@
ENTRY(call_rwsem_down_read_failed) ENTRY(call_rwsem_down_read_failed)
CFI_STARTPROC CFI_STARTPROC
save_common_regs save_common_regs
__ASM_SIZE(push,_cfi) %__ASM_REG(dx) __ASM_SIZE(push,_cfi_reg) __ASM_REG(dx)
CFI_REL_OFFSET __ASM_REG(dx), 0
movq %rax,%rdi movq %rax,%rdi
call rwsem_down_read_failed call rwsem_down_read_failed
__ASM_SIZE(pop,_cfi) %__ASM_REG(dx) __ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx)
CFI_RESTORE __ASM_REG(dx)
restore_common_regs restore_common_regs
ret ret
CFI_ENDPROC CFI_ENDPROC
@ -124,12 +122,10 @@ ENDPROC(call_rwsem_wake)
ENTRY(call_rwsem_downgrade_wake) ENTRY(call_rwsem_downgrade_wake)
CFI_STARTPROC CFI_STARTPROC
save_common_regs save_common_regs
__ASM_SIZE(push,_cfi) %__ASM_REG(dx) __ASM_SIZE(push,_cfi_reg) __ASM_REG(dx)
CFI_REL_OFFSET __ASM_REG(dx), 0
movq %rax,%rdi movq %rax,%rdi
call rwsem_downgrade_wake call rwsem_downgrade_wake
__ASM_SIZE(pop,_cfi) %__ASM_REG(dx) __ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx)
CFI_RESTORE __ASM_REG(dx)
restore_common_regs restore_common_regs
ret ret
CFI_ENDPROC CFI_ENDPROC


@ -13,12 +13,9 @@
.globl \name .globl \name
\name: \name:
CFI_STARTPROC CFI_STARTPROC
pushl_cfi %eax pushl_cfi_reg eax
CFI_REL_OFFSET eax, 0 pushl_cfi_reg ecx
pushl_cfi %ecx pushl_cfi_reg edx
CFI_REL_OFFSET ecx, 0
pushl_cfi %edx
CFI_REL_OFFSET edx, 0
.if \put_ret_addr_in_eax .if \put_ret_addr_in_eax
/* Place EIP in the arg1 */ /* Place EIP in the arg1 */
@ -26,12 +23,9 @@
.endif .endif
call \func call \func
popl_cfi %edx popl_cfi_reg edx
CFI_RESTORE edx popl_cfi_reg ecx
popl_cfi %ecx popl_cfi_reg eax
CFI_RESTORE ecx
popl_cfi %eax
CFI_RESTORE eax
ret ret
CFI_ENDPROC CFI_ENDPROC
_ASM_NOKPROBE(\name) _ASM_NOKPROBE(\name)


@ -17,9 +17,18 @@
CFI_STARTPROC CFI_STARTPROC
/* this one pushes 9 elems, the next one would be %rIP */ /* this one pushes 9 elems, the next one would be %rIP */
SAVE_ARGS pushq_cfi_reg rdi
pushq_cfi_reg rsi
pushq_cfi_reg rdx
pushq_cfi_reg rcx
pushq_cfi_reg rax
pushq_cfi_reg r8
pushq_cfi_reg r9
pushq_cfi_reg r10
pushq_cfi_reg r11
.if \put_ret_addr_in_rdi .if \put_ret_addr_in_rdi
/* 9*8(%rsp) is return addr on stack */
movq_cfi_restore 9*8, rdi movq_cfi_restore 9*8, rdi
.endif .endif
@ -45,11 +54,22 @@
#endif #endif
#endif #endif
/* SAVE_ARGS below is used only for the .cfi directives it contains. */ #if defined(CONFIG_TRACE_IRQFLAGS) \
|| defined(CONFIG_DEBUG_LOCK_ALLOC) \
|| defined(CONFIG_PREEMPT)
CFI_STARTPROC CFI_STARTPROC
SAVE_ARGS CFI_ADJUST_CFA_OFFSET 9*8
restore: restore:
RESTORE_ARGS popq_cfi_reg r11
popq_cfi_reg r10
popq_cfi_reg r9
popq_cfi_reg r8
popq_cfi_reg rax
popq_cfi_reg rcx
popq_cfi_reg rdx
popq_cfi_reg rsi
popq_cfi_reg rdi
ret ret
CFI_ENDPROC CFI_ENDPROC
_ASM_NOKPROBE(restore) _ASM_NOKPROBE(restore)
#endif
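
With SAVE_ARGS gone, the 9*8 offset used by movq_cfi_restore above is easier to see: nine 8-byte pushes sit between %rsp and the caller's return address. A purely illustrative C model of that layout (hypothetical, not kernel code, x86-64 assumed):

#include <stddef.h>

struct trace_thunk_frame {                      /* hypothetical name          */
        unsigned long r11, r10, r9, r8;         /* pushed last ...            */
        unsigned long rax, rcx, rdx, rsi, rdi;  /* ... pushed first           */
        unsigned long return_address;           /* read via 9*8(%rsp)         */
};

/* Nine 8-byte pushes put the return address at offset 9*8 = 72. */
_Static_assert(offsetof(struct trace_thunk_frame, return_address) == 9 * 8,
               "return address sits at 9*8(%rsp)");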


@ -273,6 +273,9 @@ dd: ESC
de: ESC de: ESC
df: ESC df: ESC
# 0xe0 - 0xef # 0xe0 - 0xef
# Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix
# in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation
# to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD.
e0: LOOPNE/LOOPNZ Jb (f64) e0: LOOPNE/LOOPNZ Jb (f64)
e1: LOOPE/LOOPZ Jb (f64) e1: LOOPE/LOOPZ Jb (f64)
e2: LOOP Jb (f64) e2: LOOP Jb (f64)
@ -281,6 +284,10 @@ e4: IN AL,Ib
e5: IN eAX,Ib e5: IN eAX,Ib
e6: OUT Ib,AL e6: OUT Ib,AL
e7: OUT Ib,eAX e7: OUT Ib,eAX
# With 0x66 prefix in 64-bit mode, for AMD CPUs immediate offset
# in "near" jumps and calls is 16-bit. For CALL,
# push of return address is 16-bit wide, RSP is decremented by 2
# but is not truncated to 16 bits, unlike RIP.
e8: CALL Jz (f64) e8: CALL Jz (f64)
e9: JMP-near Jz (f64) e9: JMP-near Jz (f64)
ea: JMP-far Ap (i64) ea: JMP-far Ap (i64)
@ -456,6 +463,7 @@ AVXcode: 1
7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1) 7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1)
7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3) 7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3)
# 0x0f 0x80-0x8f # 0x0f 0x80-0x8f
# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
80: JO Jz (f64) 80: JO Jz (f64)
81: JNO Jz (f64) 81: JNO Jz (f64)
82: JB/JC/JNAE Jz (f64) 82: JB/JC/JNAE Jz (f64)
@ -842,6 +850,7 @@ EndTable
GrpTable: Grp5 GrpTable: Grp5
0: INC Ev 0: INC Ev
1: DEC Ev 1: DEC Ev
# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
2: CALLN Ev (f64) 2: CALLN Ev (f64)
3: CALLF Ep 3: CALLF Ep
4: JMPN Ev (f64) 4: JMPN Ev (f64)


@ -59,7 +59,7 @@ static nokprobe_inline int kprobes_fault(struct pt_regs *regs)
int ret = 0; int ret = 0;
/* kprobe_running() needs smp_processor_id() */ /* kprobe_running() needs smp_processor_id() */
if (kprobes_built_in() && !user_mode_vm(regs)) { if (kprobes_built_in() && !user_mode(regs)) {
preempt_disable(); preempt_disable();
if (kprobe_running() && kprobe_fault_handler(regs, 14)) if (kprobe_running() && kprobe_fault_handler(regs, 14))
ret = 1; ret = 1;
@ -148,7 +148,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
instr = (void *)convert_ip_to_linear(current, regs); instr = (void *)convert_ip_to_linear(current, regs);
max_instr = instr + 15; max_instr = instr + 15;
if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE) if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE_MAX)
return 0; return 0;
while (instr < max_instr) { while (instr < max_instr) {
@ -1035,7 +1035,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
if (error_code & PF_USER) if (error_code & PF_USER)
return false; return false;
if (!user_mode_vm(regs) && (regs->flags & X86_EFLAGS_AC)) if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC))
return false; return false;
return true; return true;
@ -1140,7 +1140,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
* User-mode registers count as a user access even for any * User-mode registers count as a user access even for any
* potential system fault or CPU buglet: * potential system fault or CPU buglet:
*/ */
if (user_mode_vm(regs)) { if (user_mode(regs)) {
local_irq_enable(); local_irq_enable();
error_code |= PF_USER; error_code |= PF_USER;
flags |= FAULT_FLAG_USER; flags |= FAULT_FLAG_USER;


@ -179,7 +179,8 @@ static void __init probe_page_size_mask(void)
if (cpu_has_pge) { if (cpu_has_pge) {
cr4_set_bits_and_update_boot(X86_CR4_PGE); cr4_set_bits_and_update_boot(X86_CR4_PGE);
__supported_pte_mask |= _PAGE_GLOBAL; __supported_pte_mask |= _PAGE_GLOBAL;
} } else
__supported_pte_mask &= ~_PAGE_GLOBAL;
} }
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32


@ -111,7 +111,7 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth)
{ {
struct stack_frame *head = (struct stack_frame *)frame_pointer(regs); struct stack_frame *head = (struct stack_frame *)frame_pointer(regs);
if (!user_mode_vm(regs)) { if (!user_mode(regs)) {
unsigned long stack = kernel_stack_pointer(regs); unsigned long stack = kernel_stack_pointer(regs);
if (depth) if (depth)
dump_trace(NULL, regs, (unsigned long *)stack, 0, dump_trace(NULL, regs, (unsigned long *)stack, 0,


@ -134,7 +134,7 @@ static void do_fpu_end(void)
static void fix_processor_context(void) static void fix_processor_context(void)
{ {
int cpu = smp_processor_id(); int cpu = smp_processor_id();
struct tss_struct *t = &per_cpu(init_tss, cpu); struct tss_struct *t = &per_cpu(cpu_tss, cpu);
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
struct desc_struct *desc = get_cpu_gdt_table(cpu); struct desc_struct *desc = get_cpu_gdt_table(cpu);
tss_desc tss; tss_desc tss;


@ -119,7 +119,7 @@
110 i386 iopl sys_iopl 110 i386 iopl sys_iopl
111 i386 vhangup sys_vhangup 111 i386 vhangup sys_vhangup
112 i386 idle 112 i386 idle
113 i386 vm86old sys_vm86old sys32_vm86_warning 113 i386 vm86old sys_vm86old sys_ni_syscall
114 i386 wait4 sys_wait4 compat_sys_wait4 114 i386 wait4 sys_wait4 compat_sys_wait4
115 i386 swapoff sys_swapoff 115 i386 swapoff sys_swapoff
116 i386 sysinfo sys_sysinfo compat_sys_sysinfo 116 i386 sysinfo sys_sysinfo compat_sys_sysinfo
@ -172,7 +172,7 @@
163 i386 mremap sys_mremap 163 i386 mremap sys_mremap
164 i386 setresuid sys_setresuid16 164 i386 setresuid sys_setresuid16
165 i386 getresuid sys_getresuid16 165 i386 getresuid sys_getresuid16
166 i386 vm86 sys_vm86 sys32_vm86_warning 166 i386 vm86 sys_vm86 sys_ni_syscall
167 i386 query_module 167 i386 query_module
168 i386 poll sys_poll 168 i386 poll sys_poll
169 i386 nfsservctl 169 i386 nfsservctl

Some files were not shown because too many files have changed in this diff.