diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index fcb7604172ce..353ed09f5fba 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -305,8 +305,6 @@ config DEBUG_ENTRY Some of these sanity checks may slow down kernel entries and exits or otherwise impact performance. - This is currently used to help test NMI code. - If unsure, say N. config DEBUG_NMI_SELFTEST diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index a9a8027a6c0e..0d4483ae6360 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -447,6 +447,59 @@ ENTRY(irq_entries_start) .endr END(irq_entries_start) +.macro DEBUG_ENTRY_ASSERT_IRQS_OFF +#ifdef CONFIG_DEBUG_ENTRY + pushfq + testl $X86_EFLAGS_IF, (%rsp) + jz .Lokay_\@ + ud2 +.Lokay_\@: + addq $8, %rsp +#endif +.endm + +/* + * Enters the IRQ stack if we're not already using it. NMI-safe. Clobbers + * flags and puts old RSP into old_rsp, and leaves all other GPRs alone. + * Requires kernel GSBASE. + * + * The invariant is that, if irq_count != -1, then the IRQ stack is in use. + */ +.macro ENTER_IRQ_STACK old_rsp + DEBUG_ENTRY_ASSERT_IRQS_OFF + movq %rsp, \old_rsp + incl PER_CPU_VAR(irq_count) + + /* + * Right now, if we just incremented irq_count to zero, we've + * claimed the IRQ stack but we haven't switched to it yet. + * + * If anything is added that can interrupt us here without using IST, + * it must be *extremely* careful to limit its stack usage. This + * could include kprobes and a hypothetical future IST-less #DB + * handler. + */ + + cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp + pushq \old_rsp +.endm + +/* + * Undoes ENTER_IRQ_STACK. + */ +.macro LEAVE_IRQ_STACK + DEBUG_ENTRY_ASSERT_IRQS_OFF + /* We need to be off the IRQ stack before decrementing irq_count. */ + popq %rsp + + /* + * As in ENTER_IRQ_STACK, irq_count == 0, we are still claiming + * the irq stack but we're not on it. + */ + + decl PER_CPU_VAR(irq_count) +.endm + /* * Interrupt entry/exit. * @@ -485,17 +538,7 @@ END(irq_entries_start) CALL_enter_from_user_mode 1: - /* - * Save previous stack pointer, optionally switch to interrupt stack. - * irq_count is used to check if a CPU is already on an interrupt stack - * or not. While this is essentially redundant with preempt_count it is - * a little cheaper to use a separate counter in the PDA (short of - * moving irq_enter into assembly, which would be too much work) - */ - movq %rsp, %rdi - incl PER_CPU_VAR(irq_count) - cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp - pushq %rdi + ENTER_IRQ_STACK old_rsp=%rdi /* We entered an interrupt context - irqs are off: */ TRACE_IRQS_OFF @@ -515,10 +558,8 @@ common_interrupt: ret_from_intr: DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF - decl PER_CPU_VAR(irq_count) - /* Restore saved previous stack */ - popq %rsp + LEAVE_IRQ_STACK testb $3, CS(%rsp) jz retint_kernel @@ -891,12 +932,10 @@ bad_gs: ENTRY(do_softirq_own_stack) pushq %rbp mov %rsp, %rbp - incl PER_CPU_VAR(irq_count) - cmove PER_CPU_VAR(irq_stack_ptr), %rsp - push %rbp /* frame pointer backlink */ + ENTER_IRQ_STACK old_rsp=%r11 call __do_softirq + LEAVE_IRQ_STACK leaveq - decl PER_CPU_VAR(irq_count) ret END(do_softirq_own_stack) @@ -923,13 +962,11 @@ ENTRY(xen_do_hypervisor_callback) /* do_hypervisor_callback(struct *pt_regs) */ * see the correct pointer to the pt_regs */ movq %rdi, %rsp /* we don't return, adjust the stack frame */ -11: incl PER_CPU_VAR(irq_count) - movq %rsp, %rbp - cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp - pushq %rbp /* frame pointer backlink */ + + ENTER_IRQ_STACK old_rsp=%r10 call xen_evtchn_do_upcall - popq %rsp - decl PER_CPU_VAR(irq_count) + LEAVE_IRQ_STACK + #ifndef CONFIG_PREEMPT call xen_maybe_preempt_hcall #endif diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index c3169be4c596..2987e3991c2b 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -279,6 +279,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) struct tss_struct *tss = &per_cpu(cpu_tss, cpu); unsigned prev_fsindex, prev_gsindex; + WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && + this_cpu_read(irq_count) != -1); + switch_fpu_prepare(prev_fpu, cpu); /* We must save %fs and %gs before load_TLS() because