From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: 
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1752408AbdF3P4p (ORCPT );
	Fri, 30 Jun 2017 11:56:45 -0400
Received: from mail.kernel.org ([198.145.29.99]:59310 "EHLO mail.kernel.org"
	rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP
	id S1752377AbdF3P4m (ORCPT );
	Fri, 30 Jun 2017 11:56:42 -0400
DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 2584B22BE2
Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=kernel.org
Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=luto@kernel.org
From: Andy Lutomirski 
To: X86 ML 
Cc: "linux-kernel@vger.kernel.org" ,
	Borislav Petkov ,
	Josh Poimboeuf ,
	Andy Lutomirski 
Subject: [PATCH 1/2] x86/entry/64: Refactor IRQ stacks and make them NMI-safe
Date: Fri, 30 Jun 2017 08:56:32 -0700
Message-Id: <00128b1fdadc10321589b4bd8dd2ddaa20e6e38f.1498838018.git.luto@kernel.org>
X-Mailer: git-send-email 2.9.4
In-Reply-To: 
References: 
In-Reply-To: 
References: 
Sender: linux-kernel-owner@vger.kernel.org
List-ID: 
X-Mailing-List: linux-kernel@vger.kernel.org

This will allow IRQ stacks to nest inside NMIs or similar entries that
can happen during IRQ stack setup or teardown.

The new macros won't work correctly if they're invoked with IRQs on.
Add a check under CONFIG_DEBUG_ENTRY to detect that.

Signed-off-by: Andy Lutomirski 
---
 arch/x86/Kconfig.debug       |  2 --
 arch/x86/entry/entry_64.S    | 85 +++++++++++++++++++++++++++++++-------------
 arch/x86/kernel/process_64.c |  3 ++
 3 files changed, 64 insertions(+), 26 deletions(-)

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index fcb7604172ce..353ed09f5fba 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -305,8 +305,6 @@ config DEBUG_ENTRY
 	  Some of these sanity checks may slow down kernel entries and
 	  exits or otherwise impact performance.
 
-	  This is currently used to help test NMI code.
-
 	  If unsure, say N.
 
 config DEBUG_NMI_SELFTEST
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 4a4c0834f965..3ace85965242 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -446,6 +446,59 @@ ENTRY(irq_entries_start)
 	.endr
 END(irq_entries_start)
 
+.macro DEBUG_ENTRY_ASSERT_IRQS_OFF
+#ifdef CONFIG_DEBUG_ENTRY
+	pushfq
+	testl	$X86_EFLAGS_IF, (%rsp)
+	jz	.Lokay_\@
+	ud2
+.Lokay_\@:
+	addq	$8, %rsp
+#endif
+.endm
+
+/*
+ * Enters the IRQ stack if we're not already using it.  NMI-safe.  Clobbers
+ * flags and puts old RSP into old_rsp, and leaves all other GPRs alone.
+ * Requires kernel GSBASE.
+ *
+ * The invariant is that, if irq_count != -1, then the IRQ stack is in use.
+ */
+.macro ENTER_IRQ_STACK old_rsp
+	DEBUG_ENTRY_ASSERT_IRQS_OFF
+	movq	%rsp, \old_rsp
+	incl	PER_CPU_VAR(irq_count)
+
+	/*
+	 * Right now, if we just incremented irq_count to zero, we've
+	 * claimed the IRQ stack but we haven't switched to it yet.
+	 *
+	 * If anything is added that can interrupt us here without using IST,
+	 * it must be *extremely* careful to limit its stack usage.  This
+	 * could include kprobes and a hypothetical future IST-less #DB
+	 * handler.
+	 */
+
+	cmovzq	PER_CPU_VAR(irq_stack_ptr), %rsp
+	pushq	\old_rsp
+.endm
+
+/*
+ * Undoes ENTER_IRQ_STACK.
+ */
+.macro LEAVE_IRQ_STACK
+	DEBUG_ENTRY_ASSERT_IRQS_OFF
+	/* We need to be off the IRQ stack before decrementing irq_count. */
+	popq	%rsp
+
+	/*
+	 * As in ENTER_IRQ_STACK, irq_count == 0, we are still claiming
+	 * the irq stack but we're not on it.
+	 */
+
+	decl	PER_CPU_VAR(irq_count)
+.endm
+
 /*
  * Interrupt entry/exit.
  *
@@ -484,17 +537,7 @@ END(irq_entries_start)
 	CALL_enter_from_user_mode
 
 1:
-	/*
-	 * Save previous stack pointer, optionally switch to interrupt stack.
-	 * irq_count is used to check if a CPU is already on an interrupt stack
-	 * or not. While this is essentially redundant with preempt_count it is
-	 * a little cheaper to use a separate counter in the PDA (short of
-	 * moving irq_enter into assembly, which would be too much work)
-	 */
-	movq	%rsp, %rdi
-	incl	PER_CPU_VAR(irq_count)
-	cmovzq	PER_CPU_VAR(irq_stack_ptr), %rsp
-	pushq	%rdi
+	ENTER_IRQ_STACK old_rsp=%rdi
 	/* We entered an interrupt context - irqs are off: */
 	TRACE_IRQS_OFF
 
@@ -514,10 +557,8 @@ common_interrupt:
 ret_from_intr:
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
-	decl	PER_CPU_VAR(irq_count)
 
-	/* Restore saved previous stack */
-	popq	%rsp
+	LEAVE_IRQ_STACK
 
 	testb	$3, CS(%rsp)
 	jz	retint_kernel
@@ -890,12 +931,10 @@ bad_gs:
 ENTRY(do_softirq_own_stack)
 	pushq	%rbp
 	mov	%rsp, %rbp
-	incl	PER_CPU_VAR(irq_count)
-	cmove	PER_CPU_VAR(irq_stack_ptr), %rsp
-	push	%rbp				/* frame pointer backlink */
+	ENTER_IRQ_STACK old_rsp=%r11
 	call	__do_softirq
+	LEAVE_IRQ_STACK
 	leaveq
-	decl	PER_CPU_VAR(irq_count)
 	ret
 END(do_softirq_own_stack)
 
@@ -922,13 +961,11 @@ ENTRY(xen_do_hypervisor_callback)		/* do_hypervisor_callback(struct *pt_regs) */
  * see the correct pointer to the pt_regs
  */
 	movq	%rdi, %rsp			/* we don't return, adjust the stack frame */
-11:	incl	PER_CPU_VAR(irq_count)
-	movq	%rsp, %rbp
-	cmovzq	PER_CPU_VAR(irq_stack_ptr), %rsp
-	pushq	%rbp				/* frame pointer backlink */
+
+	ENTER_IRQ_STACK old_rsp=%r11
 	call	xen_evtchn_do_upcall
-	popq	%rsp
-	decl	PER_CPU_VAR(irq_count)
+	LEAVE_IRQ_STACK
+
 #ifndef CONFIG_PREEMPT
 	call	xen_maybe_preempt_hcall
 #endif
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b6840bf3940b..1d910d97c1c6 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -279,6 +279,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
 	unsigned prev_fsindex, prev_gsindex;
 
+	WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
+		     this_cpu_read(irq_count) != -1);
+
 	switch_fpu_prepare(prev_fpu, cpu);
 
 	/* We must save %fs and %gs before load_TLS() because
-- 
2.9.4
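
[Editor's illustration, not part of the patch] Below is a minimal user-space
C model of the irq_count protocol that ENTER_IRQ_STACK/LEAVE_IRQ_STACK
implement: the counter starts at -1, the increment claims the IRQ stack, and
the stack switch happens only when the counter just became zero, so a nested
entry that lands between the increment and the switch sees a positive count
and stays on whatever stack it interrupted. The helper names here
(enter_irq_stack(), leave_irq_stack(), irq_stack_top, sp) are invented for
this sketch and only mirror the per-CPU variables and macros in the diff.

#include <assert.h>
#include <stdio.h>

#define STACK_WORDS 64

static long task_stack[STACK_WORDS];
static long irq_stack[STACK_WORDS];
static long *const irq_stack_top = irq_stack + STACK_WORDS;

/* Mirrors the per-CPU irq_count: -1 means the IRQ stack is not claimed. */
static int irq_count = -1;

static long *sp;	/* models %rsp */

/*
 * Models ENTER_IRQ_STACK: claim the counter first, switch stacks only if
 * this is the outermost entry (counter just became zero), then save the
 * old stack pointer on whatever stack we are now using.
 */
static void enter_irq_stack(void)
{
	long *old_sp = sp;

	if (++irq_count == 0)		/* incl + cmovzq keyed off ZF */
		sp = irq_stack_top;
	*--sp = (long)old_sp;		/* pushq \old_rsp */
}

/*
 * Models LEAVE_IRQ_STACK: get off the IRQ stack before releasing the
 * counter, preserving "irq_count != -1 => IRQ stack is in use".
 */
static void leave_irq_stack(void)
{
	sp = (long *)*sp;		/* popq %rsp */
	irq_count--;
}

int main(void)
{
	sp = task_stack + STACK_WORDS;

	enter_irq_stack();		/* outermost entry: switches stacks */
	assert(sp >= irq_stack && sp < irq_stack_top);

	enter_irq_stack();		/* nested entry: stays where it is */
	leave_irq_stack();
	assert(irq_count == 0);

	leave_irq_stack();		/* back on the task stack */
	assert(sp == task_stack + STACK_WORDS);
	assert(irq_count == -1);

	puts("irq_count protocol holds");
	return 0;
}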