All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/6 v3] [GIT PULL] x86: Workaround for NMI iret woes
@ 2011-12-22  3:13 Steven Rostedt
  2011-12-22  3:13 ` [PATCH 1/6 v3] x86: Do not schedule while still in NMI context Steven Rostedt
                   ` (6 more replies)
  0 siblings, 7 replies; 8+ messages in thread
From: Steven Rostedt @ 2011-12-22  3:13 UTC (permalink / raw)
  To: linux-kernel
  Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, Peter Zijlstra,
	Frederic Weisbecker, Linus Torvalds, H. Peter Anvin,
	Mathieu Desnoyers, Andi Kleen

[-- Attachment #1: Type: text/plain, Size: 10288 bytes --]


Ingo,

I updated the patches to reflect your comments and ran them through
all my tests again. Below is the full diff between v2 and v3.

I renamed nmi_preprocess() and nmi_postprocess() to
nmi_nesting_preprocess() and nmi_nesting_postprocess() to at least
note that this has to do with NMI nesting. I also added a comment
noting that i386 may loop back to the preprocess step.

I had to move the debug_usage functions to debugreg.h, because
smp.h includes processor.h, and making the inc/dec functions
inline there caused header inclusion issues.

-- Steve

Please pull the latest tip/x86/core-3 tree, which can be found at:

  git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace.git
tip/x86/core-3

Head SHA1: 42181186ad4db986fcaa40ca95c6e407e9e79372


Linus Torvalds (1):
      x86: Do not schedule while still in NMI context

Steven Rostedt (5):
      x86: Document the NMI handler about not using paranoid_exit
      x86: Add workaround to NMI iret woes
      x86: Keep current stack in NMI breakpoints
      x86: Allow NMIs to hit breakpoints in i386
      x86: Add counter when debug stack is used with interrupts enabled

----
 arch/x86/include/asm/debugreg.h |   22 ++++
 arch/x86/include/asm/desc.h     |   12 ++
 arch/x86/kernel/cpu/common.c    |   24 +++++
 arch/x86/kernel/entry_64.S      |  218 +++++++++++++++++++++++++++++++++------
 arch/x86/kernel/head_64.S       |    4 +
 arch/x86/kernel/nmi.c           |  102 ++++++++++++++++++
 arch/x86/kernel/traps.c         |   20 ++++
 7 files changed, 369 insertions(+), 33 deletions(-)

diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 078ad0c..b903d5e 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -101,6 +101,28 @@ extern void aout_dump_debugregs(struct user *dump);
 
 extern void hw_breakpoint_restore(void);
 
+#ifdef CONFIG_X86_64
+DECLARE_PER_CPU(int, debug_stack_usage);
+static inline void debug_stack_usage_inc(void)
+{
+	__get_cpu_var(debug_stack_usage)++;
+}
+static inline void debug_stack_usage_dec(void)
+{
+	__get_cpu_var(debug_stack_usage)--;
+}
+int is_debug_stack(unsigned long addr);
+void debug_stack_set_zero(void);
+void debug_stack_reset(void);
+#else /* !X86_64 */
+static inline int is_debug_stack(unsigned long addr) { return 0; }
+static inline void debug_stack_set_zero(void) { }
+static inline void debug_stack_reset(void) { }
+static inline void debug_stack_usage_inc(void) { }
+static inline void debug_stack_usage_dec(void) { }
+#endif /* X86_64 */
+
+
 #endif	/* __KERNEL__ */
 
 #endif /* _ASM_X86_DEBUGREG_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 2fef5ba..b650435 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -402,11 +402,6 @@ DECLARE_PER_CPU(char *, irq_stack_ptr);
 DECLARE_PER_CPU(unsigned int, irq_count);
 extern unsigned long kernel_eflags;
 extern asmlinkage void ignore_sysret(void);
-void inc_debug_stack_usage(void);
-void dec_debug_stack_usage(void);
-int is_debug_stack(unsigned long addr);
-void zero_debug_stack(void);
-void reset_debug_stack(void);
 #else	/* X86_64 */
 #ifdef CONFIG_CC_STACKPROTECTOR
 /*
@@ -421,11 +416,6 @@ struct stack_canary {
 };
 DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
 #endif
-static inline int is_debug_stack(unsigned long addr) { return 0; }
-static inline void inc_debug_stack_usage(void) { }
-static inline void dec_debug_stack_usage(void) { }
-static inline void zero_debug_stack(void) { }
-static inline void reset_debug_stack(void) { }
 #endif	/* X86_64 */
 
 extern unsigned int xstate_size;
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index f1ec612..266e464 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1093,17 +1093,7 @@ unsigned long kernel_eflags;
 DEFINE_PER_CPU(struct orig_ist, orig_ist);
 
 static DEFINE_PER_CPU(unsigned long, debug_stack_addr);
-static DEFINE_PER_CPU(int, debug_stack_usage);
-
-void inc_debug_stack_usage(void)
-{
-	__get_cpu_var(debug_stack_usage)++;
-}
-
-void dec_debug_stack_usage(void)
-{
-	__get_cpu_var(debug_stack_usage)--;
-}
+DEFINE_PER_CPU(int, debug_stack_usage);
 
 int is_debug_stack(unsigned long addr)
 {
@@ -1112,12 +1102,12 @@ int is_debug_stack(unsigned long addr)
 		 addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
 }
 
-void zero_debug_stack(void)
+void debug_stack_set_zero(void)
 {
 	load_idt((const struct desc_ptr *)&nmi_idt_descr);
 }
 
-void reset_debug_stack(void)
+void debug_stack_reset(void)
 {
 	load_idt((const struct desc_ptr *)&idt_descr);
 }
@@ -1240,7 +1230,7 @@ void __cpuinit cpu_init(void)
 			estacks += exception_stack_sizes[v];
 			oist->ist[v] = t->x86_tss.ist[v] =
 					(unsigned long)estacks;
-			if (v == DEBUG_STACK - 1)
+			if (v == DEBUG_STACK-1)
 				per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks;
 		}
 	}
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b2dea00..b62aa29 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1560,14 +1560,16 @@ nested_nmi:
 	/* Set up the interrupted NMIs stack to jump to repeat_nmi */
 	leaq -6*8(%rsp), %rdx
 	movq %rdx, %rsp
-	pushq $__KERNEL_DS
-	pushq %rdx
-	pushfq
-	pushq $__KERNEL_CS
+	CFI_ADJUST_CFA_OFFSET 6*8
+	pushq_cfi $__KERNEL_DS
+	pushq_cfi %rdx
+	pushfq_cfi
+	pushq_cfi $__KERNEL_CS
 	pushq_cfi $repeat_nmi
 
 	/* Put stack back */
 	addq $(11*8), %rsp
+	CFI_ADJUST_CFA_OFFSET -11*8
 
 nested_nmi_out:
 	popq_cfi %rdx
@@ -1578,7 +1580,7 @@ nested_nmi_out:
 first_nmi:
 	/*
 	 * Because nested NMIs will use the pushed location that we
-	 * stored rdx, we must keep that space available.
+	 * stored in rdx, we must keep that space available.
 	 * Here's what our stack frame will look like:
 	 * +-------------------------+
 	 * | original SS             |
@@ -1661,16 +1663,24 @@ nmi_restore:
 	CFI_ENDPROC
 END(nmi)
 
+	/*
+	 * If an NMI hit an iret because of an exception or breakpoint,
+	 * it can lose its NMI context, and a nested NMI may come in.
+	 * In that case, the nested NMI will change the preempted NMI's
+	 * stack to jump to here when it does the final iret.
+	 */
 repeat_nmi:
+	INTR_FRAME
 	/* Update the stack variable to say we are still in NMI */
 	movq $1, 5*8(%rsp)
 
 	/* copy the saved stack back to copy stack */
 	.rept 5
-	pushq 4*8(%rsp)
+	pushq_cfi 4*8(%rsp)
 	.endr
 
 	jmp restart_nmi
+	CFI_ENDPROC
 end_repeat_nmi:
 
 ENTRY(ignore_sysret)
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 3cb659c..47acaf3 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -442,7 +442,7 @@ enum nmi_states {
 };
 static DEFINE_PER_CPU(enum nmi_states, nmi_state);
 
-#define nmi_preprocess(regs)						\
+#define nmi_nesting_preprocess(regs)					\
 	do {								\
 		if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) {	\
 			__get_cpu_var(nmi_state) = NMI_LATCHED;		\
@@ -452,7 +452,7 @@ static DEFINE_PER_CPU(enum nmi_states, nmi_state);
 		__get_cpu_var(nmi_state) = NMI_EXECUTING;		\
 	} while (0)
 
-#define nmi_postprocess()						\
+#define nmi_nesting_postprocess()					\
 	do {								\
 		if (cmpxchg(&__get_cpu_var(nmi_state),			\
 		    NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING)	\
@@ -481,7 +481,7 @@ static DEFINE_PER_CPU(enum nmi_states, nmi_state);
  */
 static DEFINE_PER_CPU(int, update_debug_stack);
 
-static inline void nmi_preprocess(struct pt_regs *regs)
+static inline void nmi_nesting_preprocess(struct pt_regs *regs)
 {
 	/*
 	 * If we interrupted a breakpoint, it is possible that
@@ -490,22 +490,22 @@ static inline void nmi_preprocess(struct pt_regs *regs)
 	 * continue to use the NMI stack.
 	 */
 	if (unlikely(is_debug_stack(regs->sp))) {
-		zero_debug_stack();
+		debug_stack_set_zero();
 		__get_cpu_var(update_debug_stack) = 1;
 	}
 }
 
-static inline void nmi_postprocess(void)
+static inline void nmi_nesting_postprocess(void)
 {
 	if (unlikely(__get_cpu_var(update_debug_stack)))
-		reset_debug_stack();
+		debug_stack_reset();
 }
 #endif
 
 dotraplinkage notrace __kprobes void
 do_nmi(struct pt_regs *regs, long error_code)
 {
-	nmi_preprocess(regs);
+	nmi_nesting_preprocess(regs);
 
 	nmi_enter();
 
@@ -516,7 +516,8 @@ do_nmi(struct pt_regs *regs, long error_code)
 
 	nmi_exit();
 
-	nmi_postprocess();
+	/* On i386, may loop back to preprocess */
+	nmi_nesting_postprocess();
 }
 
 void stop_nmi(void)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index d2510e7..0072b38 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -320,11 +320,11 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
 	 * Let others (NMI) know that the debug stack is in use
 	 * as we may switch to the interrupt stack.
 	 */
-	inc_debug_stack_usage();
+	debug_stack_usage_inc();
 	preempt_conditional_sti(regs);
 	do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
 	preempt_conditional_cli(regs);
-	dec_debug_stack_usage();
+	debug_stack_usage_dec();
 }
 
 #ifdef CONFIG_X86_64
@@ -421,7 +421,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 	 * Let others (NMI) know that the debug stack is in use
 	 * as we may switch to the interrupt stack.
 	 */
-	inc_debug_stack_usage();
+	debug_stack_usage_inc();
 
 	/* It's safe to allow irq's after DR6 has been saved */
 	preempt_conditional_sti(regs);
@@ -430,7 +430,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 		handle_vm86_trap((struct kernel_vm86_regs *) regs,
 				error_code, 1);
 		preempt_conditional_cli(regs);
-		dec_debug_stack_usage();
+		debug_stack_usage_dec();
 		return;
 	}
 
@@ -450,7 +450,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 	if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
 		send_sigtrap(tsk, regs, error_code, si_code);
 	preempt_conditional_cli(regs);
-	dec_debug_stack_usage();
+	debug_stack_usage_dec();
 
 	return;
 }

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2012-01-08  8:51 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-12-22  3:13 [PATCH 0/6 v3] [GIT PULL] x86: Workaround for NMI iret woes Steven Rostedt
2011-12-22  3:13 ` [PATCH 1/6 v3] x86: Do not schedule while still in NMI context Steven Rostedt
2011-12-22  3:13 ` [PATCH 2/6 v3] x86: Document the NMI handler about not using paranoid_exit Steven Rostedt
2011-12-22  3:14 ` [PATCH 3/6 v3] x86: Add workaround to NMI iret woes Steven Rostedt
2011-12-22  3:14 ` [PATCH 4/6 v3] x86: Keep current stack in NMI breakpoints Steven Rostedt
2011-12-22  3:14 ` [PATCH 5/6 v3] x86: Allow NMIs to hit breakpoints in i386 Steven Rostedt
2011-12-22  3:14 ` [PATCH 6/6 v3] x86: Add counter when debug stack is used with interrupts enabled Steven Rostedt
2012-01-08  8:48 ` [PATCH 0/6 v3] [GIT PULL] x86: Workaround for NMI iret woes Ingo Molnar

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.