linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 0/8] x86/debug: Untangle handle_debug()
@ 2020-08-21  9:39 Peter Zijlstra
  2020-08-21  9:39 ` [PATCH v2 1/8] x86/debug: Allow a single level of #DB recursion Peter Zijlstra
                   ` (8 more replies)
  0 siblings, 9 replies; 21+ messages in thread
From: Peter Zijlstra @ 2020-08-21  9:39 UTC (permalink / raw)
  To: x86
  Cc: linux-kernel, Kyle Huey, Alexandre Chartre,
	Robert O'Callahan, Paul E. McKenney, Frederic Weisbecker,
	Paolo Bonzini, Sean Christopherson, Masami Hiramatsu,
	Petr Mladek, Steven Rostedt, Joel Fernandes, Boris Ostrovsky,
	Juergen Gross, Brian Gerst, Andy Lutomirski, Josh Poimboeuf,
	Daniel Thompson, Peter Zijlstra

Hi,

handle_debug() is a mess, and now that we have separate user and kernel paths,
try and clean it up a bit.

Include's amluto's fix for convenience.

Since v1:

 - Changelogs!
 - fixed notifier order (Josh, Daniel)
 - tested kgdb



^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v2 1/8] x86/debug: Allow a single level of #DB recursion
  2020-08-21  9:39 [PATCH v2 0/8] x86/debug: Untangle handle_debug() Peter Zijlstra
@ 2020-08-21  9:39 ` Peter Zijlstra
  2020-08-21  9:39 ` [PATCH v2 2/8] x86/debug: Sync BTF earlier Peter Zijlstra
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 21+ messages in thread
From: Peter Zijlstra @ 2020-08-21  9:39 UTC (permalink / raw)
  To: x86
  Cc: linux-kernel, Kyle Huey, Alexandre Chartre,
	Robert O'Callahan, Paul E. McKenney, Frederic Weisbecker,
	Paolo Bonzini, Sean Christopherson, Masami Hiramatsu,
	Petr Mladek, Steven Rostedt, Joel Fernandes, Boris Ostrovsky,
	Juergen Gross, Brian Gerst, Andy Lutomirski, Josh Poimboeuf,
	Daniel Thompson, Peter Zijlstra, stable

From: Andy Lutomirski <luto@kernel.org>

Trying to clear DR7 around a #DB from usermode malfunctions if we
schedule when delivering SIGTRAP.  Rather than trying to define a
special no-recursion region, just allow a single level of recursion.
We do the same thing for NMI, and it hasn't caused any problems yet.

Fixes: 9f58fdde95c9 ("x86/db: Split out dr6/7 handling")
Reported-by: Kyle Huey <me@kylehuey.com>
Debugged-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/8b9bd05f187231df008d48cf818a6a311cbd5c98.1597882384.git.luto@kernel.org
---
 arch/x86/kernel/traps.c |   65 ++++++++++++++++++++++--------------------------
 1 file changed, 31 insertions(+), 34 deletions(-)

--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -729,20 +729,9 @@ static bool is_sysenter_singlestep(struc
 #endif
 }
 
-static __always_inline void debug_enter(unsigned long *dr6, unsigned long *dr7)
+static __always_inline unsigned long debug_read_clear_dr6(void)
 {
-	/*
-	 * Disable breakpoints during exception handling; recursive exceptions
-	 * are exceedingly 'fun'.
-	 *
-	 * Since this function is NOKPROBE, and that also applies to
-	 * HW_BREAKPOINT_X, we can't hit a breakpoint before this (XXX except a
-	 * HW_BREAKPOINT_W on our stack)
-	 *
-	 * Entry text is excluded for HW_BP_X and cpu_entry_area, which
-	 * includes the entry stack is excluded for everything.
-	 */
-	*dr7 = local_db_save();
+	unsigned long dr6;
 
 	/*
 	 * The Intel SDM says:
@@ -755,15 +744,12 @@ static __always_inline void debug_enter(
 	 *
 	 * Keep it simple: clear DR6 immediately.
 	 */
-	get_debugreg(*dr6, 6);
+	get_debugreg(dr6, 6);
 	set_debugreg(0, 6);
 	/* Filter out all the reserved bits which are preset to 1 */
-	*dr6 &= ~DR6_RESERVED;
-}
+	dr6 &= ~DR6_RESERVED;
 
-static __always_inline void debug_exit(unsigned long dr7)
-{
-	local_db_restore(dr7);
+	return dr6;
 }
 
 /*
@@ -863,6 +849,18 @@ static void handle_debug(struct pt_regs
 static __always_inline void exc_debug_kernel(struct pt_regs *regs,
 					     unsigned long dr6)
 {
+	/*
+	 * Disable breakpoints during exception handling; recursive exceptions
+	 * are exceedingly 'fun'.
+	 *
+	 * Since this function is NOKPROBE, and that also applies to
+	 * HW_BREAKPOINT_X, we can't hit a breakpoint before this (XXX except a
+	 * HW_BREAKPOINT_W on our stack)
+	 *
+	 * Entry text is excluded for HW_BP_X and cpu_entry_area, which
+	 * includes the entry stack is excluded for everything.
+	 */
+	unsigned long dr7 = local_db_save();
 	bool irq_state = idtentry_enter_nmi(regs);
 	instrumentation_begin();
 
@@ -883,6 +881,8 @@ static __always_inline void exc_debug_ke
 
 	instrumentation_end();
 	idtentry_exit_nmi(regs, irq_state);
+
+	local_db_restore(dr7);
 }
 
 static __always_inline void exc_debug_user(struct pt_regs *regs,
@@ -894,6 +894,15 @@ static __always_inline void exc_debug_us
 	 */
 	WARN_ON_ONCE(!user_mode(regs));
 
+	/*
+	 * NB: We can't easily clear DR7 here because
+	 * idtentry_exit_to_usermode() can invoke ptrace, schedule, access
+	 * user memory, etc.  This means that a recursive #DB is possible.  If
+	 * this happens, that #DB will hit exc_debug_kernel() and clear DR7.
+	 * Since we're not on the IST stack right now, everything will be
+	 * fine.
+	 */
+
 	irqentry_enter_from_user_mode(regs);
 	instrumentation_begin();
 
@@ -907,36 +916,24 @@ static __always_inline void exc_debug_us
 /* IST stack entry */
 DEFINE_IDTENTRY_DEBUG(exc_debug)
 {
-	unsigned long dr6, dr7;
-
-	debug_enter(&dr6, &dr7);
-	exc_debug_kernel(regs, dr6);
-	debug_exit(dr7);
+	exc_debug_kernel(regs, debug_read_clear_dr6());
 }
 
 /* User entry, runs on regular task stack */
 DEFINE_IDTENTRY_DEBUG_USER(exc_debug)
 {
-	unsigned long dr6, dr7;
-
-	debug_enter(&dr6, &dr7);
-	exc_debug_user(regs, dr6);
-	debug_exit(dr7);
+	exc_debug_user(regs, debug_read_clear_dr6());
 }
 #else
 /* 32 bit does not have separate entry points. */
 DEFINE_IDTENTRY_RAW(exc_debug)
 {
-	unsigned long dr6, dr7;
-
-	debug_enter(&dr6, &dr7);
+	unsigned long dr6 = debug_read_clear_dr6();
 
 	if (user_mode(regs))
 		exc_debug_user(regs, dr6);
 	else
 		exc_debug_kernel(regs, dr6);
-
-	debug_exit(dr7);
 }
 #endif
 



^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v2 2/8] x86/debug: Sync BTF earlier
  2020-08-21  9:39 [PATCH v2 0/8] x86/debug: Untangle handle_debug() Peter Zijlstra
  2020-08-21  9:39 ` [PATCH v2 1/8] x86/debug: Allow a single level of #DB recursion Peter Zijlstra
@ 2020-08-21  9:39 ` Peter Zijlstra
  2020-08-23 23:03   ` Andy Lutomirski
  2020-08-21  9:39 ` [PATCH v2 3/8] x86/debug: Move kprobe_debug_handler() into exc_debug_kernel() Peter Zijlstra
                   ` (6 subsequent siblings)
  8 siblings, 1 reply; 21+ messages in thread
From: Peter Zijlstra @ 2020-08-21  9:39 UTC (permalink / raw)
  To: x86
  Cc: linux-kernel, Kyle Huey, Alexandre Chartre,
	Robert O'Callahan, Paul E. McKenney, Frederic Weisbecker,
	Paolo Bonzini, Sean Christopherson, Masami Hiramatsu,
	Petr Mladek, Steven Rostedt, Joel Fernandes, Boris Ostrovsky,
	Juergen Gross, Brian Gerst, Andy Lutomirski, Josh Poimboeuf,
	Daniel Thompson, Peter Zijlstra

Move the BTF sync near the DR6 load, as this will be the only common
code guaranteed to run on every #DB.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/kernel/traps.c |   14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -749,6 +749,13 @@ static __always_inline unsigned long deb
 	/* Filter out all the reserved bits which are preset to 1 */
 	dr6 &= ~DR6_RESERVED;
 
+	/*
+	 * The SDM says "The processor clears the BTF flag when it
+	 * generates a debug exception."  Clear TIF_BLOCKSTEP to keep
+	 * TIF_BLOCKSTEP in sync with the hardware BTF flag.
+	 */
+	clear_thread_flag(TIF_BLOCKSTEP);
+
 	return dr6;
 }
 
@@ -783,13 +790,6 @@ static void handle_debug(struct pt_regs
 	int si_code;
 
 	/*
-	 * The SDM says "The processor clears the BTF flag when it
-	 * generates a debug exception."  Clear TIF_BLOCKSTEP to keep
-	 * TIF_BLOCKSTEP in sync with the hardware BTF flag.
-	 */
-	clear_thread_flag(TIF_BLOCKSTEP);
-
-	/*
 	 * If DR6 is zero, no point in trying to handle it. The kernel is
 	 * not using INT1.
 	 */



^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v2 3/8] x86/debug: Move kprobe_debug_handler() into exc_debug_kernel()
  2020-08-21  9:39 [PATCH v2 0/8] x86/debug: Untangle handle_debug() Peter Zijlstra
  2020-08-21  9:39 ` [PATCH v2 1/8] x86/debug: Allow a single level of #DB recursion Peter Zijlstra
  2020-08-21  9:39 ` [PATCH v2 2/8] x86/debug: Sync BTF earlier Peter Zijlstra
@ 2020-08-21  9:39 ` Peter Zijlstra
  2020-08-23 23:01   ` Andy Lutomirski
  2020-08-21  9:39 ` [PATCH v2 4/8] x86/debug: Remove handle_debug(.user) argument Peter Zijlstra
                   ` (5 subsequent siblings)
  8 siblings, 1 reply; 21+ messages in thread
From: Peter Zijlstra @ 2020-08-21  9:39 UTC (permalink / raw)
  To: x86
  Cc: linux-kernel, Kyle Huey, Alexandre Chartre,
	Robert O'Callahan, Paul E. McKenney, Frederic Weisbecker,
	Paolo Bonzini, Sean Christopherson, Masami Hiramatsu,
	Petr Mladek, Steven Rostedt, Joel Fernandes, Boris Ostrovsky,
	Juergen Gross, Brian Gerst, Andy Lutomirski, Josh Poimboeuf,
	Daniel Thompson, Peter Zijlstra

Kprobes are on kernel text, and thus only matter for #DB-from-kernel.
Kprobes are ordered before the generic notifier, preserve that order.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/include/asm/kprobes.h |    4 ++++
 arch/x86/kernel/traps.c        |   10 ++++------
 2 files changed, 8 insertions(+), 6 deletions(-)

--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -106,5 +106,9 @@ extern int kprobe_exceptions_notify(stru
 extern int kprobe_int3_handler(struct pt_regs *regs);
 extern int kprobe_debug_handler(struct pt_regs *regs);
 
+#else
+
+static inline int kprobe_debug_handler(struct pt_regs *regs) { return 0; }
+
 #endif /* CONFIG_KPROBES */
 #endif /* _ASM_X86_KPROBES_H */
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -806,12 +806,6 @@ static void handle_debug(struct pt_regs
 	/* Store the virtualized DR6 value */
 	tsk->thread.debugreg6 = dr6;
 
-#ifdef CONFIG_KPROBES
-	if (kprobe_debug_handler(regs)) {
-		return;
-	}
-#endif
-
 	if (notify_die(DIE_DEBUG, "debug", regs, (long)&dr6, 0,
 		       SIGTRAP) == NOTIFY_STOP) {
 		return;
@@ -877,8 +871,12 @@ static __always_inline void exc_debug_ke
 	if ((dr6 & DR_STEP) && is_sysenter_singlestep(regs))
 		dr6 &= ~DR_STEP;
 
+	if (kprobe_debug_handler(regs))
+		goto out;
+
 	handle_debug(regs, dr6, false);
 
+out:
 	instrumentation_end();
 	idtentry_exit_nmi(regs, irq_state);
 



^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v2 4/8] x86/debug: Remove handle_debug(.user) argument
  2020-08-21  9:39 [PATCH v2 0/8] x86/debug: Untangle handle_debug() Peter Zijlstra
                   ` (2 preceding siblings ...)
  2020-08-21  9:39 ` [PATCH v2 3/8] x86/debug: Move kprobe_debug_handler() into exc_debug_kernel() Peter Zijlstra
@ 2020-08-21  9:39 ` Peter Zijlstra
  2020-08-23 23:05   ` Andy Lutomirski
  2020-08-21  9:39 ` [PATCH v2 5/8] x86/debug: Simplify #DB signal code Peter Zijlstra
                   ` (4 subsequent siblings)
  8 siblings, 1 reply; 21+ messages in thread
From: Peter Zijlstra @ 2020-08-21  9:39 UTC (permalink / raw)
  To: x86
  Cc: linux-kernel, Kyle Huey, Alexandre Chartre,
	Robert O'Callahan, Paul E. McKenney, Frederic Weisbecker,
	Paolo Bonzini, Sean Christopherson, Masami Hiramatsu,
	Petr Mladek, Steven Rostedt, Joel Fernandes, Boris Ostrovsky,
	Juergen Gross, Brian Gerst, Andy Lutomirski, Josh Poimboeuf,
	Daniel Thompson, Peter Zijlstra

The handle_debug(.user) argument is used to terminate the #DB handler
early for the INT1-from-kernel case, since the kernel doesn't use
INT1.

Remove the argument and handle this explicitly in #DB-from-kernel.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/kernel/traps.c |   21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -783,25 +783,18 @@ static __always_inline unsigned long deb
  *
  * May run on IST stack.
  */
-static void handle_debug(struct pt_regs *regs, unsigned long dr6, bool user)
+static void handle_debug(struct pt_regs *regs, unsigned long dr6)
 {
 	struct task_struct *tsk = current;
 	bool user_icebp;
 	int si_code;
 
 	/*
-	 * If DR6 is zero, no point in trying to handle it. The kernel is
-	 * not using INT1.
-	 */
-	if (!user && !dr6)
-		return;
-
-	/*
 	 * If dr6 has no reason to give us about the origin of this trap,
 	 * then it's very likely the result of an icebp/int01 trap.
 	 * User wants a sigtrap for that.
 	 */
-	user_icebp = user && !dr6;
+	user_icebp = !dr6;
 
 	/* Store the virtualized DR6 value */
 	tsk->thread.debugreg6 = dr6;
@@ -874,7 +867,13 @@ static __always_inline void exc_debug_ke
 	if (kprobe_debug_handler(regs))
 		goto out;
 
-	handle_debug(regs, dr6, false);
+	/*
+	 * The kernel doesn't use INT1
+	 */
+	if (!dr6)
+		goto out;
+
+	handle_debug(regs, dr6);
 
 out:
 	instrumentation_end();
@@ -904,7 +903,7 @@ static __always_inline void exc_debug_us
 	irqentry_enter_from_user_mode(regs);
 	instrumentation_begin();
 
-	handle_debug(regs, dr6, true);
+	handle_debug(regs, dr6);
 
 	instrumentation_end();
 	irqentry_exit_to_user_mode(regs);



^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v2 5/8] x86/debug: Simplify #DB signal code
  2020-08-21  9:39 [PATCH v2 0/8] x86/debug: Untangle handle_debug() Peter Zijlstra
                   ` (3 preceding siblings ...)
  2020-08-21  9:39 ` [PATCH v2 4/8] x86/debug: Remove handle_debug(.user) argument Peter Zijlstra
@ 2020-08-21  9:39 ` Peter Zijlstra
  2020-08-23 23:09   ` Andy Lutomirski
  2020-08-21  9:39 ` [PATCH v2 6/8] x86/debug: Move historical SYSENTER junk into exc_debug_kernel() Peter Zijlstra
                   ` (3 subsequent siblings)
  8 siblings, 1 reply; 21+ messages in thread
From: Peter Zijlstra @ 2020-08-21  9:39 UTC (permalink / raw)
  To: x86
  Cc: linux-kernel, Kyle Huey, Alexandre Chartre,
	Robert O'Callahan, Paul E. McKenney, Frederic Weisbecker,
	Paolo Bonzini, Sean Christopherson, Masami Hiramatsu,
	Petr Mladek, Steven Rostedt, Joel Fernandes, Boris Ostrovsky,
	Juergen Gross, Brian Gerst, Andy Lutomirski, Josh Poimboeuf,
	Daniel Thompson, Peter Zijlstra

Get rid of the two variables, avoid computing si_code when not needed
and be consistent about which dr6 value is used.

This makes it easier to shuffle code around later.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/kernel/traps.c |   24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -786,15 +786,6 @@ static __always_inline unsigned long deb
 static void handle_debug(struct pt_regs *regs, unsigned long dr6)
 {
 	struct task_struct *tsk = current;
-	bool user_icebp;
-	int si_code;
-
-	/*
-	 * If dr6 has no reason to give us about the origin of this trap,
-	 * then it's very likely the result of an icebp/int01 trap.
-	 * User wants a sigtrap for that.
-	 */
-	user_icebp = !dr6;
 
 	/* Store the virtualized DR6 value */
 	tsk->thread.debugreg6 = dr6;
@@ -813,6 +804,11 @@ static void handle_debug(struct pt_regs
 		goto out;
 	}
 
+	/*
+	 * Reload dr6, the notifier might have changed it.
+	 */
+	dr6 = tsk->thread.debugreg6;
+
 	if (WARN_ON_ONCE((dr6 & DR_STEP) && !user_mode(regs))) {
 		/*
 		 * Historical junk that used to handle SYSENTER single-stepping.
@@ -825,9 +821,13 @@ static void handle_debug(struct pt_regs
 		regs->flags &= ~X86_EFLAGS_TF;
 	}
 
-	si_code = get_si_code(tsk->thread.debugreg6);
-	if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
-		send_sigtrap(regs, 0, si_code);
+	/*
+	 * If dr6 has no reason to give us about the origin of this trap,
+	 * then it's very likely the result of an icebp/int01 trap.
+	 * User wants a sigtrap for that.
+	 */
+	if (dr6 & (DR_STEP | DR_TRAP_BITS) || !dr6)
+		send_sigtrap(regs, 0, get_si_code(dr6));
 
 out:
 	cond_local_irq_disable(regs);



^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v2 6/8] x86/debug: Move historical SYSENTER junk into exc_debug_kernel()
  2020-08-21  9:39 [PATCH v2 0/8] x86/debug: Untangle handle_debug() Peter Zijlstra
                   ` (4 preceding siblings ...)
  2020-08-21  9:39 ` [PATCH v2 5/8] x86/debug: Simplify #DB signal code Peter Zijlstra
@ 2020-08-21  9:39 ` Peter Zijlstra
  2020-08-21  9:39 ` [PATCH v2 7/8] x86/debug: Move cond_local_irq_enable() block into exc_debug_user() Peter Zijlstra
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 21+ messages in thread
From: Peter Zijlstra @ 2020-08-21  9:39 UTC (permalink / raw)
  To: x86
  Cc: linux-kernel, Kyle Huey, Alexandre Chartre,
	Robert O'Callahan, Paul E. McKenney, Frederic Weisbecker,
	Paolo Bonzini, Sean Christopherson, Masami Hiramatsu,
	Petr Mladek, Steven Rostedt, Joel Fernandes, Boris Ostrovsky,
	Juergen Gross, Brian Gerst, Andy Lutomirski, Josh Poimboeuf,
	Daniel Thompson, Peter Zijlstra

The historical SYSENTER junk is explicitly for from-kernel, so move it
to the #DB-from-kernel handler.

It is ordered after the notifier, this is important for KGDB which
uses TF single-step and needs to consume the event before we get here.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/kernel/traps.c |   47 ++++++++++++++++++++++++-----------------------
 1 file changed, 24 insertions(+), 23 deletions(-)

--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -783,17 +783,15 @@ static __always_inline unsigned long deb
  *
  * May run on IST stack.
  */
-static void handle_debug(struct pt_regs *regs, unsigned long dr6)
+static bool handle_debug(struct pt_regs *regs, unsigned long *dr6)
 {
 	struct task_struct *tsk = current;
 
 	/* Store the virtualized DR6 value */
-	tsk->thread.debugreg6 = dr6;
+	tsk->thread.debugreg6 = *dr6;
 
-	if (notify_die(DIE_DEBUG, "debug", regs, (long)&dr6, 0,
-		       SIGTRAP) == NOTIFY_STOP) {
-		return;
-	}
+	if (notify_die(DIE_DEBUG, "debug", regs, (long)dr6, 0, SIGTRAP) == NOTIFY_STOP)
+		return true;
 
 	/* It's safe to allow irq's after DR6 has been saved */
 	cond_local_irq_enable(regs);
@@ -807,30 +805,20 @@ static void handle_debug(struct pt_regs
 	/*
 	 * Reload dr6, the notifier might have changed it.
 	 */
-	dr6 = tsk->thread.debugreg6;
-
-	if (WARN_ON_ONCE((dr6 & DR_STEP) && !user_mode(regs))) {
-		/*
-		 * Historical junk that used to handle SYSENTER single-stepping.
-		 * This should be unreachable now.  If we survive for a while
-		 * without anyone hitting this warning, we'll turn this into
-		 * an oops.
-		 */
-		tsk->thread.debugreg6 &= ~DR_STEP;
-		set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
-		regs->flags &= ~X86_EFLAGS_TF;
-	}
+	*dr6 = tsk->thread.debugreg6;
 
 	/*
 	 * If dr6 has no reason to give us about the origin of this trap,
 	 * then it's very likely the result of an icebp/int01 trap.
 	 * User wants a sigtrap for that.
 	 */
-	if (dr6 & (DR_STEP | DR_TRAP_BITS) || !dr6)
-		send_sigtrap(regs, 0, get_si_code(dr6));
+	if (*dr6 & (DR_STEP | DR_TRAP_BITS) || !*dr6)
+		send_sigtrap(regs, 0, get_si_code(*dr6));
 
 out:
 	cond_local_irq_disable(regs);
+
+	return false;
 }
 
 static __always_inline void exc_debug_kernel(struct pt_regs *regs,
@@ -873,7 +861,20 @@ static __always_inline void exc_debug_ke
 	if (!dr6)
 		goto out;
 
-	handle_debug(regs, dr6);
+	if (handle_debug(regs, &dr6))
+		goto out;
+
+	if (WARN_ON_ONCE(dr6 & DR_STEP)) {
+		/*
+		 * Historical junk that used to handle SYSENTER single-stepping.
+		 * This should be unreachable now.  If we survive for a while
+		 * without anyone hitting this warning, we'll turn this into
+		 * an oops.
+		 */
+		dr6 &= ~DR_STEP;
+		set_thread_flag(TIF_SINGLESTEP);
+		regs->flags &= ~X86_EFLAGS_TF;
+	}
 
 out:
 	instrumentation_end();
@@ -903,7 +904,7 @@ static __always_inline void exc_debug_us
 	irqentry_enter_from_user_mode(regs);
 	instrumentation_begin();
 
-	handle_debug(regs, dr6);
+	handle_debug(regs, &dr6);
 
 	instrumentation_end();
 	irqentry_exit_to_user_mode(regs);



^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v2 7/8] x86/debug: Move cond_local_irq_enable() block into exc_debug_user()
  2020-08-21  9:39 [PATCH v2 0/8] x86/debug: Untangle handle_debug() Peter Zijlstra
                   ` (5 preceding siblings ...)
  2020-08-21  9:39 ` [PATCH v2 6/8] x86/debug: Move historical SYSENTER junk into exc_debug_kernel() Peter Zijlstra
@ 2020-08-21  9:39 ` Peter Zijlstra
  2020-08-21  9:39 ` [PATCH v2 8/8] x86/debug: Remove the historical junk Peter Zijlstra
  2020-08-21 12:56 ` [PATCH v2 0/8] x86/debug: Untangle handle_debug() peterz
  8 siblings, 0 replies; 21+ messages in thread
From: Peter Zijlstra @ 2020-08-21  9:39 UTC (permalink / raw)
  To: x86
  Cc: linux-kernel, Kyle Huey, Alexandre Chartre,
	Robert O'Callahan, Paul E. McKenney, Frederic Weisbecker,
	Paolo Bonzini, Sean Christopherson, Masami Hiramatsu,
	Petr Mladek, Steven Rostedt, Joel Fernandes, Boris Ostrovsky,
	Juergen Gross, Brian Gerst, Andy Lutomirski, Josh Poimboeuf,
	Daniel Thompson, Peter Zijlstra

The cond_local_irq_enable() block, dealing with vm86 and sending
signals is only relevant for #DB-from-user, move it there.

This then reduces handle_debug() to only the notifier call, so rename
it to notify_debug().

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/kernel/traps.c |   51 +++++++++++++++++++++++-------------------------
 1 file changed, 25 insertions(+), 26 deletions(-)

--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -783,7 +783,8 @@ static __always_inline unsigned long deb
  *
  * May run on IST stack.
  */
-static bool handle_debug(struct pt_regs *regs, unsigned long *dr6)
+
+static bool notify_debug(struct pt_regs *regs, unsigned long *dr6)
 {
 	struct task_struct *tsk = current;
 
@@ -793,31 +794,9 @@ static bool handle_debug(struct pt_regs
 	if (notify_die(DIE_DEBUG, "debug", regs, (long)dr6, 0, SIGTRAP) == NOTIFY_STOP)
 		return true;
 
-	/* It's safe to allow irq's after DR6 has been saved */
-	cond_local_irq_enable(regs);
-
-	if (v8086_mode(regs)) {
-		handle_vm86_trap((struct kernel_vm86_regs *) regs, 0,
-				 X86_TRAP_DB);
-		goto out;
-	}
-
-	/*
-	 * Reload dr6, the notifier might have changed it.
-	 */
+	/* Reload the DR6 value, the notifier might have changed it */
 	*dr6 = tsk->thread.debugreg6;
 
-	/*
-	 * If dr6 has no reason to give us about the origin of this trap,
-	 * then it's very likely the result of an icebp/int01 trap.
-	 * User wants a sigtrap for that.
-	 */
-	if (*dr6 & (DR_STEP | DR_TRAP_BITS) || !*dr6)
-		send_sigtrap(regs, 0, get_si_code(*dr6));
-
-out:
-	cond_local_irq_disable(regs);
-
 	return false;
 }
 
@@ -861,7 +840,7 @@ static __always_inline void exc_debug_ke
 	if (!dr6)
 		goto out;
 
-	if (handle_debug(regs, &dr6))
+	if (notify_debug(regs, &dr6))
 		goto out;
 
 	if (WARN_ON_ONCE(dr6 & DR_STEP)) {
@@ -904,8 +883,28 @@ static __always_inline void exc_debug_us
 	irqentry_enter_from_user_mode(regs);
 	instrumentation_begin();
 
-	handle_debug(regs, &dr6);
+	if (notify_debug(regs, &dr6))
+		goto out;
+
+	/* It's safe to allow irq's after DR6 has been saved */
+	local_irq_enable();
+
+	if (v8086_mode(regs)) {
+		handle_vm86_trap((struct kernel_vm86_regs *)regs, 0, X86_TRAP_DB);
+		goto out_irq;
+	}
 
+	/*
+	 * If dr6 has no reason to give us about the origin of this trap,
+	 * then it's very likely the result of an icebp/int01 trap.
+	 * User wants a sigtrap for that.
+	 */
+	if (dr6 & (DR_STEP | DR_TRAP_BITS) || !dr6)
+		send_sigtrap(regs, 0, get_si_code(dr6));
+
+out_irq:
+	local_irq_disable();
+out:
 	instrumentation_end();
 	irqentry_exit_to_user_mode(regs);
 }



^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v2 8/8] x86/debug: Remove the historical junk
  2020-08-21  9:39 [PATCH v2 0/8] x86/debug: Untangle handle_debug() Peter Zijlstra
                   ` (6 preceding siblings ...)
  2020-08-21  9:39 ` [PATCH v2 7/8] x86/debug: Move cond_local_irq_enable() block into exc_debug_user() Peter Zijlstra
@ 2020-08-21  9:39 ` Peter Zijlstra
  2020-08-21 12:22   ` [PATCH v2.1 " peterz
  2020-08-23 23:11   ` [PATCH v2 " Andy Lutomirski
  2020-08-21 12:56 ` [PATCH v2 0/8] x86/debug: Untangle handle_debug() peterz
  8 siblings, 2 replies; 21+ messages in thread
From: Peter Zijlstra @ 2020-08-21  9:39 UTC (permalink / raw)
  To: x86
  Cc: linux-kernel, Kyle Huey, Alexandre Chartre,
	Robert O'Callahan, Paul E. McKenney, Frederic Weisbecker,
	Paolo Bonzini, Sean Christopherson, Masami Hiramatsu,
	Petr Mladek, Steven Rostedt, Joel Fernandes, Boris Ostrovsky,
	Juergen Gross, Brian Gerst, Andy Lutomirski, Josh Poimboeuf,
	Daniel Thompson, Peter Zijlstra, Andy Lutomirski

Remove the historical junk and replace it with a WARN and a comment.

The problem is that even though the kernel only uses TF single-step in
kprobes and KGDB, both of which consume the event before this,
QEMU/KVM has bugs in this area that can trigger this state so we have
to deal with it.

Suggested-by: Brian Gerst <brgerst@gmail.com>
Suggested-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/kernel/traps.c |   24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -839,22 +839,18 @@ static __always_inline void exc_debug_ke
 		goto out;
 
 	/*
-	 * Reload dr6, the notifier might have changed it.
+	 * The kernel doesn't use TF single-step outside of:
+	 *
+	 *  - Kprobes, consumed through kprobe_debug_handler()
+	 *  - KGDB, consumed through notify_debug()
+	 *
+	 * So if we get here with DR_STEP set, something is wonky.
+	 *
+	 * A known way to trigger this is through QEMU's GDB stub,
+	 * which leaks #DB into the guest and causes IST recursion.
 	 */
-	dr6 = current->thread.debugreg6;
-
-	if (WARN_ON_ONCE(dr6 & DR_STEP)) {
-		/*
-		 * Historical junk that used to handle SYSENTER single-stepping.
-		 * This should be unreachable now.  If we survive for a while
-		 * without anyone hitting this warning, we'll turn this into
-		 * an oops.
-		 */
-		dr6 &= ~DR_STEP;
-		set_thread_flag(TIF_SINGLESTEP);
+	if (WARN_ON_ONCE(current->thread.debugreg6 & DR_STEP))
 		regs->flags &= ~X86_EFLAGS_TF;
-	}
-
 out:
 	instrumentation_end();
 	idtentry_exit_nmi(regs, irq_state);



^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v2.1 8/8] x86/debug: Remove the historical junk
  2020-08-21  9:39 ` [PATCH v2 8/8] x86/debug: Remove the historical junk Peter Zijlstra
@ 2020-08-21 12:22   ` peterz
  2020-08-23 23:11   ` [PATCH v2 " Andy Lutomirski
  1 sibling, 0 replies; 21+ messages in thread
From: peterz @ 2020-08-21 12:22 UTC (permalink / raw)
  To: x86
  Cc: linux-kernel, Kyle Huey, Alexandre Chartre,
	Robert O'Callahan, Paul E. McKenney, Frederic Weisbecker,
	Paolo Bonzini, Sean Christopherson, Masami Hiramatsu,
	Petr Mladek, Steven Rostedt, Joel Fernandes, Boris Ostrovsky,
	Juergen Gross, Brian Gerst, Andy Lutomirski, Josh Poimboeuf,
	Daniel Thompson, Andy Lutomirski

On Fri, Aug 21, 2020 at 11:39:20AM +0200, Peter Zijlstra wrote:
> Remove the historical junk and replace it with a WARN and a comment.
> 
> The problem is that even though the kernel only uses TF single-step in
> kprobes and KGDB, both of which consume the event before this,
> QEMU/KVM has bugs in this area that can trigger this state so we have
> to deal with it.
> 
> Suggested-by: Brian Gerst <brgerst@gmail.com>
> Suggested-by: Andy Lutomirski <luto@amacapital.net>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>

Damn, I lost a refresh.

---
 arch/x86/kernel/traps.c |   23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -843,18 +843,19 @@ static __always_inline void exc_debug_ke
 	if (notify_debug(regs, &dr6))
 		goto out;
 
-	if (WARN_ON_ONCE(dr6 & DR_STEP)) {
-		/*
-		 * Historical junk that used to handle SYSENTER single-stepping.
-		 * This should be unreachable now.  If we survive for a while
-		 * without anyone hitting this warning, we'll turn this into
-		 * an oops.
-		 */
-		dr6 &= ~DR_STEP;
-		set_thread_flag(TIF_SINGLESTEP);
+	/*
+	 * The kernel doesn't use TF single-step outside of:
+	 *
+	 *  - Kprobes, consumed through kprobe_debug_handler()
+	 *  - KGDB, consumed through notify_debug()
+	 *
+	 * So if we get here with DR_STEP set, something is wonky.
+	 *
+	 * A known way to trigger this is through QEMU's GDB stub,
+	 * which leaks #DB into the guest and causes IST recursion.
+	 */
+	if (WARN_ON_ONCE(current->thread.debugreg6 & DR_STEP))
 		regs->flags &= ~X86_EFLAGS_TF;
-	}
-
 out:
 	instrumentation_end();
 	idtentry_exit_nmi(regs, irq_state);

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 0/8] x86/debug: Untangle handle_debug()
  2020-08-21  9:39 [PATCH v2 0/8] x86/debug: Untangle handle_debug() Peter Zijlstra
                   ` (7 preceding siblings ...)
  2020-08-21  9:39 ` [PATCH v2 8/8] x86/debug: Remove the historical junk Peter Zijlstra
@ 2020-08-21 12:56 ` peterz
  8 siblings, 0 replies; 21+ messages in thread
From: peterz @ 2020-08-21 12:56 UTC (permalink / raw)
  To: x86
  Cc: linux-kernel, Kyle Huey, Alexandre Chartre,
	Robert O'Callahan, Paul E. McKenney, Frederic Weisbecker,
	Paolo Bonzini, Sean Christopherson, Masami Hiramatsu,
	Petr Mladek, Steven Rostedt, Joel Fernandes, Boris Ostrovsky,
	Juergen Gross, Brian Gerst, Andy Lutomirski, Josh Poimboeuf,
	Daniel Thompson

On Fri, Aug 21, 2020 at 11:39:12AM +0200, Peter Zijlstra wrote:
> Hi,
> 
> handle_debug() is a mess, and now that we have separate user and kernel paths,
> try and clean it up a bit.
> 
> Include's amluto's fix for convenience.
> 
> Since v1:
> 
>  - Changelogs!
>  - fixed notifier order (Josh, Daniel)
>  - tested kgdb

Whole set also available at:

  git://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git x86/debug

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 3/8] x86/debug: Move kprobe_debug_handler() into exc_debug_kernel()
  2020-08-21  9:39 ` [PATCH v2 3/8] x86/debug: Move kprobe_debug_handler() into exc_debug_kernel() Peter Zijlstra
@ 2020-08-23 23:01   ` Andy Lutomirski
  0 siblings, 0 replies; 21+ messages in thread
From: Andy Lutomirski @ 2020-08-23 23:01 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: X86 ML, LKML, Kyle Huey, Alexandre Chartre,
	Robert O'Callahan, Paul E. McKenney, Frederic Weisbecker,
	Paolo Bonzini, Sean Christopherson, Masami Hiramatsu,
	Petr Mladek, Steven Rostedt, Joel Fernandes, Boris Ostrovsky,
	Juergen Gross, Brian Gerst, Andy Lutomirski, Josh Poimboeuf,
	Daniel Thompson

On Fri, Aug 21, 2020 at 3:21 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> Kprobes are on kernel text, and thus only matter for #DB-from-kernel.
> Kprobes are ordered before the generic notifier, preserve that order.
>

Acked-by: Andy Lutomirski <luto@kernel.org>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 2/8] x86/debug: Sync BTF earlier
  2020-08-21  9:39 ` [PATCH v2 2/8] x86/debug: Sync BTF earlier Peter Zijlstra
@ 2020-08-23 23:03   ` Andy Lutomirski
  0 siblings, 0 replies; 21+ messages in thread
From: Andy Lutomirski @ 2020-08-23 23:03 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: X86 ML, LKML, Kyle Huey, Alexandre Chartre,
	Robert O'Callahan, Paul E. McKenney, Frederic Weisbecker,
	Paolo Bonzini, Sean Christopherson, Masami Hiramatsu,
	Petr Mladek, Steven Rostedt, Joel Fernandes, Boris Ostrovsky,
	Juergen Gross, Brian Gerst, Andy Lutomirski, Josh Poimboeuf,
	Daniel Thompson

On Fri, Aug 21, 2020 at 3:21 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> Move the BTF sync near the DR6 load, as this will be the only common
> code guaranteed to run on every #DB.

I don't think this code is strictly correct, but I think your change is
still an improvement.

Acked-by: Andy Lutomirski <luto@kernel.org>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 4/8] x86/debug: Remove handle_debug(.user) argument
  2020-08-21  9:39 ` [PATCH v2 4/8] x86/debug: Remove handle_debug(.user) argument Peter Zijlstra
@ 2020-08-23 23:05   ` Andy Lutomirski
  0 siblings, 0 replies; 21+ messages in thread
From: Andy Lutomirski @ 2020-08-23 23:05 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: X86 ML, LKML, Kyle Huey, Alexandre Chartre,
	Robert O'Callahan, Paul E. McKenney, Frederic Weisbecker,
	Paolo Bonzini, Sean Christopherson, Masami Hiramatsu,
	Petr Mladek, Steven Rostedt, Joel Fernandes, Boris Ostrovsky,
	Juergen Gross, Brian Gerst, Andy Lutomirski, Josh Poimboeuf,
	Daniel Thompson

On Fri, Aug 21, 2020 at 3:21 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> The handle_debug(.user) argument is used to terminate the #DB handler
> early for the INT1-from-kernel case, since the kernel doesn't use
> INT1.
>
> Remove the argument and handle this explicitly in #DB-from-kernel.

Acked-by: Andy Lutomirski <luto@kernel.org>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 5/8] x86/debug: Simplify #DB signal code
  2020-08-21  9:39 ` [PATCH v2 5/8] x86/debug: Simplify #DB signal code Peter Zijlstra
@ 2020-08-23 23:09   ` Andy Lutomirski
  2020-08-24 11:05     ` peterz
  0 siblings, 1 reply; 21+ messages in thread
From: Andy Lutomirski @ 2020-08-23 23:09 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: X86 ML, LKML, Kyle Huey, Alexandre Chartre,
	Robert O'Callahan, Paul E. McKenney, Frederic Weisbecker,
	Paolo Bonzini, Sean Christopherson, Masami Hiramatsu,
	Petr Mladek, Steven Rostedt, Joel Fernandes, Boris Ostrovsky,
	Juergen Gross, Brian Gerst, Andy Lutomirski, Josh Poimboeuf,
	Daniel Thompson

On Fri, Aug 21, 2020 at 3:21 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> Get rid of the two variables, avoid computing si_code when not needed
> and be consistent about which dr6 value is used.
>

> -       if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
> -               send_sigtrap(regs, 0, si_code);
> +       /*
> +        * If dr6 has no reason to give us about the origin of this trap,
> +        * then it's very likely the result of an icebp/int01 trap.
> +        * User wants a sigtrap for that.
> +        */
> +       if (dr6 & (DR_STEP | DR_TRAP_BITS) || !dr6)
> +               send_sigtrap(regs, 0, get_si_code(dr6));

The old condition was ... || (actual DR6 value) and the new condition
was ... || (stupid notifier-modified DR6 value).  I think the old code
was more correct.

The right fix is to get rid of the notifier garbage.  Want to pick up
these two changes into your series:

https://git.kernel.org/pub/scm/linux/kernel/git/luto/linux.git/commit/?h=x86/entry&id=b695a5adfdd661a10eead1eebd4002d08400e7ef
https://git.kernel.org/pub/scm/linux/kernel/git/luto/linux.git/commit/?h=x86/entry&id=40ca016606bd39a465feaf5802a8dc3e937aaa06

And then there is no code left that modifies ->debugreg6 out from under you.

--Andy

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 8/8] x86/debug: Remove the historical junk
  2020-08-21  9:39 ` [PATCH v2 8/8] x86/debug: Remove the historical junk Peter Zijlstra
  2020-08-21 12:22   ` [PATCH v2.1 " peterz
@ 2020-08-23 23:11   ` Andy Lutomirski
  1 sibling, 0 replies; 21+ messages in thread
From: Andy Lutomirski @ 2020-08-23 23:11 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: X86 ML, LKML, Kyle Huey, Alexandre Chartre,
	Robert O'Callahan, Paul E. McKenney, Frederic Weisbecker,
	Paolo Bonzini, Sean Christopherson, Masami Hiramatsu,
	Petr Mladek, Steven Rostedt, Joel Fernandes, Boris Ostrovsky,
	Juergen Gross, Brian Gerst, Andy Lutomirski, Josh Poimboeuf,
	Daniel Thompson

On Fri, Aug 21, 2020 at 3:21 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> Remove the historical junk and replace it with a WARN and a comment.
>
> The problem is that even though the kernel only uses TF single-step in
> kprobes and KGDB, both of which consume the event before this,
> QEMU/KVM has bugs in this area that can trigger this state so we have
> to deal with it.

Almost acked by me.

If you make the change I suggested earlier, then the ->debugreg6
garbage can actually die.

--Andy

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 5/8] x86/debug: Simplify #DB signal code
  2020-08-23 23:09   ` Andy Lutomirski
@ 2020-08-24 11:05     ` peterz
  2020-08-24 11:26       ` Andrew Cooper
  2020-08-24 23:09       ` Andy Lutomirski
  0 siblings, 2 replies; 21+ messages in thread
From: peterz @ 2020-08-24 11:05 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: X86 ML, LKML, Kyle Huey, Alexandre Chartre,
	Robert O'Callahan, Paul E. McKenney, Frederic Weisbecker,
	Paolo Bonzini, Sean Christopherson, Masami Hiramatsu,
	Petr Mladek, Steven Rostedt, Joel Fernandes, Boris Ostrovsky,
	Juergen Gross, Brian Gerst, Josh Poimboeuf, Daniel Thompson

On Sun, Aug 23, 2020 at 04:09:42PM -0700, Andy Lutomirski wrote:
> On Fri, Aug 21, 2020 at 3:21 AM Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > Get rid of the two variables, avoid computing si_code when not needed
> > and be consistent about which dr6 value is used.
> >
> 
> > -       if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
> > -               send_sigtrap(regs, 0, si_code);
> > +       /*
> > +        * If dr6 has no reason to give us about the origin of this trap,
> > +        * then it's very likely the result of an icebp/int01 trap.
> > +        * User wants a sigtrap for that.
> > +        */
> > +       if (dr6 & (DR_STEP | DR_TRAP_BITS) || !dr6)
> > +               send_sigtrap(regs, 0, get_si_code(dr6));
> 
> The old condition was ... || (actual DR6 value) and the new condition
> was ... || (stupid notifier-modified DR6 value).  I think the old code
> was more correct.

Hurmph.. /me goes re-read the SDM.

INT1 is a trap,
instruction breakpoint is a fault

So if you have:

	INT1
1:	some-instr

and set an X breakpoint on 1, we'll loose the INT1, right?

And because INT1 is a trap, we can't easily decode the instruction
stream either :-(

Now, OTOH, I suppose you're right in that looking at DR6 early (before
we let people muck about with it) is more reliable for detecting INT1.
Esp since the hw_breakpoint crud can consume all bits.

So I'll go fix that. What a giant pain in the ass all this is.

> The right fix is to get rid of the notifier garbage.  Want to pick up
> these two changes into your series:
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/luto/linux.git/commit/?h=x86/entry&id=b695a5adfdd661a10eead1eebd4002d08400e7ef
> https://git.kernel.org/pub/scm/linux/kernel/git/luto/linux.git/commit/?h=x86/entry&id=40ca016606bd39a465feaf5802a8dc3e937aaa06
> 
> And then there is no code left that modifies ->debugreg6 out from under you.

I'm not convinced. Even with those patches applied we have to use
debugreg6, and that code very much relies on clearing DR_TRAP_BITS
early, and then having ptrace_triggered() re-set bits in it.

This is so that hw_breakpoint handlers can use breakpoints in userspace
without causing send_sigtrap() on them.

So while I hate notifiers as much as the next person, I'm not sure those
patches actually help anything at all in this particular case.

We can't actually remove the notifier, we can't remove debugreg6 and
debugreg6 will still get modified.



^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 5/8] x86/debug: Simplify #DB signal code
  2020-08-24 11:05     ` peterz
@ 2020-08-24 11:26       ` Andrew Cooper
  2020-08-24 12:00         ` peterz
  2020-08-24 23:07         ` Andy Lutomirski
  2020-08-24 23:09       ` Andy Lutomirski
  1 sibling, 2 replies; 21+ messages in thread
From: Andrew Cooper @ 2020-08-24 11:26 UTC (permalink / raw)
  To: peterz, Andy Lutomirski
  Cc: X86 ML, LKML, Kyle Huey, Alexandre Chartre,
	Robert O'Callahan, Paul E. McKenney, Frederic Weisbecker,
	Paolo Bonzini, Sean Christopherson, Masami Hiramatsu,
	Petr Mladek, Steven Rostedt, Joel Fernandes, Boris Ostrovsky,
	Juergen Gross, Brian Gerst, Josh Poimboeuf, Daniel Thompson

On 24/08/2020 12:05, peterz@infradead.org wrote:
> On Sun, Aug 23, 2020 at 04:09:42PM -0700, Andy Lutomirski wrote:
>> On Fri, Aug 21, 2020 at 3:21 AM Peter Zijlstra <peterz@infradead.org> wrote:
>>> Get rid of the two variables, avoid computing si_code when not needed
>>> and be consistent about which dr6 value is used.
>>>
>>> -       if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
>>> -               send_sigtrap(regs, 0, si_code);
>>> +       /*
>>> +        * If dr6 has no reason to give us about the origin of this trap,
>>> +        * then it's very likely the result of an icebp/int01 trap.
>>> +        * User wants a sigtrap for that.
>>> +        */
>>> +       if (dr6 & (DR_STEP | DR_TRAP_BITS) || !dr6)
>>> +               send_sigtrap(regs, 0, get_si_code(dr6));
>> The old condition was ... || (actual DR6 value) and the new condition
>> was ... || (stupid notifier-modified DR6 value).  I think the old code
>> was more correct.
> Hurmph.. /me goes re-read the SDM.
>
> INT1 is a trap,
> instruction breakpoint is a fault
>
> So if you have:
>
> 	INT1
> 1:	some-instr
>
> and set an X breakpoint on 1, we'll loose the INT1, right?

You should get two.  First with a dr6 of 0 (ICEBP, RIP pointing at 1:),
and a second with dr6 indicating the X breakpoint (again, RIP pointing
at 1:).

SDM Vol3 6.9 PRIORITY AMONG SIMULTANEOUS EXCEPTIONS AND INTERRUPTS

Traps on previous instructions are at priority 4, because they still
"part" of the previous instruction.  X breakpoints are priority 7.

The two #DB's shouldn't merge because nothing inhibits delivery[1] of
the trap at priority 4, and on return from the handler, RF isn't set so
the instruction breakpoint will trigger.

~Andrew

[1] Anyone playing with MovSS gets to keep all resulting pieces.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 5/8] x86/debug: Simplify #DB signal code
  2020-08-24 11:26       ` Andrew Cooper
@ 2020-08-24 12:00         ` peterz
  2020-08-24 23:07         ` Andy Lutomirski
  1 sibling, 0 replies; 21+ messages in thread
From: peterz @ 2020-08-24 12:00 UTC (permalink / raw)
  To: Andrew Cooper
  Cc: Andy Lutomirski, X86 ML, LKML, Kyle Huey, Alexandre Chartre,
	Robert O'Callahan, Paul E. McKenney, Frederic Weisbecker,
	Paolo Bonzini, Sean Christopherson, Masami Hiramatsu,
	Petr Mladek, Steven Rostedt, Joel Fernandes, Boris Ostrovsky,
	Juergen Gross, Brian Gerst, Josh Poimboeuf, Daniel Thompson

On Mon, Aug 24, 2020 at 12:26:01PM +0100, Andrew Cooper wrote:

> > INT1 is a trap,
> > instruction breakpoint is a fault
> >
> > So if you have:
> >
> > 	INT1
> > 1:	some-instr
> >
> > and set an X breakpoint on 1, we'll loose the INT1, right?
> 
> You should get two.  First with a dr6 of 0 (ICEBP, RIP pointing at 1:),
> and a second with dr6 indicating the X breakpoint (again, RIP pointing
> at 1:).
> 
> SDM Vol3 6.9 PRIORITY AMONG SIMULTANEOUS EXCEPTIONS AND INTERRUPTS

Ooh, shiny. Clearly I didn't read enough SDM this morning.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 5/8] x86/debug: Simplify #DB signal code
  2020-08-24 11:26       ` Andrew Cooper
  2020-08-24 12:00         ` peterz
@ 2020-08-24 23:07         ` Andy Lutomirski
  1 sibling, 0 replies; 21+ messages in thread
From: Andy Lutomirski @ 2020-08-24 23:07 UTC (permalink / raw)
  To: Andrew Cooper
  Cc: Peter Zijlstra, Andy Lutomirski, X86 ML, LKML, Kyle Huey,
	Alexandre Chartre, Robert O'Callahan, Paul E. McKenney,
	Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
	Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
	Boris Ostrovsky, Juergen Gross, Brian Gerst, Josh Poimboeuf,
	Daniel Thompson

On Mon, Aug 24, 2020 at 4:26 AM Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>
> On 24/08/2020 12:05, peterz@infradead.org wrote:
> > On Sun, Aug 23, 2020 at 04:09:42PM -0700, Andy Lutomirski wrote:
> >> On Fri, Aug 21, 2020 at 3:21 AM Peter Zijlstra <peterz@infradead.org> wrote:
> >>> Get rid of the two variables, avoid computing si_code when not needed
> >>> and be consistent about which dr6 value is used.
> >>>
> >>> -       if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
> >>> -               send_sigtrap(regs, 0, si_code);
> >>> +       /*
> >>> +        * If dr6 has no reason to give us about the origin of this trap,
> >>> +        * then it's very likely the result of an icebp/int01 trap.
> >>> +        * User wants a sigtrap for that.
> >>> +        */
> >>> +       if (dr6 & (DR_STEP | DR_TRAP_BITS) || !dr6)
> >>> +               send_sigtrap(regs, 0, get_si_code(dr6));
> >> The old condition was ... || (actual DR6 value) and the new condition
> >> was ... || (stupid notifier-modified DR6 value).  I think the old code
> >> was more correct.
> > Hurmph.. /me goes re-read the SDM.
> >
> > INT1 is a trap,
> > instruction breakpoint is a fault
> >
> > So if you have:
> >
> >       INT1
> > 1:    some-instr
> >
> > and set an X breakpoint on 1, we'll loose the INT1, right?
>
> You should get two.  First with a dr6 of 0 (ICEBP, RIP pointing at 1:),
> and a second with dr6 indicating the X breakpoint (again, RIP pointing
> at 1:).
>
> SDM Vol3 6.9 PRIORITY AMONG SIMULTANEOUS EXCEPTIONS AND INTERRUPTS
>
> Traps on previous instructions are at priority 4, because they still
> "part" of the previous instruction.  X breakpoints are priority 7.
>
> The two #DB's shouldn't merge because nothing inhibits delivery[1] of
> the trap at priority 4, and on return from the handler, RF isn't set so
> the instruction breakpoint will trigger.
>

I think that the whole "priority among simultaneous exceptions and
interrupts" section is largely BS.  An instruction that traps will
deliver the exception when it finishes executing unless some other
event prevents the instruction from retiring or unless some other trap
preempts it.  This will happen before the following instruction is
decoded, let alone before any possible faults from its execution will
occur.


Correct me if I'm wrong, but I don't think I am.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 5/8] x86/debug: Simplify #DB signal code
  2020-08-24 11:05     ` peterz
  2020-08-24 11:26       ` Andrew Cooper
@ 2020-08-24 23:09       ` Andy Lutomirski
  1 sibling, 0 replies; 21+ messages in thread
From: Andy Lutomirski @ 2020-08-24 23:09 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Andy Lutomirski, X86 ML, LKML, Kyle Huey, Alexandre Chartre,
	Robert O'Callahan, Paul E. McKenney, Frederic Weisbecker,
	Paolo Bonzini, Sean Christopherson, Masami Hiramatsu,
	Petr Mladek, Steven Rostedt, Joel Fernandes, Boris Ostrovsky,
	Juergen Gross, Brian Gerst, Josh Poimboeuf, Daniel Thompson

On Mon, Aug 24, 2020 at 4:05 AM <peterz@infradead.org> wrote:
>
> On Sun, Aug 23, 2020 at 04:09:42PM -0700, Andy Lutomirski wrote:
> > On Fri, Aug 21, 2020 at 3:21 AM Peter Zijlstra <peterz@infradead.org> wrote:
> > >
> > > Get rid of the two variables, avoid computing si_code when not needed
> > > and be consistent about which dr6 value is used.
> > >
> >
> > > -       if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
> > > -               send_sigtrap(regs, 0, si_code);
> > > +       /*
> > > +        * If dr6 has no reason to give us about the origin of this trap,
> > > +        * then it's very likely the result of an icebp/int01 trap.
> > > +        * User wants a sigtrap for that.
> > > +        */
> > > +       if (dr6 & (DR_STEP | DR_TRAP_BITS) || !dr6)
> > > +               send_sigtrap(regs, 0, get_si_code(dr6));
> >
> > The old condition was ... || (actual DR6 value) and the new condition
> > was ... || (stupid notifier-modified DR6 value).  I think the old code
> > was more correct.
>
> Hurmph.. /me goes re-read the SDM.
>
> INT1 is a trap,
> instruction breakpoint is a fault
>
> So if you have:
>
>         INT1
> 1:      some-instr
>
> and set an X breakpoint on 1, we'll loose the INT1, right?
>
> And because INT1 is a trap, we can't easily decode the instruction
> stream either :-(
>
> Now, OTOH, I suppose you're right in that looking at DR6 early (before
> we let people muck about with it) is more reliable for detecting INT1.
> Esp since the hw_breakpoint crud can consume all bits.
>
> So I'll go fix that. What a giant pain in the ass all this is.
>
> > The right fix is to get rid of the notifier garbage.  Want to pick up
> > these two changes into your series:
> >
> > https://git.kernel.org/pub/scm/linux/kernel/git/luto/linux.git/commit/?h=x86/entry&id=b695a5adfdd661a10eead1eebd4002d08400e7ef
> > https://git.kernel.org/pub/scm/linux/kernel/git/luto/linux.git/commit/?h=x86/entry&id=40ca016606bd39a465feaf5802a8dc3e937aaa06
> >
> > And then there is no code left that modifies ->debugreg6 out from under you.
>
> I'm not convinced. Even with those patches applied we have to use
> debugreg6, and that code very much relies on clearing DR_TRAP_BITS
> early, and then having ptrace_triggered() re-set bits in it.
>
> This is so that hw_breakpoint handlers can use breakpoints in userspace
> without causing send_sigtrap() on them.

Ick.

Maybe we can rename this to thread->virtual_dr6.  So the traps.c
machinery would process dr6 (the actual value from hardware) and
generate virtual_dr6 to report to userspace.  And no one gets confused
about which is which, and no one ever consumes bits from the virtual
one.

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2020-08-24 23:09 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-21  9:39 [PATCH v2 0/8] x86/debug: Untangle handle_debug() Peter Zijlstra
2020-08-21  9:39 ` [PATCH v2 1/8] x86/debug: Allow a single level of #DB recursion Peter Zijlstra
2020-08-21  9:39 ` [PATCH v2 2/8] x86/debug: Sync BTF earlier Peter Zijlstra
2020-08-23 23:03   ` Andy Lutomirski
2020-08-21  9:39 ` [PATCH v2 3/8] x86/debug: Move kprobe_debug_handler() into exc_debug_kernel() Peter Zijlstra
2020-08-23 23:01   ` Andy Lutomirski
2020-08-21  9:39 ` [PATCH v2 4/8] x86/debug: Remove handle_debug(.user) argument Peter Zijlstra
2020-08-23 23:05   ` Andy Lutomirski
2020-08-21  9:39 ` [PATCH v2 5/8] x86/debug: Simplify #DB signal code Peter Zijlstra
2020-08-23 23:09   ` Andy Lutomirski
2020-08-24 11:05     ` peterz
2020-08-24 11:26       ` Andrew Cooper
2020-08-24 12:00         ` peterz
2020-08-24 23:07         ` Andy Lutomirski
2020-08-24 23:09       ` Andy Lutomirski
2020-08-21  9:39 ` [PATCH v2 6/8] x86/debug: Move historical SYSENTER junk into exc_debug_kernel() Peter Zijlstra
2020-08-21  9:39 ` [PATCH v2 7/8] x86/debug: Move cond_local_irq_enable() block into exc_debug_user() Peter Zijlstra
2020-08-21  9:39 ` [PATCH v2 8/8] x86/debug: Remove the historical junk Peter Zijlstra
2020-08-21 12:22   ` [PATCH v2.1 " peterz
2020-08-23 23:11   ` [PATCH v2 " Andy Lutomirski
2020-08-21 12:56 ` [PATCH v2 0/8] x86/debug: Untangle handle_debug() peterz

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).