All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] x86/kgdb: Allow removal of early BPs
@ 2020-12-14 14:13 Stefan Saecherl
  2020-12-18 16:44 ` Daniel Thompson
  2021-01-26 11:39 ` Peter Zijlstra
  0 siblings, 2 replies; 5+ messages in thread
From: Stefan Saecherl @ 2020-12-14 14:13 UTC (permalink / raw)
  To: x86
  Cc: linux-kernel, Stefan Saecherl, Lorena Kretzschmar,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, H. Peter Anvin,
	Peter Zijlstra, Alexandre Chartre, Mike Rapoport, Ira Weiny,
	Adrian Hunter, Gustavo A. R. Silva, linux-kernel

The problem is that breakpoints that are set early (e.g. via kgdbwait)
cannot be deleted after boot completed (to be precise after mark_rodata_ro
ran).

When setting a breakpoint early there are executable pages that are
writable so the copy_to_kernel_nofault call in kgdb_arch_set_breakpoint
succeeds and the breakpoint is saved as type BP_BREAKPOINT.

Later in the boot write access to these pages is restricted. So when
removing the breakpoint the copy_to_kernel_nofault call in
kgdb_arch_remove_breakpoint is destined to fail and the breakpoint removal
fails. So after copy_to_kernel_nofault failed try to text_poke_kgdb which
can work around nonwriteability.

One thing to consider when doing this is that code can go away during boot
(e.g. .init.text). Previously kgdb_arch_remove_breakpoint handled this case
gracefully by just having copy_to_kernel_nofault fail but if one then calls
text_poke_kgdb the system dies due to the BUG_ON we moved out of
__text_poke.  To avoid this __text_poke now returns an error in case of a
nonpresent code page and the error is handled at call site.

Checkpatch complains about two uses of BUG_ON but the new code should not
trigger BUG_ON in cases where the old didn't.

Co-developed-by: Lorena Kretzschmar <qy15sije@cip.cs.fau.de>
Signed-off-by: Lorena Kretzschmar <qy15sije@cip.cs.fau.de>
Signed-off-by: Stefan Saecherl <stefan.saecherl@fau.de>
---
 arch/x86/kernel/alternative.c | 16 +++++++----
 arch/x86/kernel/kgdb.c        | 54 ++++++++++++++++++++++++-----------
 2 files changed, 48 insertions(+), 22 deletions(-)

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 2400ad62f330..0f145d837885 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -878,11 +878,9 @@ static void *__text_poke(void *addr, const void *opcode, size_t len)
 		if (cross_page_boundary)
 			pages[1] = virt_to_page(addr + PAGE_SIZE);
 	}
-	/*
-	 * If something went wrong, crash and burn since recovery paths are not
-	 * implemented.
-	 */
-	BUG_ON(!pages[0] || (cross_page_boundary && !pages[1]));
+
+	if (!pages[0] || (cross_page_boundary && !pages[1]))
+		return ERR_PTR(-EFAULT);
 
 	/*
 	 * Map the page without the global bit, as TLB flushing is done with
@@ -976,7 +974,13 @@ void *text_poke(void *addr, const void *opcode, size_t len)
 {
 	lockdep_assert_held(&text_mutex);
 
-	return __text_poke(addr, opcode, len);
+	addr = __text_poke(addr, opcode, len);
+	/*
+	 * If something went wrong, crash and burn since recovery paths are not
+	 * implemented.
+	 */
+	BUG_ON(IS_ERR(addr));
+	return addr;
 }
 
 /**
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index ff7878df96b4..e98c9c43db7c 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -731,6 +731,7 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip)
 int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
 {
 	int err;
+	void *addr;
 
 	bpt->type = BP_BREAKPOINT;
 	err = copy_from_kernel_nofault(bpt->saved_instr, (char *)bpt->bpt_addr,
@@ -747,8 +748,14 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
 	 */
 	if (mutex_is_locked(&text_mutex))
 		return -EBUSY;
-	text_poke_kgdb((void *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr,
-		       BREAK_INSTR_SIZE);
+
+	addr = text_poke_kgdb((void *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr,
+				BREAK_INSTR_SIZE);
+	/* This should never trigger because the above call to copy_from_kernel_nofault
+	 * already succeeded.
+	 */
+	BUG_ON(IS_ERR(addr));
+
 	bpt->type = BP_POKE_BREAKPOINT;
 
 	return 0;
@@ -756,21 +763,36 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
 
 int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
 {
-	if (bpt->type != BP_POKE_BREAKPOINT)
-		goto knl_write;
-	/*
-	 * It is safe to call text_poke_kgdb() because normal kernel execution
-	 * is stopped on all cores, so long as the text_mutex is not locked.
-	 */
-	if (mutex_is_locked(&text_mutex))
-		goto knl_write;
-	text_poke_kgdb((void *)bpt->bpt_addr, bpt->saved_instr,
-		       BREAK_INSTR_SIZE);
-	return 0;
+	void *addr;
+	int err;
 
-knl_write:
-	return copy_to_kernel_nofault((char *)bpt->bpt_addr,
-				  (char *)bpt->saved_instr, BREAK_INSTR_SIZE);
+	if (bpt->type == BP_POKE_BREAKPOINT) {
+		if (mutex_is_locked(&text_mutex)) {
+			err = copy_to_kernel_nofault((char *)bpt->bpt_addr,
+							(char *)bpt->saved_instr,
+							BREAK_INSTR_SIZE);
+		} else {
+			/*
+			 * It is safe to call text_poke_kgdb() because normal kernel execution
+			 * is stopped on all cores, so long as the text_mutex is not locked.
+			 */
+			addr = text_poke_kgdb((void *)bpt->bpt_addr,
+							bpt->saved_instr,
+							BREAK_INSTR_SIZE);
+			err = PTR_ERR_OR_ZERO(addr);
+		}
+	} else {
+		err = copy_to_kernel_nofault((char *)bpt->bpt_addr,
+						(char *)bpt->saved_instr,
+						BREAK_INSTR_SIZE);
+		if (err == -EFAULT && !mutex_is_locked(&text_mutex)) {
+			addr = text_poke_kgdb((void *)bpt->bpt_addr,
+						bpt->saved_instr,
+						BREAK_INSTR_SIZE);
+			err = PTR_ERR_OR_ZERO(addr);
+		}
+	}
+	return err;
 }
 
 const struct kgdb_arch arch_kgdb_ops = {
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] x86/kgdb: Allow removal of early BPs
  2020-12-14 14:13 [PATCH] x86/kgdb: Allow removal of early BPs Stefan Saecherl
@ 2020-12-18 16:44 ` Daniel Thompson
  2021-01-25 13:17   ` Lorena Kretzschmar
  2021-01-26 11:39 ` Peter Zijlstra
  1 sibling, 1 reply; 5+ messages in thread
From: Daniel Thompson @ 2020-12-18 16:44 UTC (permalink / raw)
  To: Stefan Saecherl
  Cc: x86, linux-kernel, Lorena Kretzschmar, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, H. Peter Anvin, Peter Zijlstra,
	Alexandre Chartre, Mike Rapoport, Ira Weiny, Adrian Hunter,
	Gustavo A. R. Silva, linux-kernel

Hi Stefan

On Mon, Dec 14, 2020 at 03:13:12PM +0100, Stefan Saecherl wrote:
> The problem is that breakpoints that are set early (e.g. via kgdbwait)
> cannot be deleted after boot completed (to be precise after mark_rodata_ro
> ran).
> 
> When setting a breakpoint early there are executable pages that are
> writable so the copy_to_kernel_nofault call in kgdb_arch_set_breakpoint
> succeeds and the breakpoint is saved as type BP_BREAKPOINT.
> 
> Later in the boot write access to these pages is restricted. So when
> removing the breakpoint the copy_to_kernel_nofault call in
> kgdb_arch_remove_breakpoint is destined to fail and the breakpoint removal
> fails. So after copy_to_kernel_nofault failed try to text_poke_kgdb which
> can work around nonwriteability.
> 
> One thing to consider when doing this is that code can go away during boot
> (e.g. .init.text). Previously kgdb_arch_remove_breakpoint handled this case
> gracefully by just having copy_to_kernel_nofault fail but if one then calls
> text_poke_kgdb the system dies due to the BUG_ON we moved out of
> __text_poke.  To avoid this __text_poke now returns an error in case of a
> nonpresent code page and the error is handled at call site.
> 
> Checkpatch complains about two uses of BUG_ON but the new code should not
> trigger BUG_ON in cases where the old didn't.
> 
> Co-developed-by: Lorena Kretzschmar <qy15sije@cip.cs.fau.de>
> Signed-off-by: Lorena Kretzschmar <qy15sije@cip.cs.fau.de>
> Signed-off-by: Stefan Saecherl <stefan.saecherl@fau.de>

I took this to be a gap in the kgdbtest suite so I added a couple of
tests that cover this area. Before this patch they failed now they
pass (at least they do for ARCH=x86).

I don't see any new failures either, so:

Tested-by: Daniel Thompson <daniel.thompson@linaro.org>


Daniel.



> ---
>  arch/x86/kernel/alternative.c | 16 +++++++----
>  arch/x86/kernel/kgdb.c        | 54 ++++++++++++++++++++++++-----------
>  2 files changed, 48 insertions(+), 22 deletions(-)
> 
> diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
> index 2400ad62f330..0f145d837885 100644
> --- a/arch/x86/kernel/alternative.c
> +++ b/arch/x86/kernel/alternative.c
> @@ -878,11 +878,9 @@ static void *__text_poke(void *addr, const void *opcode, size_t len)
>  		if (cross_page_boundary)
>  			pages[1] = virt_to_page(addr + PAGE_SIZE);
>  	}
> -	/*
> -	 * If something went wrong, crash and burn since recovery paths are not
> -	 * implemented.
> -	 */
> -	BUG_ON(!pages[0] || (cross_page_boundary && !pages[1]));
> +
> +	if (!pages[0] || (cross_page_boundary && !pages[1]))
> +		return ERR_PTR(-EFAULT);
>  
>  	/*
>  	 * Map the page without the global bit, as TLB flushing is done with
> @@ -976,7 +974,13 @@ void *text_poke(void *addr, const void *opcode, size_t len)
>  {
>  	lockdep_assert_held(&text_mutex);
>  
> -	return __text_poke(addr, opcode, len);
> +	addr = __text_poke(addr, opcode, len);
> +	/*
> +	 * If something went wrong, crash and burn since recovery paths are not
> +	 * implemented.
> +	 */
> +	BUG_ON(IS_ERR(addr));
> +	return addr;
>  }
>  
>  /**
> diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
> index ff7878df96b4..e98c9c43db7c 100644
> --- a/arch/x86/kernel/kgdb.c
> +++ b/arch/x86/kernel/kgdb.c
> @@ -731,6 +731,7 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip)
>  int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
>  {
>  	int err;
> +	void *addr;
>  
>  	bpt->type = BP_BREAKPOINT;
>  	err = copy_from_kernel_nofault(bpt->saved_instr, (char *)bpt->bpt_addr,
> @@ -747,8 +748,14 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
>  	 */
>  	if (mutex_is_locked(&text_mutex))
>  		return -EBUSY;
> -	text_poke_kgdb((void *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr,
> -		       BREAK_INSTR_SIZE);
> +
> +	addr = text_poke_kgdb((void *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr,
> +				BREAK_INSTR_SIZE);
> +	/* This should never trigger because the above call to copy_from_kernel_nofault
> +	 * already succeeded.
> +	 */
> +	BUG_ON(IS_ERR(addr));
> +
>  	bpt->type = BP_POKE_BREAKPOINT;
>  
>  	return 0;
> @@ -756,21 +763,36 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
>  
>  int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
>  {
> -	if (bpt->type != BP_POKE_BREAKPOINT)
> -		goto knl_write;
> -	/*
> -	 * It is safe to call text_poke_kgdb() because normal kernel execution
> -	 * is stopped on all cores, so long as the text_mutex is not locked.
> -	 */
> -	if (mutex_is_locked(&text_mutex))
> -		goto knl_write;
> -	text_poke_kgdb((void *)bpt->bpt_addr, bpt->saved_instr,
> -		       BREAK_INSTR_SIZE);
> -	return 0;
> +	void *addr;
> +	int err;
>  
> -knl_write:
> -	return copy_to_kernel_nofault((char *)bpt->bpt_addr,
> -				  (char *)bpt->saved_instr, BREAK_INSTR_SIZE);
> +	if (bpt->type == BP_POKE_BREAKPOINT) {
> +		if (mutex_is_locked(&text_mutex)) {
> +			err = copy_to_kernel_nofault((char *)bpt->bpt_addr,
> +							(char *)bpt->saved_instr,
> +							BREAK_INSTR_SIZE);
> +		} else {
> +			/*
> +			 * It is safe to call text_poke_kgdb() because normal kernel execution
> +			 * is stopped on all cores, so long as the text_mutex is not locked.
> +			 */
> +			addr = text_poke_kgdb((void *)bpt->bpt_addr,
> +							bpt->saved_instr,
> +							BREAK_INSTR_SIZE);
> +			err = PTR_ERR_OR_ZERO(addr);
> +		}
> +	} else {
> +		err = copy_to_kernel_nofault((char *)bpt->bpt_addr,
> +						(char *)bpt->saved_instr,
> +						BREAK_INSTR_SIZE);
> +		if (err == -EFAULT && !mutex_is_locked(&text_mutex)) {
> +			addr = text_poke_kgdb((void *)bpt->bpt_addr,
> +						bpt->saved_instr,
> +						BREAK_INSTR_SIZE);
> +			err = PTR_ERR_OR_ZERO(addr);
> +		}
> +	}
> +	return err;
>  }
>  
>  const struct kgdb_arch arch_kgdb_ops = {
> -- 
> 2.20.1

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] x86/kgdb: Allow removal of early BPs
  2020-12-18 16:44 ` Daniel Thompson
@ 2021-01-25 13:17   ` Lorena Kretzschmar
  0 siblings, 0 replies; 5+ messages in thread
From: Lorena Kretzschmar @ 2021-01-25 13:17 UTC (permalink / raw)
  To: x86, Daniel Thompson
  Cc: Stefan Saecherl, linux-kernel, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, H. Peter Anvin, Peter Zijlstra,
	Alexandre Chartre, Mike Rapoport, Ira Weiny, Adrian Hunter,
	Gustavo A. R. Silva, linux-kernel

Hi,

I wanted to ask about the status of the patch. Let us know if there are any
other steps we can undertake.

Kind regards
Lorena

On Fri 2020-12-18 16:44:21, Daniel Thompson wrote:
> Hi Stefan
> 
> On Mon, Dec 14, 2020 at 03:13:12PM +0100, Stefan Saecherl wrote:
> > The problem is that breakpoints that are set early (e.g. via kgdbwait)
> > cannot be deleted after boot completed (to be precise after mark_rodata_ro
> > ran).
> > 
> > When setting a breakpoint early there are executable pages that are
> > writable so the copy_to_kernel_nofault call in kgdb_arch_set_breakpoint
> > succeeds and the breakpoint is saved as type BP_BREAKPOINT.
> > 
> > Later in the boot write access to these pages is restricted. So when
> > removing the breakpoint the copy_to_kernel_nofault call in
> > kgdb_arch_remove_breakpoint is destined to fail and the breakpoint removal
> > fails. So after copy_to_kernel_nofault failed try to text_poke_kgdb which
> > can work around nonwriteability.
> > 
> > One thing to consider when doing this is that code can go away during boot
> > (e.g. .init.text). Previously kgdb_arch_remove_breakpoint handled this case
> > gracefully by just having copy_to_kernel_nofault fail but if one then calls
> > text_poke_kgdb the system dies due to the BUG_ON we moved out of
> > __text_poke.  To avoid this __text_poke now returns an error in case of a
> > nonpresent code page and the error is handled at call site.
> > 
> > Checkpatch complains about two uses of BUG_ON but the new code should not
> > trigger BUG_ON in cases where the old didn't.
> > 
> > Co-developed-by: Lorena Kretzschmar <qy15sije@cip.cs.fau.de>
> > Signed-off-by: Lorena Kretzschmar <qy15sije@cip.cs.fau.de>
> > Signed-off-by: Stefan Saecherl <stefan.saecherl@fau.de>
> 
> I took this to be a gap in the kgdbtest suite so I added a couple of
> tests that cover this area. Before this patch they failed now they
> pass (at least they do for ARCH=x86).
> 
> I don't see any new failures either, so:
> 
> Tested-by: Daniel Thompson <daniel.thompson@linaro.org>
> 
> 
> Daniel.
> 
> 
> 
> > ---
> >  arch/x86/kernel/alternative.c | 16 +++++++----
> >  arch/x86/kernel/kgdb.c        | 54 ++++++++++++++++++++++++-----------
> >  2 files changed, 48 insertions(+), 22 deletions(-)
> > 
> > diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
> > index 2400ad62f330..0f145d837885 100644
> > --- a/arch/x86/kernel/alternative.c
> > +++ b/arch/x86/kernel/alternative.c
> > @@ -878,11 +878,9 @@ static void *__text_poke(void *addr, const void *opcode, size_t len)
> >  		if (cross_page_boundary)
> >  			pages[1] = virt_to_page(addr + PAGE_SIZE);
> >  	}
> > -	/*
> > -	 * If something went wrong, crash and burn since recovery paths are not
> > -	 * implemented.
> > -	 */
> > -	BUG_ON(!pages[0] || (cross_page_boundary && !pages[1]));
> > +
> > +	if (!pages[0] || (cross_page_boundary && !pages[1]))
> > +		return ERR_PTR(-EFAULT);
> >  
> >  	/*
> >  	 * Map the page without the global bit, as TLB flushing is done with
> > @@ -976,7 +974,13 @@ void *text_poke(void *addr, const void *opcode, size_t len)
> >  {
> >  	lockdep_assert_held(&text_mutex);
> >  
> > -	return __text_poke(addr, opcode, len);
> > +	addr = __text_poke(addr, opcode, len);
> > +	/*
> > +	 * If something went wrong, crash and burn since recovery paths are not
> > +	 * implemented.
> > +	 */
> > +	BUG_ON(IS_ERR(addr));
> > +	return addr;
> >  }
> >  
> >  /**
> > diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
> > index ff7878df96b4..e98c9c43db7c 100644
> > --- a/arch/x86/kernel/kgdb.c
> > +++ b/arch/x86/kernel/kgdb.c
> > @@ -731,6 +731,7 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip)
> >  int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
> >  {
> >  	int err;
> > +	void *addr;
> >  
> >  	bpt->type = BP_BREAKPOINT;
> >  	err = copy_from_kernel_nofault(bpt->saved_instr, (char *)bpt->bpt_addr,
> > @@ -747,8 +748,14 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
> >  	 */
> >  	if (mutex_is_locked(&text_mutex))
> >  		return -EBUSY;
> > -	text_poke_kgdb((void *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr,
> > -		       BREAK_INSTR_SIZE);
> > +
> > +	addr = text_poke_kgdb((void *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr,
> > +				BREAK_INSTR_SIZE);
> > +	/* This should never trigger because the above call to copy_from_kernel_nofault
> > +	 * already succeeded.
> > +	 */
> > +	BUG_ON(IS_ERR(addr));
> > +
> >  	bpt->type = BP_POKE_BREAKPOINT;
> >  
> >  	return 0;
> > @@ -756,21 +763,36 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
> >  
> >  int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
> >  {
> > -	if (bpt->type != BP_POKE_BREAKPOINT)
> > -		goto knl_write;
> > -	/*
> > -	 * It is safe to call text_poke_kgdb() because normal kernel execution
> > -	 * is stopped on all cores, so long as the text_mutex is not locked.
> > -	 */
> > -	if (mutex_is_locked(&text_mutex))
> > -		goto knl_write;
> > -	text_poke_kgdb((void *)bpt->bpt_addr, bpt->saved_instr,
> > -		       BREAK_INSTR_SIZE);
> > -	return 0;
> > +	void *addr;
> > +	int err;
> >  
> > -knl_write:
> > -	return copy_to_kernel_nofault((char *)bpt->bpt_addr,
> > -				  (char *)bpt->saved_instr, BREAK_INSTR_SIZE);
> > +	if (bpt->type == BP_POKE_BREAKPOINT) {
> > +		if (mutex_is_locked(&text_mutex)) {
> > +			err = copy_to_kernel_nofault((char *)bpt->bpt_addr,
> > +							(char *)bpt->saved_instr,
> > +							BREAK_INSTR_SIZE);
> > +		} else {
> > +			/*
> > +			 * It is safe to call text_poke_kgdb() because normal kernel execution
> > +			 * is stopped on all cores, so long as the text_mutex is not locked.
> > +			 */
> > +			addr = text_poke_kgdb((void *)bpt->bpt_addr,
> > +							bpt->saved_instr,
> > +							BREAK_INSTR_SIZE);
> > +			err = PTR_ERR_OR_ZERO(addr);
> > +		}
> > +	} else {
> > +		err = copy_to_kernel_nofault((char *)bpt->bpt_addr,
> > +						(char *)bpt->saved_instr,
> > +						BREAK_INSTR_SIZE);
> > +		if (err == -EFAULT && !mutex_is_locked(&text_mutex)) {
> > +			addr = text_poke_kgdb((void *)bpt->bpt_addr,
> > +						bpt->saved_instr,
> > +						BREAK_INSTR_SIZE);
> > +			err = PTR_ERR_OR_ZERO(addr);
> > +		}
> > +	}
> > +	return err;
> >  }
> >  
> >  const struct kgdb_arch arch_kgdb_ops = {
> > -- 
> > 2.20.1

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] x86/kgdb: Allow removal of early BPs
  2020-12-14 14:13 [PATCH] x86/kgdb: Allow removal of early BPs Stefan Saecherl
  2020-12-18 16:44 ` Daniel Thompson
@ 2021-01-26 11:39 ` Peter Zijlstra
  2021-01-27 14:17   ` Borislav Petkov
  1 sibling, 1 reply; 5+ messages in thread
From: Peter Zijlstra @ 2021-01-26 11:39 UTC (permalink / raw)
  To: Stefan Saecherl
  Cc: x86, linux-kernel, Lorena Kretzschmar, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, H. Peter Anvin, Alexandre Chartre,
	Mike Rapoport, Ira Weiny, Adrian Hunter, Gustavo A. R. Silva,
	linux-kernel

On Mon, Dec 14, 2020 at 03:13:12PM +0100, Stefan Saecherl wrote:

> One thing to consider when doing this is that code can go away during boot
> (e.g. .init.text). Previously kgdb_arch_remove_breakpoint handled this case
> gracefully by just having copy_to_kernel_nofault fail but if one then calls
> text_poke_kgdb the system dies due to the BUG_ON we moved out of
> __text_poke.  To avoid this __text_poke now returns an error in case of a
> nonpresent code page and the error is handled at call site.

So what if the page is reused and now exists again?

We keep track of the init state, how about you look at that and not poke
at .init.text after it's freed instead?

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] x86/kgdb: Allow removal of early BPs
  2021-01-26 11:39 ` Peter Zijlstra
@ 2021-01-27 14:17   ` Borislav Petkov
  0 siblings, 0 replies; 5+ messages in thread
From: Borislav Petkov @ 2021-01-27 14:17 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Stefan Saecherl, x86, linux-kernel, Lorena Kretzschmar,
	Thomas Gleixner, Ingo Molnar, H. Peter Anvin, Alexandre Chartre,
	Mike Rapoport, Ira Weiny, Adrian Hunter, Gustavo A. R. Silva,
	linux-kernel

On Tue, Jan 26, 2021 at 12:39:17PM +0100, Peter Zijlstra wrote:
> On Mon, Dec 14, 2020 at 03:13:12PM +0100, Stefan Saecherl wrote:
> 
> > One thing to consider when doing this is that code can go away during boot
> > (e.g. .init.text). Previously kgdb_arch_remove_breakpoint handled this case
> > gracefully by just having copy_to_kernel_nofault fail but if one then calls
> > text_poke_kgdb the system dies due to the BUG_ON we moved out of
> > __text_poke.  To avoid this __text_poke now returns an error in case of a
> > nonpresent code page and the error is handled at call site.
> 
> So what if the page is reused and now exists again?
> 
> We keep track of the init state, how about you look at that and not poke
> at .init.text after it's freed instead?

Dunno if this is related but kgdb doesn't like one of its selftests
either. It triggers in a vm.

[    2.036914] [drm] Initialized cirrus 2.0.0 2019 for 0000:00:02.0 on minor 0
[    2.037803] fbcon: cirrusdrmfb (fb0) is primary device
[    2.180326] Console: switching to colour frame buffer device 128x48
[    2.225910] cirrus 0000:00:02.0: [drm] fb0: cirrusdrmfb frame buffer device
[    2.229397] KGDB: Registered I/O driver kgdbts
[    2.230931] kgdbts:RUN hw breakpoint test

Entering kdb (current=0xffff88800c1f0000, pid 1) on processor 1 due to NonMaskable Interrupt @ 0xffffffff81121490
[1]kdb> [    2.234788] INFO: NMI handler (kgdb_nmi_handler) took too long to run: 3.308 msecs
[    2.234994] INFO: NMI handler (kgdb_nmi_handler) took too long to run: 3.303 msecs
[    2.235003] INFO: NMI handler (kgdb_nmi_handler) took too long to run: 3.306 msecs
[    2.237092] kgdbts:RUN hw write breakpoint test
[    2.245435] kgdbts:RUN access write breakpoint test
[    2.248446] kgdbts: ERROR hw_access_break_test test failed
[    2.249430] ------------[ cut here ]------------
[    2.250233] WARNING: CPU: 1 PID: 1 at drivers/misc/kgdbts.c:903 run_hw_break_test.cold+0x13/0x30
[    2.251790] Modules linked in:
[    2.252341] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 5.11.0-rc5+ #2
[    2.253444] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014
[    2.254925] RIP: 0010:run_hw_break_test.cold+0x13/0x30
[    2.255820] Code: 00 48 8b 35 0f e3 f9 09 48 c7 c7 18 15 12 82 e8 96 1f fe ff 0f 0b c3 48 8b 35 f9 e2 f9 09 48 c7 c7 18 15 12 82 e8 80 1f fe ff <0f> 0b c3 48 8b 35 e3 e2 f9 09 48 c7 c7 38 15 12 82 e8 6a 1f fe ff
[    2.259062] RSP: 0018:ffffc90000013e78 EFLAGS: 00010286
[    2.259971] RAX: 000000000000002e RBX: 0000000000000000 RCX: 00000000ffefffff
[    2.261198] RDX: 00000000ffffffea RSI: ffffc90000013d18 RDI: ffffffff8978d0ec
[    2.263054] RBP: 0000000000000000 R08: ffffffff89249128 R09: 0000000000000058
[    2.264848] R10: ffffffff87a49140 R11: 00000000fff00000 R12: 0000000000000000
[    2.266677] R13: 00000000000003e8 R14: 0000000000000000 R15: 0000000000000064
[    2.268509] FS:  0000000000000000(0000) GS:ffff88807da40000(0000) knlGS:0000000000000000
[    2.271591] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    2.273193] CR2: 0000000000000000 CR3: 000000000220a000 CR4: 00000000003506e0
[    2.275051] DR0: ffffffff8b7fc8a4 DR1: 0000000000000000 DR2: 0000000000000000
[    2.276841] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000600
[    2.278629] Call Trace:
[    2.279669]  configure_kgdbts+0x2c8/0x560
[    2.280930]  ? regmap_initcall+0xd/0xd
[    2.282144]  do_one_initcall+0x44/0x200
[    2.283404]  kernel_init_freeable+0x16d/0x1b2
[    2.284705]  ? rest_init+0xb9/0xb9
[    2.285845]  kernel_init+0xa/0x107
[    2.287012]  ret_from_fork+0x22/0x30
[    2.288163] ---[ end trace eb784077898119a0 ]---
[    2.289486] kgdbts:RUN plant and detach test
[    2.291842] kgdbts:RUN sw breakpoint test
[    2.296227] kgdbts:RUN bad memory access test
[    2.298555] kgdbts:RUN singlestep test 1000 iterations
[    2.304355] kgdbts:RUN singlestep [0/1000]
[    2.374077] perf: interrupt took too long (2502 > 2500), lowering kernel.perf_event_max_sample_rate to 79750
[    2.411530] perf: interrupt took too long (3581 > 3127), lowering kernel.perf_event_max_sample_rate to 55750
[    2.425941] INFO: NMI handler (kgdb_nmi_handler) took too long to run: 3.975 msecs
[    2.425948] INFO: NMI handler (kgdb_nmi_handler) took too long to run: 3.968 msecs
[    2.425954] INFO: NMI handler (kgdb_nmi_handler) took too long to run: 3.975 msecs
[    2.425960] INFO: NMI handler (kgdb_nmi_handler) took too long to run: 3.973 msecs
[    2.721821] kgdbts:RUN singlestep [100/1000]
[    3.135615] kgdbts:RUN singlestep [200/1000]
[    3.548892] kgdbts:RUN singlestep [300/1000]
[    3.960696] kgdbts:RUN singlestep [400/1000]

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2021-01-27 14:20 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-12-14 14:13 [PATCH] x86/kgdb: Allow removal of early BPs Stefan Saecherl
2020-12-18 16:44 ` Daniel Thompson
2021-01-25 13:17   ` Lorena Kretzschmar
2021-01-26 11:39 ` Peter Zijlstra
2021-01-27 14:17   ` Borislav Petkov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.