linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Regression bisected to f2f84b05e02b (bug: consolidate warn_slowpath_fmt() usage)
@ 2020-06-02  2:48 Matt Turner
  2020-06-02 18:03 ` Kees Cook
  2024-05-21 18:46 ` John Paul Adrian Glaubitz
  0 siblings, 2 replies; 7+ messages in thread
From: Matt Turner @ 2020-06-02  2:48 UTC (permalink / raw)
  To: Kees Cook; +Cc: linux-arch, linux-kernel, linux-alpha

[-- Attachment #1: Type: text/plain, Size: 4010 bytes --]

I bisected a regression on alpha to f2f84b05e02b (bug: consolidate
warn_slowpath_fmt() usage) which looks totally innocuous.

Reverting it on master confirms that it somehow is the trigger. At or a
little after starting userspace, I'll see an oops like this:

Unable to handle kernel paging request at virtual address 0000000000000000
CPU 0
kworker/u2:5(98): Oops -1
pc = [<0000000000000000>]  ra = [<0000000000000000>]  ps = 0000    Not tainted
pc is at 0x0
ra is at 0x0
v0 = 0000000000000007  t0 = 0000000000000001  t1 = 0000000000000001
t2 = 0000000000000000  t3 = fffffc00bfe68780  t4 = 0000000000000001
t5 = fffffc00bf8cc780  t6 = 00000000026f8000  t7 = fffffc00bfe70000
s0 = fffffc000250d310  s1 = fffffc000250d310  s2 = fffffc000250d310
s3 = fffffc000250ca40  s4 = fffffc000250caa0  s5 = 0000000000000000
s6 = fffffc000250ca40
a0 = fffffc00024f0488  a1 = fffffc00bfe73d98  a2 = fffffc00bfe68800
a3 = fffffc00bf881400  a4 = 0001000000000000  a5 = 0000000000000002
t8 = 0000000000000000  t9 = 0000000000000000  t10= 0000000001321800
t11= 000000000000ba4e  pv = fffffc000189ca00  at = 0000000000000000
gp = fffffc000253e430  sp = 0000000043a83c2e
Disabling lock debugging due to kernel taint
Trace:
[<fffffc000105c8ac>] process_one_work+0x25c/0x5a0
[<fffffc000105cc4c>] worker_thread+0x5c/0x7d0
[<fffffc0001066c88>] kthread+0x188/0x1f0
[<fffffc0001011b48>] ret_from_kernel_thread+0x18/0x20
[<fffffc0001066b00>] kthread+0x0/0x1f0
[<fffffc000105cbf0>] worker_thread+0x0/0x7d0

Code:
  00000000
  00000000
  00063301
  000012e2
  00001111
  0005ffde

It seems to cause a hard lock on an SMP system, but not on a system with
a single CPU. Similarly, if I boot the SMP system (2 CPUs) with
maxcpus=1 the oops doesn't happen. Until I tested on a non-SMP system
today I suspected that it was unaffected, but I saw the oops there too.
With the revert applied, I don't see a warning or an oops.

Any clues how this patch could have triggered the oops?

Here's the revert, with a trivial conflict resolved, that I've used in
testing:

 From fdbdd0f606f0f412ee06c1152e33a22ca17102bc Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Sun, 24 May 2020 20:46:00 -0700
Subject: [PATCH] Revert "bug: consolidate warn_slowpath_fmt() usage"

This reverts commit f2f84b05e02b7710a201f0017b3272ad7ef703d1.
---
  include/asm-generic/bug.h |  3 ++-
  kernel/panic.c            | 15 +++++++--------
  2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index 384b5c835ced..a4a311d4b4b0 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -82,7 +82,8 @@ struct bug_entry {
  extern __printf(4, 5)
  void warn_slowpath_fmt(const char *file, const int line, unsigned taint,
  		       const char *fmt, ...);
-#define __WARN()		__WARN_printf(TAINT_WARN, NULL)
+extern void warn_slowpath_null(const char *file, const int line);
+#define __WARN()		warn_slowpath_null(__FILE__, __LINE__)
  #define __WARN_printf(taint, arg...)					\
  	warn_slowpath_fmt(__FILE__, __LINE__, taint, arg)
  #else
diff --git a/kernel/panic.c b/kernel/panic.c
index b69ee9e76cb2..c8ed8046b484 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -603,20 +603,19 @@ void warn_slowpath_fmt(const char *file, int line, unsigned taint,
  {
  	struct warn_args args;
  
-	pr_warn(CUT_HERE);
-
-	if (!fmt) {
-		__warn(file, line, __builtin_return_address(0), taint,
-		       NULL, NULL);
-		return;
-	}
-
  	args.fmt = fmt;
  	va_start(args.args, fmt);
  	__warn(file, line, __builtin_return_address(0), taint, NULL, &args);
  	va_end(args.args);
  }
  EXPORT_SYMBOL(warn_slowpath_fmt);
+
+void warn_slowpath_null(const char *file, int line)
+{
+	pr_warn(CUT_HERE);
+	__warn(file, line, __builtin_return_address(0), TAINT_WARN, NULL, NULL);
+}
+EXPORT_SYMBOL(warn_slowpath_null);
  #else
  void __warn_printk(const char *fmt, ...)
  {
-- 
2.26.2

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 376 bytes --]

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: Regression bisected to f2f84b05e02b (bug: consolidate warn_slowpath_fmt() usage)
  2020-06-02  2:48 Regression bisected to f2f84b05e02b (bug: consolidate warn_slowpath_fmt() usage) Matt Turner
@ 2020-06-02 18:03 ` Kees Cook
  2020-06-12  4:23   ` Matt Turner
  2024-05-21 18:46 ` John Paul Adrian Glaubitz
  1 sibling, 1 reply; 7+ messages in thread
From: Kees Cook @ 2020-06-02 18:03 UTC (permalink / raw)
  To: Matt Turner; +Cc: linux-arch, linux-kernel, linux-alpha

On Mon, Jun 01, 2020 at 07:48:04PM -0700, Matt Turner wrote:
> I bisected a regression on alpha to f2f84b05e02b (bug: consolidate
> warn_slowpath_fmt() usage) which looks totally innocuous.
> 
> Reverting it on master confirms that it somehow is the trigger. At or a
> little after starting userspace, I'll see an oops like this:
> 
> Unable to handle kernel paging request at virtual address 0000000000000000
> CPU 0
> kworker/u2:5(98): Oops -1
> pc = [<0000000000000000>]  ra = [<0000000000000000>]  ps = 0000    Not tainted
> pc is at 0x0

^^^^ so, the instruction pointer is NULL. The only way I can imagine
that happening would be from this line:

        worker->current_func(work);

> ra is at 0x0
> v0 = 0000000000000007  t0 = 0000000000000001  t1 = 0000000000000001
> t2 = 0000000000000000  t3 = fffffc00bfe68780  t4 = 0000000000000001
> t5 = fffffc00bf8cc780  t6 = 00000000026f8000  t7 = fffffc00bfe70000
> s0 = fffffc000250d310  s1 = fffffc000250d310  s2 = fffffc000250d310
> s3 = fffffc000250ca40  s4 = fffffc000250caa0  s5 = 0000000000000000
> s6 = fffffc000250ca40
> a0 = fffffc00024f0488  a1 = fffffc00bfe73d98  a2 = fffffc00bfe68800
> a3 = fffffc00bf881400  a4 = 0001000000000000  a5 = 0000000000000002
> t8 = 0000000000000000  t9 = 0000000000000000  t10= 0000000001321800
> t11= 000000000000ba4e  pv = fffffc000189ca00  at = 0000000000000000
> gp = fffffc000253e430  sp = 0000000043a83c2e
> Disabling lock debugging due to kernel taint
> Trace:
> [<fffffc000105c8ac>] process_one_work+0x25c/0x5a0

Can you verify where this     ^^^^^^^^^^^^^^   is?

> [<fffffc000105cc4c>] worker_thread+0x5c/0x7d0
> [<fffffc0001066c88>] kthread+0x188/0x1f0
> [<fffffc0001011b48>] ret_from_kernel_thread+0x18/0x20
> [<fffffc0001066b00>] kthread+0x0/0x1f0
> [<fffffc000105cbf0>] worker_thread+0x0/0x7d0
> 
> Code:
>  00000000
>  00000000
>  00063301
>  000012e2
>  00001111
>  0005ffde
> 
> It seems to cause a hard lock on an SMP system, but not on a system with
> a single CPU. Similarly, if I boot the SMP system (2 CPUs) with
> maxcpus=1 the oops doesn't happen. Until I tested on a non-SMP system
> today I suspected that it was unaffected, but I saw the oops there too.
> With the revert applied, I don't see a warning or an oops.
> 
> Any clues how this patch could have triggered the oops?

I cannot begin to imagine. :P Compared to other things I've seen like
this in the past maybe it's some kind of effect from the code size
changing the location/alignment or timing of something else?

Various questions ranging in degrees of sanity:

Does alpha use work queues for WARN?

Which work queue is getting a NULL function? (And then things like "if
WARN was much slower or much faster, is there a race to something
setting itself to NULL?")

Was there a WARN before the above Oops?

Does WARN have side-effects on alpha?

Does __WARN_printf() do something bad that warn_slowpath_null() doesn't?

Does making incremental changes narrow anything down? (e.g. instead of
this revert, remove the __warn() call in warn_slowpath_fmt() that was
added? (I mean, that'll be quite broken for WARN, but will it not oops?)

Does alpha have hardware breakpoints? When I had to track down a
corruption in the io scheduler, I ended up setting breakpoints on the
thing that went crazy (in this case, I assume the work queue function
pointer) to figure out what touched it.

... I can't think of anything else.

-Kees

> 
> Here's the revert, with a trivial conflict resolved, that I've used in
> testing:
> 
> From fdbdd0f606f0f412ee06c1152e33a22ca17102bc Mon Sep 17 00:00:00 2001
> From: Matt Turner <mattst88@gmail.com>
> Date: Sun, 24 May 2020 20:46:00 -0700
> Subject: [PATCH] Revert "bug: consolidate warn_slowpath_fmt() usage"
> 
> This reverts commit f2f84b05e02b7710a201f0017b3272ad7ef703d1.
> ---
>  include/asm-generic/bug.h |  3 ++-
>  kernel/panic.c            | 15 +++++++--------
>  2 files changed, 9 insertions(+), 9 deletions(-)
> 
> diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
> index 384b5c835ced..a4a311d4b4b0 100644
> --- a/include/asm-generic/bug.h
> +++ b/include/asm-generic/bug.h
> @@ -82,7 +82,8 @@ struct bug_entry {
>  extern __printf(4, 5)
>  void warn_slowpath_fmt(const char *file, const int line, unsigned taint,
>  		       const char *fmt, ...);
> -#define __WARN()		__WARN_printf(TAINT_WARN, NULL)
> +extern void warn_slowpath_null(const char *file, const int line);
> +#define __WARN()		warn_slowpath_null(__FILE__, __LINE__)
>  #define __WARN_printf(taint, arg...)					\
>  	warn_slowpath_fmt(__FILE__, __LINE__, taint, arg)
>  #else
> diff --git a/kernel/panic.c b/kernel/panic.c
> index b69ee9e76cb2..c8ed8046b484 100644
> --- a/kernel/panic.c
> +++ b/kernel/panic.c
> @@ -603,20 +603,19 @@ void warn_slowpath_fmt(const char *file, int line, unsigned taint,
>  {
>  	struct warn_args args;
> -	pr_warn(CUT_HERE);
> -
> -	if (!fmt) {
> -		__warn(file, line, __builtin_return_address(0), taint,
> -		       NULL, NULL);
> -		return;
> -	}
> -
>  	args.fmt = fmt;
>  	va_start(args.args, fmt);
>  	__warn(file, line, __builtin_return_address(0), taint, NULL, &args);
>  	va_end(args.args);
>  }
>  EXPORT_SYMBOL(warn_slowpath_fmt);
> +
> +void warn_slowpath_null(const char *file, int line)
> +{
> +	pr_warn(CUT_HERE);
> +	__warn(file, line, __builtin_return_address(0), TAINT_WARN, NULL, NULL);
> +}
> +EXPORT_SYMBOL(warn_slowpath_null);
>  #else
>  void __warn_printk(const char *fmt, ...)
>  {
> -- 
> 2.26.2



-- 
Kees Cook

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: Regression bisected to f2f84b05e02b (bug: consolidate warn_slowpath_fmt() usage)
  2020-06-02 18:03 ` Kees Cook
@ 2020-06-12  4:23   ` Matt Turner
  2020-06-12  4:47     ` Michael Cree
  0 siblings, 1 reply; 7+ messages in thread
From: Matt Turner @ 2020-06-12  4:23 UTC (permalink / raw)
  To: Kees Cook
  Cc: Linux-Arch, LKML, linux-alpha, Richard Henderson, Ivan Kokshaysky

On Tue, Jun 2, 2020 at 11:03 AM Kees Cook <keescook@chromium.org> wrote:
>
> On Mon, Jun 01, 2020 at 07:48:04PM -0700, Matt Turner wrote:
> > I bisected a regression on alpha to f2f84b05e02b (bug: consolidate
> > warn_slowpath_fmt() usage) which looks totally innocuous.
> >
> > Reverting it on master confirms that it somehow is the trigger. At or a
> > little after starting userspace, I'll see an oops like this:
> >
> > Unable to handle kernel paging request at virtual address 0000000000000000
> > CPU 0
> > kworker/u2:5(98): Oops -1
> > pc = [<0000000000000000>]  ra = [<0000000000000000>]  ps = 0000    Not tainted
> > pc is at 0x0
>
> ^^^^ so, the instruction pointer is NULL. The only way I can imagine
> that happening would be from this line:
>
>         worker->current_func(work);
>
> > ra is at 0x0
> > v0 = 0000000000000007  t0 = 0000000000000001  t1 = 0000000000000001
> > t2 = 0000000000000000  t3 = fffffc00bfe68780  t4 = 0000000000000001
> > t5 = fffffc00bf8cc780  t6 = 00000000026f8000  t7 = fffffc00bfe70000
> > s0 = fffffc000250d310  s1 = fffffc000250d310  s2 = fffffc000250d310
> > s3 = fffffc000250ca40  s4 = fffffc000250caa0  s5 = 0000000000000000
> > s6 = fffffc000250ca40
> > a0 = fffffc00024f0488  a1 = fffffc00bfe73d98  a2 = fffffc00bfe68800
> > a3 = fffffc00bf881400  a4 = 0001000000000000  a5 = 0000000000000002
> > t8 = 0000000000000000  t9 = 0000000000000000  t10= 0000000001321800
> > t11= 000000000000ba4e  pv = fffffc000189ca00  at = 0000000000000000
> > gp = fffffc000253e430  sp = 0000000043a83c2e
> > Disabling lock debugging due to kernel taint
> > Trace:
> > [<fffffc000105c8ac>] process_one_work+0x25c/0x5a0
>
> Can you verify where this     ^^^^^^^^^^^^^^   is?

It is kernel/workqueue.c:2268, which contains

        worker->current_func(work);

as you predicted.

> > [<fffffc000105cc4c>] worker_thread+0x5c/0x7d0
> > [<fffffc0001066c88>] kthread+0x188/0x1f0
> > [<fffffc0001011b48>] ret_from_kernel_thread+0x18/0x20
> > [<fffffc0001066b00>] kthread+0x0/0x1f0
> > [<fffffc000105cbf0>] worker_thread+0x0/0x7d0
> >
> > Code:
> >  00000000
> >  00000000
> >  00063301
> >  000012e2
> >  00001111
> >  0005ffde
> >
> > It seems to cause a hard lock on an SMP system, but not on a system with
> > a single CPU. Similarly, if I boot the SMP system (2 CPUs) with
> > maxcpus=1 the oops doesn't happen. Until I tested on a non-SMP system
> > today I suspected that it was unaffected, but I saw the oops there too.
> > With the revert applied, I don't see a warning or an oops.
> >
> > Any clues how this patch could have triggered the oops?
>
> I cannot begin to imagine. :P Compared to other things I've seen like
> this in the past maybe it's some kind of effect from the code size
> changing the location/alignment or timing of something else?
>
> Various questions ranging in degrees of sanity:
>
> Does alpha use work queues for WARN?

I do not know. I don't see much in a few greps of arch/alpha that
would indicate that it uses work queues.

> Which work queue is getting a NULL function? (And then things like "if
> WARN was much slower or much faster, is there a race to something
> setting itself to NULL?")
>
> Was there a WARN before the above Oops?

No, which I suspect means that your much scarier suggestion that this
is somehow due to code size or alignment is increasingly plausible.

> Does WARN have side-effects on alpha?

alpha just uses the asm-generic implementation of WARN as far as I can
tell, so I think not.

> Does __WARN_printf() do something bad that warn_slowpath_null() doesn't?
>
> Does making incremental changes narrow anything down? (e.g. instead of
> this revert, remove the __warn() call in warn_slowpath_fmt() that was
> added? (I mean, that'll be quite broken for WARN, but will it not oops?)

Commenting out the added __warn does not work around the problem.

Readding warn_slowpath_null and the EXPORT_SYMBOL (but not calling it
from WARN) does not work around the problem.

Calling warn_slowpath_fmt() with fmt=" " instead of fmt=NULL does not
work around the problem.

I also tried GCC-10.1 as a stab in the dark, and that doesn't work
around the problem.

So I'm thinking it's something about code size or alignment. I would
be worried it's to do with memory ordering (since this is on Alpha)
but I'm seeing the problem on a single CPU system, so that should be
ruled out, I think?

Using CONFIG_CC_OPTIMIZE_FOR_SIZE=y doesn't work around the problem.
So that hurts the theory of code size being the trigger.

Since I noticed earlier that using maxcpus=1 on a 2-CPU system
prevented the system from hanging, I tried disabling CONFIG_SMP on my
1-CPU system as well. In doing so, I discovered that the RCU torture
module (RCU_TORTURE_TEST) triggers some null pointer dereferences on
Alpha when CONFIG_SMP is set, but works successfully when CONFIG_SMP
is unset.

That seems likely to be a symptom of the same underlying problem that
started this thread, don't you think? If so, I'll focus my attention
on that.

> Does alpha have hardware breakpoints? When I had to track down a
> corruption in the io scheduler, I ended up setting breakpoints on the
> thing that went crazy (in this case, I assume the work queue function
> pointer) to figure out what touched it.

As far as I know we don't have anything implemented in the kernel, but
they could be implemented by faulting on read/write.

> ... I can't think of anything else.

Thanks for your time and suggestions!

Matt

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: Regression bisected to f2f84b05e02b (bug: consolidate warn_slowpath_fmt() usage)
  2020-06-12  4:23   ` Matt Turner
@ 2020-06-12  4:47     ` Michael Cree
  2020-06-12  5:07       ` Kees Cook
  0 siblings, 1 reply; 7+ messages in thread
From: Michael Cree @ 2020-06-12  4:47 UTC (permalink / raw)
  To: Matt Turner
  Cc: Kees Cook, Linux-Arch, LKML, linux-alpha, Richard Henderson,
	Ivan Kokshaysky

On Thu, Jun 11, 2020 at 09:23:52PM -0700, Matt Turner wrote:
> Since I noticed earlier that using maxcpus=1 on a 2-CPU system
> prevented the system from hanging, I tried disabling CONFIG_SMP on my
> 1-CPU system as well. In doing so, I discovered that the RCU torture
> module (RCU_TORTURE_TEST) triggers some null pointer dereferences on
> Alpha when CONFIG_SMP is set, but works successfully when CONFIG_SMP
> is unset.
> 
> That seems likely to be a symptom of the same underlying problem that
> started this thread, don't you think? If so, I'll focus my attention
> on that.

I wonder if that is related to user space segfaults we are now seeing
on SMP systems but not UP systems while building Alpha debian-ports.
It's happening in the test-suites of builds of certain software
(such as autogen and guile) but they always build successfully with
the test suite passing on a UP system.

When investigating I seem to recall it was a NULL (or near NULL)
pointer dereference but couldn't make any sense of how it might
have got into such an obviously wrong state.

Cheers,
Michael.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: Regression bisected to f2f84b05e02b (bug: consolidate warn_slowpath_fmt() usage)
  2020-06-12  4:47     ` Michael Cree
@ 2020-06-12  5:07       ` Kees Cook
  0 siblings, 0 replies; 7+ messages in thread
From: Kees Cook @ 2020-06-12  5:07 UTC (permalink / raw)
  To: Michael Cree, Matt Turner, Linux-Arch, LKML, linux-alpha,
	Richard Henderson, Ivan Kokshaysky

On Fri, Jun 12, 2020 at 04:47:57PM +1200, Michael Cree wrote:
> On Thu, Jun 11, 2020 at 09:23:52PM -0700, Matt Turner wrote:
> > Since I noticed earlier that using maxcpus=1 on a 2-CPU system
> > prevented the system from hanging, I tried disabling CONFIG_SMP on my
> > 1-CPU system as well. In doing so, I discovered that the RCU torture
> > module (RCU_TORTURE_TEST) triggers some null pointer dereferences on
> > Alpha when CONFIG_SMP is set, but works successfully when CONFIG_SMP
> > is unset.
> > 
> > That seems likely to be a symptom of the same underlying problem that
> > started this thread, don't you think? If so, I'll focus my attention
> > on that.
> 
> I wonder if that is related to user space segfaults we are now seeing
> on SMP systems but not UP systems while building Alpha debian-ports.
> It's happening in the test-suites of builds of certain software
> (such as autogen and guile) but they always build successfully with
> the test suite passing on a UP system.
> 
> When investigating I seem to recall it was a NULL (or near NULL)
> pointer dereference but couldn't make any sense of how it might
> have got into such an obviously wrong state.

By some miracle, I have avoided any experience with RCU bugs. ;) If
the RCU_TORTURE_TEST Oopses or the segfaults are repeatable and don't
go away with the WARN patch reverted, then perhaps it might be used to
bisect to something closer to the root cause?

Given the similarity to the SMP vs UP stuff and the RCU tests, I'd agree
that does seem like the best path to investigate.

-- 
Kees Cook

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: Regression bisected to f2f84b05e02b (bug: consolidate warn_slowpath_fmt() usage)
  2020-06-02  2:48 Regression bisected to f2f84b05e02b (bug: consolidate warn_slowpath_fmt() usage) Matt Turner
  2020-06-02 18:03 ` Kees Cook
@ 2024-05-21 18:46 ` John Paul Adrian Glaubitz
  2024-05-23 23:49   ` Kees Cook
  1 sibling, 1 reply; 7+ messages in thread
From: John Paul Adrian Glaubitz @ 2024-05-21 18:46 UTC (permalink / raw)
  To: mattst88; +Cc: keescook, linux-alpha, linux-arch, linux-kernel

Hi,

Replacing the calls to raw_smp_processor_id() in __warn() with just "0" fixes the problem for me:

diff --git a/kernel/panic.c b/kernel/panic.c
index 8bff183d6180..12f6cea6b8b0 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -671,11 +671,11 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
 
        if (file)
                pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS\n",
-                       raw_smp_processor_id(), current->pid, file, line,
+                       0, current->pid, file, line,
                        caller);
        else
                pr_warn("WARNING: CPU: %d PID: %d at %pS\n",
-                       raw_smp_processor_id(), current->pid, caller);
+                       0, current->pid, caller);
 
 #pragma GCC diagnostic push
 #ifndef __clang__

So, I assume the problem is that SMP support is not fully initialized at this
point yet such that raw_smp_processor_id() causes the zero pointer dereference.

Adrian

-- 
 .''`.  John Paul Adrian Glaubitz
: :' :  Debian Developer
`. `'   Physicist
  `-    GPG: 62FF 8A75 84E0 2956 9546  0006 7426 3B37 F5B5 F913

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: Regression bisected to f2f84b05e02b (bug: consolidate warn_slowpath_fmt() usage)
  2024-05-21 18:46 ` John Paul Adrian Glaubitz
@ 2024-05-23 23:49   ` Kees Cook
  0 siblings, 0 replies; 7+ messages in thread
From: Kees Cook @ 2024-05-23 23:49 UTC (permalink / raw)
  To: John Paul Adrian Glaubitz; +Cc: mattst88, linux-alpha, linux-arch, linux-kernel

On Tue, May 21, 2024 at 08:46:52PM +0200, John Paul Adrian Glaubitz wrote:
> Hi,
> 
> Replacing the calls to raw_smp_processor_id() in __warn() with just "0" fixes the problem for me:
> 
> diff --git a/kernel/panic.c b/kernel/panic.c
> index 8bff183d6180..12f6cea6b8b0 100644
> --- a/kernel/panic.c
> +++ b/kernel/panic.c
> @@ -671,11 +671,11 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
>  
>         if (file)
>                 pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS\n",
> -                       raw_smp_processor_id(), current->pid, file, line,
> +                       0, current->pid, file, line,
>                         caller);
>         else
>                 pr_warn("WARNING: CPU: %d PID: %d at %pS\n",
> -                       raw_smp_processor_id(), current->pid, caller);
> +                       0, current->pid, caller);
>  
>  #pragma GCC diagnostic push
>  #ifndef __clang__
> 
> So, I assume the problem is that SMP support is not fully initialized at this
> point yet such that raw_smp_processor_id() causes the zero pointer dereference.

But how does the commit change that? It called __warn() before too.

Is this an inlining bug?

-- 
Kees Cook

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2024-05-23 23:49 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-02  2:48 Regression bisected to f2f84b05e02b (bug: consolidate warn_slowpath_fmt() usage) Matt Turner
2020-06-02 18:03 ` Kees Cook
2020-06-12  4:23   ` Matt Turner
2020-06-12  4:47     ` Michael Cree
2020-06-12  5:07       ` Kees Cook
2024-05-21 18:46 ` John Paul Adrian Glaubitz
2024-05-23 23:49   ` Kees Cook

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).