All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] x86/nmi: remove the irqwork from long duration nmi handler
@ 2020-01-01  7:20 Changbin Du
  2020-01-07 14:41 ` Changbin Du
  2020-01-09 20:55 ` Thomas Gleixner
  0 siblings, 2 replies; 7+ messages in thread
From: Changbin Du @ 2020-01-01  7:20 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Ingo Molnar, Borislav Petkov, hpa, x86, Greg Kroah-Hartman,
	linux-kernel, Changbin Du

First, printk is NMI context safe now since the safe printk has been
implemented. The safe printk already has an irqwork to make NMI context
safe.

Second, the NMI irqwork actually does not work if an NMI handler causes
a panic by watchdog timeout. This NMI irqwork has no chance to run in such
a case, while the safe printk will flush its per-cpu buffer before panic.

Signed-off-by: Changbin Du <changbin.du@gmail.com>
---
 arch/x86/include/asm/nmi.h |  1 -
 arch/x86/kernel/nmi.c      | 20 +++++++++-----------
 2 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 75ded1d13d98..9d5d949e662e 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -41,7 +41,6 @@ struct nmiaction {
 	struct list_head	list;
 	nmi_handler_t		handler;
 	u64			max_duration;
-	struct irq_work		irq_work;
 	unsigned long		flags;
 	const char		*name;
 };
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index e676a9916c49..aa15d4f2340f 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -104,18 +104,22 @@ static int __init nmi_warning_debugfs(void)
 }
 fs_initcall(nmi_warning_debugfs);
 
-static void nmi_max_handler(struct irq_work *w)
+static void nmi_check_duration(struct nmiaction *action, u64 duration)
 {
-	struct nmiaction *a = container_of(w, struct nmiaction, irq_work);
 	int remainder_ns, decimal_msecs;
-	u64 whole_msecs = READ_ONCE(a->max_duration);
+	u64 whole_msecs = READ_ONCE(action->max_duration);
+
+	if (duration < nmi_longest_ns || duration < action->max_duration)
+		return;
+
+	action->max_duration = duration;
 
 	remainder_ns = do_div(whole_msecs, (1000 * 1000));
 	decimal_msecs = remainder_ns / 1000;
 
 	printk_ratelimited(KERN_INFO
 		"INFO: NMI handler (%ps) took too long to run: %lld.%03d msecs\n",
-		a->handler, whole_msecs, decimal_msecs);
+		action->handler, whole_msecs, decimal_msecs);
 }
 
 static int nmi_handle(unsigned int type, struct pt_regs *regs)
@@ -142,11 +146,7 @@ static int nmi_handle(unsigned int type, struct pt_regs *regs)
 		delta = sched_clock() - delta;
 		trace_nmi_handler(a->handler, (int)delta, thishandled);
 
-		if (delta < nmi_longest_ns || delta < a->max_duration)
-			continue;
-
-		a->max_duration = delta;
-		irq_work_queue(&a->irq_work);
+		nmi_check_duration(a, delta);
 	}
 
 	rcu_read_unlock();
@@ -164,8 +164,6 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action)
 	if (!action->handler)
 		return -EINVAL;
 
-	init_irq_work(&action->irq_work, nmi_max_handler);
-
 	raw_spin_lock_irqsave(&desc->lock, flags);
 
 	/*
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] x86/nmi: remove the irqwork from long duration nmi handler
  2020-01-01  7:20 [PATCH] x86/nmi: remove the irqwork from long duration nmi handler Changbin Du
@ 2020-01-07 14:41 ` Changbin Du
  2020-01-09 20:55 ` Thomas Gleixner
  1 sibling, 0 replies; 7+ messages in thread
From: Changbin Du @ 2020-01-07 14:41 UTC (permalink / raw)
  To: Changbin Du
  Cc: Thomas Gleixner, Ingo Molnar, Borislav Petkov, hpa, x86,
	Greg Kroah-Hartman, linux-kernel

Hi, Thomas,
Have you checked this one? I think this can even be considered a fix.

On Wed, Jan 01, 2020 at 03:20:17PM +0800, Changbin Du wrote:
> First, printk is NMI context safe now since the safe printk has been
> implemented. The safe printk already has an irqwork to make NMI context
> safe.
> 
> Second, the NMI irqwork actually does not work if a NMI handler causes
> panic by watchdog timeout. This NMI irqwork have no chance to run in such
> case, while the safe printk will flush its per-cpu buffer before panic.
> 
> Signed-off-by: Changbin Du <changbin.du@gmail.com>
> ---
>  arch/x86/include/asm/nmi.h |  1 -
>  arch/x86/kernel/nmi.c      | 20 +++++++++-----------
>  2 files changed, 9 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
> index 75ded1d13d98..9d5d949e662e 100644
> --- a/arch/x86/include/asm/nmi.h
> +++ b/arch/x86/include/asm/nmi.h
> @@ -41,7 +41,6 @@ struct nmiaction {
>  	struct list_head	list;
>  	nmi_handler_t		handler;
>  	u64			max_duration;
> -	struct irq_work		irq_work;
>  	unsigned long		flags;
>  	const char		*name;
>  };
> diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
> index e676a9916c49..aa15d4f2340f 100644
> --- a/arch/x86/kernel/nmi.c
> +++ b/arch/x86/kernel/nmi.c
> @@ -104,18 +104,22 @@ static int __init nmi_warning_debugfs(void)
>  }
>  fs_initcall(nmi_warning_debugfs);
>  
> -static void nmi_max_handler(struct irq_work *w)
> +static void nmi_check_duration(struct nmiaction *action, u64 duration)
>  {
> -	struct nmiaction *a = container_of(w, struct nmiaction, irq_work);
>  	int remainder_ns, decimal_msecs;
> -	u64 whole_msecs = READ_ONCE(a->max_duration);
> +	u64 whole_msecs = READ_ONCE(action->max_duration);
> +
> +	if (duration < nmi_longest_ns || duration < action->max_duration)
> +		return;
> +
> +	action->max_duration = duration;
>  
>  	remainder_ns = do_div(whole_msecs, (1000 * 1000));
>  	decimal_msecs = remainder_ns / 1000;
>  
>  	printk_ratelimited(KERN_INFO
>  		"INFO: NMI handler (%ps) took too long to run: %lld.%03d msecs\n",
> -		a->handler, whole_msecs, decimal_msecs);
> +		action->handler, whole_msecs, decimal_msecs);
>  }
>  
>  static int nmi_handle(unsigned int type, struct pt_regs *regs)
> @@ -142,11 +146,7 @@ static int nmi_handle(unsigned int type, struct pt_regs *regs)
>  		delta = sched_clock() - delta;
>  		trace_nmi_handler(a->handler, (int)delta, thishandled);
>  
> -		if (delta < nmi_longest_ns || delta < a->max_duration)
> -			continue;
> -
> -		a->max_duration = delta;
> -		irq_work_queue(&a->irq_work);
> +		nmi_check_duration(a, delta);
>  	}
>  
>  	rcu_read_unlock();
> @@ -164,8 +164,6 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action)
>  	if (!action->handler)
>  		return -EINVAL;
>  
> -	init_irq_work(&action->irq_work, nmi_max_handler);
> -
>  	raw_spin_lock_irqsave(&desc->lock, flags);
>  
>  	/*
> -- 
> 2.24.0
> 

-- 
Cheers,
Changbin Du

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] x86/nmi: remove the irqwork from long duration nmi handler
  2020-01-01  7:20 [PATCH] x86/nmi: remove the irqwork from long duration nmi handler Changbin Du
  2020-01-07 14:41 ` Changbin Du
@ 2020-01-09 20:55 ` Thomas Gleixner
  2020-01-09 21:02   ` Borislav Petkov
  1 sibling, 1 reply; 7+ messages in thread
From: Thomas Gleixner @ 2020-01-09 20:55 UTC (permalink / raw)
  To: Changbin Du
  Cc: Ingo Molnar, Borislav Petkov, hpa, x86, Greg Kroah-Hartman,
	linux-kernel, Changbin Du

Changbin Du <changbin.du@gmail.com> writes:

> First, printk is NMI context safe now since the safe printk has been
> implemented. The safe printk already has an irqwork to make NMI context
> safe.
>
> Second, the NMI irqwork actually does not work if a NMI handler causes
> panic by watchdog timeout. This NMI irqwork have no chance to run in such
> case, while the safe printk will flush its per-cpu buffer before panic.
>
> Signed-off-by: Changbin Du <changbin.du@gmail.com>

Looks about right.

Acked-by: Thomas Gleixner <tglx@linutronix.de>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] x86/nmi: remove the irqwork from long duration nmi handler
  2020-01-09 20:55 ` Thomas Gleixner
@ 2020-01-09 21:02   ` Borislav Petkov
       [not found]     ` <20200110140549.xqjhrdpxllkvqbuk@mail.google.com>
  0 siblings, 1 reply; 7+ messages in thread
From: Borislav Petkov @ 2020-01-09 21:02 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Changbin Du, Ingo Molnar, hpa, x86, Greg Kroah-Hartman, linux-kernel

On Thu, Jan 09, 2020 at 09:55:51PM +0100, Thomas Gleixner wrote:
> Changbin Du <changbin.du@gmail.com> writes:
> 
> > First, printk is NMI context safe now since the safe printk has been
> > implemented. The safe printk already has an irqwork to make NMI context
> > safe.
> >
> > Second, the NMI irqwork actually does not work if a NMI handler causes
> > panic by watchdog timeout. This NMI irqwork have no chance to run in such
> > case, while the safe printk will flush its per-cpu buffer before panic.
> >
> > Signed-off-by: Changbin Du <changbin.du@gmail.com>
> 
> Looks about right.
> 
> Acked-by: Thomas Gleixner <tglx@linutronix.de>

I'm wondering why is this thing being moved:

-             if (delta < nmi_longest_ns || delta < a->max_duration)
-                     continue;

into nmi_check_duration() and not remaining where it is?

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] x86/nmi: remove the irqwork from long duration nmi handler
       [not found]       ` <20200110151329.GF19453@zn.tnic>
@ 2020-01-10 17:34         ` Changbin Du
  2020-01-10 19:58           ` Borislav Petkov
  0 siblings, 1 reply; 7+ messages in thread
From: Changbin Du @ 2020-01-10 17:34 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Thomas Gleixner, Ingo Molnar, hpa, x86, Greg Kroah-Hartman, linux-kernel

On Fri, Jan 10, 2020 at 04:13:29PM +0100, Borislav Petkov wrote:
> On Fri, Jan 10, 2020 at 10:05:49PM +0800, Changbin Du wrote:
> > I added a new function nmi_check_duration(), so shouldn't this check be
> > done in that function?
> 
> Why should it be done in that function? Your patch is removing irq_work
> - why is it doing additional changes?
> 
Just to move all the check code together into a standalone function.
Yes, this does some code refinement after the irqwork is removed, but
I think it is normal.

> > Don't worry about performance, this function will be inlined by
> > compiler.
> 
> I'm not worried about that at all.
> 
> Btw, why are you sending private mail and not keeping the discussion on
> the mailing list?
> 
oops, typed wrong key. Just added back.

> -- 
> Regards/Gruss,
>     Boris.
> 
> https://people.kernel.org/tglx/notes-about-netiquette

-- 
Cheers,
Changbin Du

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] x86/nmi: remove the irqwork from long duration nmi handler
  2020-01-10 17:34         ` Changbin Du
@ 2020-01-10 19:58           ` Borislav Petkov
  2020-01-11  0:17             ` Changbin Du
  0 siblings, 1 reply; 7+ messages in thread
From: Borislav Petkov @ 2020-01-10 19:58 UTC (permalink / raw)
  To: Changbin Du
  Cc: Thomas Gleixner, Ingo Molnar, hpa, x86, Greg Kroah-Hartman, linux-kernel

On Fri, Jan 10, 2020 at 05:34:50PM +0000, Changbin Du wrote:
> Just to move all the check code together and be a standalone function.
> yes, this somewhat does code refining after the irqwork is removed but
> I think it is normal.

But it makes review harder because your patch is removing irq_work,
*nothing* in the commit message is talking about *why* you're doing
that additional change. I'd imagine at the end of the commit message
something like:

"While at it, repurpose the IRQ work callback into a function which
concentrates the NMI duration checking."

This lets a reader know why that additional change is done instead
of going back'n'forth and having to ask you why you're doing this.

Ok?

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] x86/nmi: remove the irqwork from long duration nmi handler
  2020-01-10 19:58           ` Borislav Petkov
@ 2020-01-11  0:17             ` Changbin Du
  0 siblings, 0 replies; 7+ messages in thread
From: Changbin Du @ 2020-01-11  0:17 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Changbin Du, Thomas Gleixner, Ingo Molnar, hpa, x86,
	Greg Kroah-Hartman, linux-kernel

On Fri, Jan 10, 2020 at 08:58:37PM +0100, Borislav Petkov wrote:
> On Fri, Jan 10, 2020 at 05:34:50PM +0000, Changbin Du wrote:
> > Just to move all the check code together and be a standalone function.
> > yes, this somewhat does code refining after the irqwork is removed but
> > I think it is normal.
> 
> But it makes review harder because your patch is removing irq_work,
> *nothing* in the commit message is talking about *why* you're doing
> that additional change. I'd imagine at the end of the commit message
> something like:
> 
> "While at it, repurpose the IRQ work callback into a function which
> concentrates the NMI duration checking."
> 
> This lets a reader know why that additional change is done instead
> of going back'n'forth and having to ask you why you're doing this.
> 
> Ok?
> 
sure, and thanks for your suggestion. I will send v2 later.

> -- 
> Regards/Gruss,
>     Boris.
> 
> https://people.kernel.org/tglx/notes-about-netiquette

-- 
Cheers,
Changbin Du

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2020-01-11  0:17 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-01  7:20 [PATCH] x86/nmi: remove the irqwork from long duration nmi handler Changbin Du
2020-01-07 14:41 ` Changbin Du
2020-01-09 20:55 ` Thomas Gleixner
2020-01-09 21:02   ` Borislav Petkov
     [not found]     ` <20200110140549.xqjhrdpxllkvqbuk@mail.google.com>
     [not found]       ` <20200110151329.GF19453@zn.tnic>
2020-01-10 17:34         ` Changbin Du
2020-01-10 19:58           ` Borislav Petkov
2020-01-11  0:17             ` Changbin Du

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.