All of lore.kernel.org
 help / color / mirror / Atom feed
From: Waiman Long <longman@redhat.com>
To: Thomas Gleixner <tglx@linutronix.de>,
	Matthew Wilcox <willy@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	Alexey Dobriyan <adobriyan@gmail.com>,
	Kees Cook <keescook@chromium.org>,
	linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	Davidlohr Bueso <dave@stgolabs.net>,
	Miklos Szeredi <miklos@szeredi.hu>,
	Daniel Colascione <dancol@google.com>,
	Dave Chinner <david@fromorbit.com>,
	Randy Dunlap <rdunlap@infradead.org>
Subject: Re: [PATCH v3 4/4] /proc/stat: Call kstat_irqs_usr() only for active IRQs
Date: Mon, 14 Jan 2019 14:04:18 -0500	[thread overview]
Message-ID: <3b863edf-1a3e-1436-93e7-717550c7256f@redhat.com> (raw)
In-Reply-To: <alpine.DEB.2.21.1901112146180.1661@nanos.tec.linutronix.de>

On 01/11/2019 04:02 PM, Thomas Gleixner wrote:
> On Fri, 11 Jan 2019, Matthew Wilcox wrote:
>> On Fri, Jan 11, 2019 at 08:19:33PM +0100, Thomas Gleixner wrote:
>>> On Fri, 11 Jan 2019, Thomas Gleixner wrote:
>>>> --- a/kernel/irq/internals.h
>>>> +++ b/kernel/irq/internals.h
>>>> @@ -246,6 +246,7 @@ static inline void kstat_incr_irqs_this_
>>>>  {
>>>>  	__this_cpu_inc(*desc->kstat_irqs);
>>>>  	__this_cpu_inc(kstat.irqs_sum);
>>>> +	desc->tot_count++;
>>> There is one issue here. True percpu interrupts, like the timer interrupts
>>> on ARM(64), will access that in parallel. But that's not rocket science to
>>> fix.
>> I was wondering about that from an efficiency point of view.  Since
>> interrupts are generally targetted to a single CPU, there's no cacheline
>> bouncing to speak of, except for interrupts like TLB shootdown on x86.
>> It might make sense for the percpu interrupts to still sum them at read
>> time, and not sum them at interrupt time.
> Yes, for regular interrupts the stats are properly serialized and the patch
> works just fine. For true percpu interrupts we just can avoid the update of
> tot_count and sum them up as before. The special vectors on x86 are
> accounted separately anyway and not part of the problem here. That
> show_all_irqs() code only deals with device interrupts which are backed by
> irq descriptors. TLB/IPI/.... are different beasts and handled low level in
> the architecture code. Though ARM and some others have these interrupts as
> regular Linux interrupt numbers. That's why we need to care.
>
> Updated untested patch below.
>
> Thanks,
>
> 	tglx
>
> 8<-------------
>
>  fs/proc/stat.c          |   28 +++++++++++++++++++++++++---
>  include/linux/irqdesc.h |    3 ++-
>  kernel/irq/chip.c       |   12 ++++++++++--
>  kernel/irq/internals.h  |    8 +++++++-
>  kernel/irq/irqdesc.c    |    7 ++++++-
>  5 files changed, 50 insertions(+), 8 deletions(-)
>
> --- a/fs/proc/stat.c
> +++ b/fs/proc/stat.c
> @@ -79,6 +79,30 @@ static u64 get_iowait_time(int cpu)
>  
>  #endif
>  
> +static void show_irq_gap(struct seq_file *p, int gap)
> +{
> +	static const char zeros[] = " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0";
> +
> +	while (gap > 0) {
> +		int inc = min_t(int, gap, ARRAY_SIZE(zeros) / 2);
> +
> +		seq_write(p, zeros, 2 * inc);
> +		gap -= inc;
> +	}
> +}
> +
> +static void show_all_irqs(struct seq_file *p)
> +{
> +	int i, next = 0;
> +
> +	for_each_active_irq(i) {
> +		show_irq_gap(p, i - next);
> +		seq_put_decimal_ull(p, " ", kstat_irqs_usr(i));
> +		next = i + 1;
> +	}
> +	show_irq_gap(p, nr_irqs - next);
> +}
> +
>  static int show_stat(struct seq_file *p, void *v)
>  {
>  	int i, j;
> @@ -156,9 +180,7 @@ static int show_stat(struct seq_file *p,
>  	}
>  	seq_put_decimal_ull(p, "intr ", (unsigned long long)sum);
>  
> -	/* sum again ? it could be updated? */
> -	for_each_irq_nr(j)
> -		seq_put_decimal_ull(p, " ", kstat_irqs_usr(j));
> +	show_all_irqs(p);
>  
>  	seq_printf(p,
>  		"\nctxt %llu\n"
> --- a/include/linux/irqdesc.h
> +++ b/include/linux/irqdesc.h
> @@ -65,9 +65,10 @@ struct irq_desc {
>  	unsigned int		core_internal_state__do_not_mess_with_it;
>  	unsigned int		depth;		/* nested irq disables */
>  	unsigned int		wake_depth;	/* nested wake enables */
> +	unsigned int		tot_count;
>  	unsigned int		irq_count;	/* For detecting broken IRQs */
> -	unsigned long		last_unhandled;	/* Aging timer for unhandled count */
>  	unsigned int		irqs_unhandled;
> +	unsigned long		last_unhandled;	/* Aging timer for unhandled count */
>  	atomic_t		threads_handled;
>  	int			threads_handled_last;
>  	raw_spinlock_t		lock;
> --- a/kernel/irq/chip.c
> +++ b/kernel/irq/chip.c
> @@ -855,7 +855,11 @@ void handle_percpu_irq(struct irq_desc *
>  {
>  	struct irq_chip *chip = irq_desc_get_chip(desc);
>  
> -	kstat_incr_irqs_this_cpu(desc);
> +	/*
> +	 * PER CPU interrupts are not serialized. Do not touch
> +	 * desc->tot_count.
> +	 */
> +	__kstat_incr_irqs_this_cpu(desc);
>  
>  	if (chip->irq_ack)
>  		chip->irq_ack(&desc->irq_data);
> @@ -884,7 +888,11 @@ void handle_percpu_devid_irq(struct irq_
>  	unsigned int irq = irq_desc_get_irq(desc);
>  	irqreturn_t res;
>  
> -	kstat_incr_irqs_this_cpu(desc);
> +	/*
> +	 * PER CPU interrupts are not serialized. Do not touch
> +	 * desc->tot_count.
> +	 */
> +	__kstat_incr_irqs_this_cpu(desc);
>  
>  	if (chip->irq_ack)
>  		chip->irq_ack(&desc->irq_data);
> --- a/kernel/irq/internals.h
> +++ b/kernel/irq/internals.h
> @@ -242,12 +242,18 @@ static inline void irq_state_set_masked(
>  
>  #undef __irqd_to_state
>  
> -static inline void kstat_incr_irqs_this_cpu(struct irq_desc *desc)
> +static inline void __kstat_incr_irqs_this_cpu(struct irq_desc *desc)
>  {
>  	__this_cpu_inc(*desc->kstat_irqs);
>  	__this_cpu_inc(kstat.irqs_sum);
>  }
>  
> +static inline void kstat_incr_irqs_this_cpu(struct irq_desc *desc)
> +{
> +	__kstat_incr_irqs_this_cpu(desc);
> +	desc->tot_count++;
> +}
> +
>  static inline int irq_desc_get_node(struct irq_desc *desc)
>  {
>  	return irq_common_data_get_node(&desc->irq_common_data);
> --- a/kernel/irq/irqdesc.c
> +++ b/kernel/irq/irqdesc.c
> @@ -119,6 +119,7 @@ static void desc_set_defaults(unsigned i
>  	desc->depth = 1;
>  	desc->irq_count = 0;
>  	desc->irqs_unhandled = 0;
> +	desc->tot_count = 0;
>  	desc->name = NULL;
>  	desc->owner = owner;
>  	for_each_possible_cpu(cpu)
> @@ -919,11 +920,15 @@ unsigned int kstat_irqs_cpu(unsigned int
>  unsigned int kstat_irqs(unsigned int irq)
>  {
>  	struct irq_desc *desc = irq_to_desc(irq);
> -	int cpu;
>  	unsigned int sum = 0;
> +	int cpu;
>  
>  	if (!desc || !desc->kstat_irqs)
>  		return 0;
> +	if (!irq_settings_is_per_cpu_devid(desc) &&
> +	    !irq_settings_is_per_cpu(desc))
> +	    return desc->tot_count;
> +
>  	for_each_possible_cpu(cpu)
>  		sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
>  	return sum;
>
>
That looks good to me. Thanks for providing a more simple solution.

BTW, if the percpu IRQ is known at allocation time, maybe we should just
not allocate a percpu count for the corresponding descriptor.

Cheers,
Longman



  reply	other threads:[~2019-01-14 19:04 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-01-09 19:20 [PATCH v3 0/4] /proc/stat: Reduce irqs counting performance overhead Waiman Long
2019-01-09 19:20 ` [PATCH v3 1/4] /proc/stat: Extract irqs counting code into show_stat_irqs() Waiman Long
2019-01-09 19:20 ` [PATCH v3 2/4] /proc/stat: Only do percpu sum of active IRQs Waiman Long
2019-01-09 19:20 ` [PATCH v3 3/4] genirq: Track the number " Waiman Long
2019-01-09 19:20 ` [PATCH v3 4/4] /proc/stat: Call kstat_irqs_usr() only for " Waiman Long
2019-01-11 17:23   ` Thomas Gleixner
2019-01-11 19:19     ` Thomas Gleixner
2019-01-11 19:23       ` Matthew Wilcox
2019-01-11 21:02         ` Thomas Gleixner
2019-01-14 19:04           ` Waiman Long [this message]
2019-01-15  9:24             ` Thomas Gleixner
2019-01-15 15:52               ` Waiman Long

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3b863edf-1a3e-1436-93e7-717550c7256f@redhat.com \
    --to=longman@redhat.com \
    --cc=adobriyan@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=dancol@google.com \
    --cc=dave@stgolabs.net \
    --cc=david@fromorbit.com \
    --cc=keescook@chromium.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=miklos@szeredi.hu \
    --cc=rdunlap@infradead.org \
    --cc=tglx@linutronix.de \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.