linux-kernel.vger.kernel.org archive mirror
* [patch 0/2] genirq, proc: Speedup /proc/stat interrupt statistics
@ 2019-01-30 12:31 Thomas Gleixner
  2019-01-30 12:31 ` [patch 1/2] genirq: Avoid summation loops for /proc/stat Thomas Gleixner
                   ` (3 more replies)
  0 siblings, 4 replies; 12+ messages in thread
From: Thomas Gleixner @ 2019-01-30 12:31 UTC (permalink / raw)
  To: LKML
  Cc: Waiman Long, Matthew Wilcox, Andrew Morton, Alexey Dobriyan,
	Kees Cook, linux-fsdevel, Davidlohr Bueso, Miklos Szeredi,
	Daniel Colascione, Dave Chinner, Randy Dunlap, Marc Zyngier

Waiman reported that on large systems with a large number of interrupts the
readout of /proc/stat takes a long time to sum up the interrupt
statistics. In principle this is not a problem, but for unknown reasons
some enterprise quality software reads /proc/stat at a high frequency.

The reason for this is that interrupt statistics are accounted per CPU, so
the /proc/stat logic has to sum up the interrupt stats for each interrupt.

The following series addresses this by making the interrupt statistics code
in the core generate the sum directly and by making the loop in the
/proc/stat read function smarter.
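
As an illustration of the difference (a minimal user-space sketch with
made-up names such as FAKE_NR_CPUS and struct fake_irq_desc; the real
changes are in the patches below):

/* Minimal sketch of the old and new read paths; not kernel code. */
#define FAKE_NR_CPUS 8

struct fake_irq_desc {
	unsigned int kstat_irqs[FAKE_NR_CPUS];	/* per-CPU hit counters */
	unsigned int tot_count;			/* new summary counter  */
};

/* Old /proc/stat behaviour: one pass over all CPUs per interrupt. */
static unsigned int sum_per_cpu(const struct fake_irq_desc *desc)
{
	unsigned int cpu, sum = 0;

	for (cpu = 0; cpu < FAKE_NR_CPUS; cpu++)
		sum += desc->kstat_irqs[cpu];
	return sum;
}

/* With the series: a single read for non-PER_CPU interrupts. */
static unsigned int read_total(const struct fake_irq_desc *desc)
{
	return desc->tot_count;
}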

Thanks,

	tglx

8<----------------
 fs/proc/stat.c          |   28 +++++++++++++++++++++++++---
 include/linux/irqdesc.h |    3 ++-
 kernel/irq/chip.c       |   12 ++++++++++--
 kernel/irq/internals.h  |    8 +++++++-
 kernel/irq/irqdesc.c    |    7 ++++++-
 5 files changed, 50 insertions(+), 8 deletions(-)





* [patch 1/2] genirq: Avoid summation loops for /proc/stat
  2019-01-30 12:31 [patch 0/2] genirq, proc: Speedup /proc/stat interrupt statistics Thomas Gleixner
@ 2019-01-30 12:31 ` Thomas Gleixner
  2019-01-30 16:00   ` Waiman Long
  2019-01-30 12:31 ` [patch 2/2] proc/stat: Make the interrupt statistics more efficient Thomas Gleixner
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 12+ messages in thread
From: Thomas Gleixner @ 2019-01-30 12:31 UTC (permalink / raw)
  To: LKML
  Cc: Waiman Long, Matthew Wilcox, Andrew Morton, Alexey Dobriyan,
	Kees Cook, linux-fsdevel, Davidlohr Bueso, Miklos Szeredi,
	Daniel Colascione, Dave Chinner, Randy Dunlap, Marc Zyngier

Waiman reported that on large systems with a large number of interrupts the
readout of /proc/stat takes a long time to sum up the interrupt
statistics. In principle this is not a problem, but for unknown reasons
some enterprise quality software reads /proc/stat at a high frequency.

The reason for this is that interrupt statistics are accounted per CPU, so
the /proc/stat logic has to sum up the interrupt stats for each interrupt.

This can be largely avoided for interrupts which are not marked as
'PER_CPU' interrupts by simply adding a per-interrupt summation counter
which is incremented along with the per-interrupt per-CPU counter.

The PER_CPU interrupts need to avoid that and use only per-CPU accounting
because they share the interrupt number and the interrupt descriptor, so
concurrent updates would conflict or require unwanted synchronization.

Reported-by: Waiman Long <longman@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

8<-------------

 include/linux/irqdesc.h |    3 ++-
 kernel/irq/chip.c       |   12 ++++++++++--
 kernel/irq/internals.h  |    8 +++++++-
 kernel/irq/irqdesc.c    |    7 ++++++-
 4 files changed, 25 insertions(+), 5 deletions(-)


--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -65,9 +65,10 @@ struct irq_desc {
 	unsigned int		core_internal_state__do_not_mess_with_it;
 	unsigned int		depth;		/* nested irq disables */
 	unsigned int		wake_depth;	/* nested wake enables */
+	unsigned int		tot_count;
 	unsigned int		irq_count;	/* For detecting broken IRQs */
-	unsigned long		last_unhandled;	/* Aging timer for unhandled count */
 	unsigned int		irqs_unhandled;
+	unsigned long		last_unhandled;	/* Aging timer for unhandled count */
 	atomic_t		threads_handled;
 	int			threads_handled_last;
 	raw_spinlock_t		lock;
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -855,7 +855,11 @@ void handle_percpu_irq(struct irq_desc *
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 
-	kstat_incr_irqs_this_cpu(desc);
+	/*
+	 * PER CPU interrupts are not serialized. Do not touch
+	 * desc->tot_count.
+	 */
+	__kstat_incr_irqs_this_cpu(desc);
 
 	if (chip->irq_ack)
 		chip->irq_ack(&desc->irq_data);
@@ -884,7 +888,11 @@ void handle_percpu_devid_irq(struct irq_
 	unsigned int irq = irq_desc_get_irq(desc);
 	irqreturn_t res;
 
-	kstat_incr_irqs_this_cpu(desc);
+	/*
+	 * PER CPU interrupts are not serialized. Do not touch
+	 * desc->tot_count.
+	 */
+	__kstat_incr_irqs_this_cpu(desc);
 
 	if (chip->irq_ack)
 		chip->irq_ack(&desc->irq_data);
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -242,12 +242,18 @@ static inline void irq_state_set_masked(
 
 #undef __irqd_to_state
 
-static inline void kstat_incr_irqs_this_cpu(struct irq_desc *desc)
+static inline void __kstat_incr_irqs_this_cpu(struct irq_desc *desc)
 {
 	__this_cpu_inc(*desc->kstat_irqs);
 	__this_cpu_inc(kstat.irqs_sum);
 }
 
+static inline void kstat_incr_irqs_this_cpu(struct irq_desc *desc)
+{
+	__kstat_incr_irqs_this_cpu(desc);
+	desc->tot_count++;
+}
+
 static inline int irq_desc_get_node(struct irq_desc *desc)
 {
 	return irq_common_data_get_node(&desc->irq_common_data);
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -119,6 +119,7 @@ static void desc_set_defaults(unsigned i
 	desc->depth = 1;
 	desc->irq_count = 0;
 	desc->irqs_unhandled = 0;
+	desc->tot_count = 0;
 	desc->name = NULL;
 	desc->owner = owner;
 	for_each_possible_cpu(cpu)
@@ -919,11 +920,15 @@ unsigned int kstat_irqs_cpu(unsigned int
 unsigned int kstat_irqs(unsigned int irq)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
-	int cpu;
 	unsigned int sum = 0;
+	int cpu;
 
 	if (!desc || !desc->kstat_irqs)
 		return 0;
+	if (!irq_settings_is_per_cpu_devid(desc) &&
+	    !irq_settings_is_per_cpu(desc))
+		return desc->tot_count;
+
 	for_each_possible_cpu(cpu)
 		sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
 	return sum;




* [patch 2/2] proc/stat: Make the interrupt statistics more efficient
  2019-01-30 12:31 [patch 0/2] genirq, proc: Speedup /proc/stat interrupt statistics Thomas Gleixner
  2019-01-30 12:31 ` [patch 1/2] genirq: Avoid summation loops for /proc/stat Thomas Gleixner
@ 2019-01-30 12:31 ` Thomas Gleixner
  2019-01-30 21:24   ` Alexey Dobriyan
  2019-01-30 15:46 ` [patch 0/2] genirq, proc: Speedup /proc/stat interrupt statistics Waiman Long
  2019-01-30 21:58 ` Andrew Morton
  3 siblings, 1 reply; 12+ messages in thread
From: Thomas Gleixner @ 2019-01-30 12:31 UTC (permalink / raw)
  To: LKML
  Cc: Waiman Long, Matthew Wilcox, Andrew Morton, Alexey Dobriyan,
	Kees Cook, linux-fsdevel, Davidlohr Bueso, Miklos Szeredi,
	Daniel Colascione, Dave Chinner, Randy Dunlap, Marc Zyngier

Waiman reported that on large systems with a large number of interrupts the
readout of /proc/stat takes a long time to sum up the interrupt
statistics. In principle this is not a problem, but for unknown reasons
some enterprise quality software reads /proc/stat at a high frequency.

The reason for this is that interrupt statistics are accounted per CPU, so
the /proc/stat logic has to sum up the interrupt stats for each interrupt.

The interrupt core now provides a per-interrupt summary counter which can
be used to avoid the summation loops completely, except for interrupts
marked PER_CPU, which are only a small fraction of the interrupt space if
present at all.

Another simplification is to iterate only over the active interrupts, skip
the potentially large gaps in the interrupt number space, and just print
zeros for the gaps without going into the interrupt core in the first
place.

Reported-by: Waiman Long <longman@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 fs/proc/stat.c |   28 +++++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)

--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -79,6 +79,30 @@ static u64 get_iowait_time(int cpu)
 
 #endif
 
+static void show_irq_gap(struct seq_file *p, int gap)
+{
+	static const char zeros[] = " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0";
+
+	while (gap > 0) {
+		int inc = min_t(int, gap, ARRAY_SIZE(zeros) / 2);
+
+		seq_write(p, zeros, 2 * inc);
+		gap -= inc;
+	}
+}
+
+static void show_all_irqs(struct seq_file *p)
+{
+	int i, next = 0;
+
+	for_each_active_irq(i) {
+		show_irq_gap(p, i - next);
+		seq_put_decimal_ull(p, " ", kstat_irqs_usr(i));
+		next = i + 1;
+	}
+	show_irq_gap(p, nr_irqs - next);
+}
+
 static int show_stat(struct seq_file *p, void *v)
 {
 	int i, j;
@@ -156,9 +180,7 @@ static int show_stat(struct seq_file *p,
 	}
 	seq_put_decimal_ull(p, "intr ", (unsigned long long)sum);
 
-	/* sum again ? it could be updated? */
-	for_each_irq_nr(j)
-		seq_put_decimal_ull(p, " ", kstat_irqs_usr(j));
+	show_all_irqs(p);
 
 	seq_printf(p,
 		"\nctxt %llu\n"




* Re: [patch 0/2] genirq, proc: Speedup /proc/stat interrupt statistics
  2019-01-30 12:31 [patch 0/2] genirq, proc: Speedup /proc/stat interrupt statistics Thomas Gleixner
  2019-01-30 12:31 ` [patch 1/2] genirq: Avoid summation loops for /proc/stat Thomas Gleixner
  2019-01-30 12:31 ` [patch 2/2] proc/stat: Make the interrupt statistics more efficient Thomas Gleixner
@ 2019-01-30 15:46 ` Waiman Long
  2019-01-30 21:58 ` Andrew Morton
  3 siblings, 0 replies; 12+ messages in thread
From: Waiman Long @ 2019-01-30 15:46 UTC (permalink / raw)
  To: Thomas Gleixner, LKML
  Cc: Matthew Wilcox, Andrew Morton, Alexey Dobriyan, Kees Cook,
	linux-fsdevel, Davidlohr Bueso, Miklos Szeredi,
	Daniel Colascione, Dave Chinner, Randy Dunlap, Marc Zyngier

On 01/30/2019 07:31 AM, Thomas Gleixner wrote:
> Waiman reported that on large systems with a large number of interrupts the
> readout of /proc/stat takes a long time to sum up the interrupt
> statistics. In principle this is not a problem, but for unknown reasons
> some enterprise quality software reads /proc/stat at a high frequency.
>
> The reason for this is that interrupt statistics are accounted per CPU, so
> the /proc/stat logic has to sum up the interrupt stats for each interrupt.
>
> The following series addresses this by making the interrupt statistics code
> in the core generate the sum directly and by making the loop in the
> /proc/stat read function smarter.
>
> Thanks,
>
> 	tglx
>
> 8<----------------
>  fs/proc/stat.c          |   28 +++++++++++++++++++++++++---
>  include/linux/irqdesc.h |    3 ++-
>  kernel/irq/chip.c       |   12 ++++++++++--
>  kernel/irq/internals.h  |    8 +++++++-
>  kernel/irq/irqdesc.c    |    7 ++++++-
>  5 files changed, 50 insertions(+), 8 deletions(-)
>
>
>
For the whole series,

Acked-by: Waiman Long <longman@redhat.com>



* Re: [patch 1/2] genirq: Avoid summation loops for /proc/stat
  2019-01-30 12:31 ` [patch 1/2] genirq: Avoid summation loops for /proc/stat Thomas Gleixner
@ 2019-01-30 16:00   ` Waiman Long
  2019-01-30 17:58     ` Thomas Gleixner
  0 siblings, 1 reply; 12+ messages in thread
From: Waiman Long @ 2019-01-30 16:00 UTC (permalink / raw)
  To: Thomas Gleixner, LKML
  Cc: Matthew Wilcox, Andrew Morton, Alexey Dobriyan, Kees Cook,
	linux-fsdevel, Davidlohr Bueso, Miklos Szeredi,
	Daniel Colascione, Dave Chinner, Randy Dunlap, Marc Zyngier

On 01/30/2019 07:31 AM, Thomas Gleixner wrote:
> Waiman reported that on large systems with a large number of interrupts the
> readout of /proc/stat takes a long time to sum up the interrupt
> statistics. In principle this is not a problem, but for unknown reasons
> some enterprise quality software reads /proc/stat at a high frequency.
>
> The reason for this is that interrupt statistics are accounted per CPU, so
> the /proc/stat logic has to sum up the interrupt stats for each interrupt.
>
> This can be largely avoided for interrupts which are not marked as
> 'PER_CPU' interrupts by simply adding a per-interrupt summation counter
> which is incremented along with the per-interrupt per-CPU counter.
>
> The PER_CPU interrupts need to avoid that and use only per-CPU accounting
> because they share the interrupt number and the interrupt descriptor, so
> concurrent updates would conflict or require unwanted synchronization.
>
> Reported-by: Waiman Long <longman@redhat.com>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
>
> 8<-------------
>
>  include/linux/irqdesc.h |    3 ++-
>  kernel/irq/chip.c       |   12 ++++++++++--
>  kernel/irq/internals.h  |    8 +++++++-
>  kernel/irq/irqdesc.c    |    7 ++++++-
>  4 files changed, 25 insertions(+), 5 deletions(-)
>
>
> --- a/include/linux/irqdesc.h
> +++ b/include/linux/irqdesc.h
> @@ -65,9 +65,10 @@ struct irq_desc {
>  	unsigned int		core_internal_state__do_not_mess_with_it;
>  	unsigned int		depth;		/* nested irq disables */
>  	unsigned int		wake_depth;	/* nested wake enables */
> +	unsigned int		tot_count;
>  	unsigned int		irq_count;	/* For detecting broken IRQs */
> -	unsigned long		last_unhandled;	/* Aging timer for unhandled count */
>  	unsigned int		irqs_unhandled;
> +	unsigned long		last_unhandled;	/* Aging timer for unhandled count */
>  	atomic_t		threads_handled;
>  	int			threads_handled_last;
>  	raw_spinlock_t		lock;

Just one minor nit: why do you want to move last_unhandled down one
slot? There were five ints before, so adding one more just fills the
padding hole. Moving last_unhandled down will probably leave 4-byte
holes both above and below it, assuming that raw_spinlock_t is 4 bytes.
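
A rough sketch of the layout argument (assuming a 64-bit build, a 4-byte
raw_spinlock_t, one further unsigned int just above the quoted hunk, and
that the run of ints starts 8-byte aligned; illustrative only, not the
real struct irq_desc):

struct keep_order {			/* tot_count merely added          */
	unsigned int ints[6];		/* 0..23: the six 4-byte fields    */
	unsigned long last_unhandled;	/* 24: naturally aligned, no hole  */
	unsigned int irqs_unhandled;	/* 32 */
	int threads_handled;		/* 36: stands in for atomic_t      */
	int threads_handled_last;	/* 40 */
	int lock;			/* 44: stands in for raw_spinlock_t */
};					/* next 8-byte member lands at 48  */

struct as_posted {			/* last_unhandled moved down       */
	unsigned int ints[7];		/* 0..27: seven 4-byte fields      */
	/* 4-byte hole */
	unsigned long last_unhandled;	/* 32 */
	int threads_handled;		/* 40 */
	int threads_handled_last;	/* 44 */
	int lock;			/* 48 */
	/* 4-byte hole before the next 8-byte-aligned member */
};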

Cheers,
Longman


* Re: [patch 1/2] genirq: Avoid summation loops for /proc/stat
  2019-01-30 16:00   ` Waiman Long
@ 2019-01-30 17:58     ` Thomas Gleixner
  0 siblings, 0 replies; 12+ messages in thread
From: Thomas Gleixner @ 2019-01-30 17:58 UTC (permalink / raw)
  To: Waiman Long
  Cc: LKML, Matthew Wilcox, Andrew Morton, Alexey Dobriyan, Kees Cook,
	linux-fsdevel, Davidlohr Bueso, Miklos Szeredi,
	Daniel Colascione, Dave Chinner, Randy Dunlap, Marc Zyngier

On Wed, 30 Jan 2019, Waiman Long wrote:
> On 01/30/2019 07:31 AM, Thomas Gleixner wrote:
> > --- a/include/linux/irqdesc.h
> > +++ b/include/linux/irqdesc.h
> > @@ -65,9 +65,10 @@ struct irq_desc {
> >  	unsigned int		core_internal_state__do_not_mess_with_it;
> >  	unsigned int		depth;		/* nested irq disables */
> >  	unsigned int		wake_depth;	/* nested wake enables */
> > +	unsigned int		tot_count;
> >  	unsigned int		irq_count;	/* For detecting broken IRQs */
> > -	unsigned long		last_unhandled;	/* Aging timer for unhandled count */
> >  	unsigned int		irqs_unhandled;
> > +	unsigned long		last_unhandled;	/* Aging timer for unhandled count */
> >  	atomic_t		threads_handled;
> >  	int			threads_handled_last;
> >  	raw_spinlock_t		lock;
> 
> Just one minor nit: why do you want to move last_unhandled down one
> slot? There were five ints before, so adding one more just fills the
> padding hole. Moving last_unhandled down will probably leave 4-byte
> holes both above and below it, assuming that raw_spinlock_t is 4 bytes.

Unintentional wreckage. Will undo. Thanks for spotting it.

Thanks,

	tglx


* Re: [patch 2/2] proc/stat: Make the interrupt statistics more efficient
  2019-01-30 12:31 ` [patch 2/2] proc/stat: Make the interrupt statistics more efficient Thomas Gleixner
@ 2019-01-30 21:24   ` Alexey Dobriyan
  2019-01-30 21:27     ` Thomas Gleixner
  0 siblings, 1 reply; 12+ messages in thread
From: Alexey Dobriyan @ 2019-01-30 21:24 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: LKML, Waiman Long, Matthew Wilcox, Andrew Morton, Kees Cook,
	linux-fsdevel, Davidlohr Bueso, Miklos Szeredi,
	Daniel Colascione, Dave Chinner, Randy Dunlap, Marc Zyngier

On Wed, Jan 30, 2019 at 01:31:32PM +0100, Thomas Gleixner wrote:
> +static void show_irq_gap(struct seq_file *p, int gap)
> +{
> +	static const char zeros[] = " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0";
> +
> +	while (gap > 0) {
> +		int inc = min_t(int, gap, ARRAY_SIZE(zeros) / 2);
> +
> +		seq_write(p, zeros, 2 * inc);
> +		gap -= inc;
> +	}
> +}
> +
> +static void show_all_irqs(struct seq_file *p)
> +{
> +	int i, next = 0;
> +
> +	for_each_active_irq(i) {
> +		show_irq_gap(p, i - next);
> +		seq_put_decimal_ull(p, " ", kstat_irqs_usr(i));
> +		next = i + 1;
> +	}
> +	show_irq_gap(p, nr_irqs - next);
> +}

Every signed int can and should be unsigned int in this patch.
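
A sketch of what that could look like (keeping the helpers from the patch
and only changing the types; offered as an illustration of the suggestion,
not necessarily the final version):

static void show_irq_gap(struct seq_file *p, unsigned int gap)
{
	static const char zeros[] = " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0";

	while (gap > 0) {
		unsigned int inc = min_t(unsigned int, gap, ARRAY_SIZE(zeros) / 2);

		seq_write(p, zeros, 2 * inc);
		gap -= inc;
	}
}

static void show_all_irqs(struct seq_file *p)
{
	unsigned int i, next = 0;

	for_each_active_irq(i) {
		show_irq_gap(p, i - next);
		seq_put_decimal_ull(p, " ", kstat_irqs_usr(i));
		next = i + 1;
	}
	show_irq_gap(p, nr_irqs - next);
}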


* Re: [patch 2/2] proc/stat: Make the interrupt statistics more efficient
  2019-01-30 21:24   ` Alexey Dobriyan
@ 2019-01-30 21:27     ` Thomas Gleixner
  0 siblings, 0 replies; 12+ messages in thread
From: Thomas Gleixner @ 2019-01-30 21:27 UTC (permalink / raw)
  To: Alexey Dobriyan
  Cc: LKML, Waiman Long, Matthew Wilcox, Andrew Morton, Kees Cook,
	linux-fsdevel, Davidlohr Bueso, Miklos Szeredi,
	Daniel Colascione, Dave Chinner, Randy Dunlap, Marc Zyngier

On Thu, 31 Jan 2019, Alexey Dobriyan wrote:

> On Wed, Jan 30, 2019 at 01:31:32PM +0100, Thomas Gleixner wrote:
> > +static void show_irq_gap(struct seq_file *p, int gap)
> > +{
> > +	static const char zeros[] = " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0";
> > +
> > +	while (gap > 0) {
> > +		int inc = min_t(int, gap, ARRAY_SIZE(zeros) / 2);
> > +
> > +		seq_write(p, zeros, 2 * inc);
> > +		gap -= inc;
> > +	}
> > +}
> > +
> > +static void show_all_irqs(struct seq_file *p)
> > +{
> > +	int i, next = 0;
> > +
> > +	for_each_active_irq(i) {
> > +		show_irq_gap(p, i - next);
> > +		seq_put_decimal_ull(p, " ", kstat_irqs_usr(i));
> > +		next = i + 1;
> > +	}
> > +	show_irq_gap(p, nr_irqs - next);
> > +}
> 
> Every signed int can and should be unsigned int in this patch.
> 

Indeed.


* Re: [patch 0/2] genirq, proc: Speedup /proc/stat interrupt statistics
  2019-01-30 12:31 [patch 0/2] genirq, proc: Speedup /proc/stat interrupt statistics Thomas Gleixner
                   ` (2 preceding siblings ...)
  2019-01-30 15:46 ` [patch 0/2] genirq, proc: Speedup /proc/stat interrupt statistics Waiman Long
@ 2019-01-30 21:58 ` Andrew Morton
  2019-01-30 22:00   ` Thomas Gleixner
  3 siblings, 1 reply; 12+ messages in thread
From: Andrew Morton @ 2019-01-30 21:58 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: LKML, Waiman Long, Matthew Wilcox, Alexey Dobriyan, Kees Cook,
	linux-fsdevel, Davidlohr Bueso, Miklos Szeredi,
	Daniel Colascione, Dave Chinner, Randy Dunlap, Marc Zyngier

On Wed, 30 Jan 2019 13:31:30 +0100 Thomas Gleixner <tglx@linutronix.de> wrote:

> Waiman reported that on large systems with a large number of interrupts the
> readout of /proc/stat takes a long time to sum up the interrupt
> statistics. In principle this is not a problem, but for unknown reasons
> some enterprise quality software reads /proc/stat at a high frequency.
>
> The reason for this is that interrupt statistics are accounted per CPU, so
> the /proc/stat logic has to sum up the interrupt stats for each interrupt.
>
> The following series addresses this by making the interrupt statistics code
> in the core generate the sum directly and by making the loop in the
> /proc/stat read function smarter.
> 

Has the speedup been quantified?


* Re: [patch 0/2] genirq, proc: Speedup /proc/stat interrupt statistics
  2019-01-30 21:58 ` Andrew Morton
@ 2019-01-30 22:00   ` Thomas Gleixner
  2019-01-30 22:09     ` Waiman Long
  2019-01-31  3:46     ` Waiman Long
  0 siblings, 2 replies; 12+ messages in thread
From: Thomas Gleixner @ 2019-01-30 22:00 UTC (permalink / raw)
  To: Andrew Morton
  Cc: LKML, Waiman Long, Matthew Wilcox, Alexey Dobriyan, Kees Cook,
	linux-fsdevel, Davidlohr Bueso, Miklos Szeredi,
	Daniel Colascione, Dave Chinner, Randy Dunlap, Marc Zyngier

On Wed, 30 Jan 2019, Andrew Morton wrote:
> On Wed, 30 Jan 2019 13:31:30 +0100 Thomas Gleixner <tglx@linutronix.de> wrote:
> 
> > Waiman reported that on large systems with a large number of interrupts the
> > readout of /proc/stat takes a long time to sum up the interrupt
> > statistics. In principle this is not a problem, but for unknown reasons
> > some enterprise quality software reads /proc/stat at a high frequency.
> >
> > The reason for this is that interrupt statistics are accounted per CPU, so
> > the /proc/stat logic has to sum up the interrupt stats for each interrupt.
> >
> > The following series addresses this by making the interrupt statistics code
> > in the core generate the sum directly and by making the loop in the
> > /proc/stat read function smarter.
> > 
> 
> Has the speedup been quantified?

Waiman should be able to provide numbers.




* Re: [patch 0/2] genirq, proc: Speedup /proc/stat interrupt statistics
  2019-01-30 22:00   ` Thomas Gleixner
@ 2019-01-30 22:09     ` Waiman Long
  2019-01-31  3:46     ` Waiman Long
  1 sibling, 0 replies; 12+ messages in thread
From: Waiman Long @ 2019-01-30 22:09 UTC (permalink / raw)
  To: Thomas Gleixner, Andrew Morton
  Cc: LKML, Matthew Wilcox, Alexey Dobriyan, Kees Cook, linux-fsdevel,
	Davidlohr Bueso, Miklos Szeredi, Daniel Colascione, Dave Chinner,
	Randy Dunlap, Marc Zyngier

On 01/30/2019 05:00 PM, Thomas Gleixner wrote:
> On Wed, 30 Jan 2019, Andrew Morton wrote:
>> On Wed, 30 Jan 2019 13:31:30 +0100 Thomas Gleixner <tglx@linutronix.de> wrote:
>>
>>> Waiman reported that on large systems with a large number of interrupts the
>>> readout of /proc/stat takes a long time to sum up the interrupt
>>> statistics. In principle this is not a problem, but for unknown reasons
>>> some enterprise quality software reads /proc/stat at a high frequency.
>>>
>>> The reason for this is that interrupt statistics are accounted per CPU, so
>>> the /proc/stat logic has to sum up the interrupt stats for each interrupt.
>>>
>>> The following series addresses this by making the interrupt statistics code
>>> in the core generate the sum directly and by making the loop in the
>>> /proc/stat read function smarter.
>>>
>> Has the speedup been quantified?
> Waiman should be able to provide numbers
>
>
Will run some performance tests on the patch and give you guys the results
sometime tomorrow.

Cheers,
Longman



* Re: [patch 0/2] genirq, proc: Speedup /proc/stat interrupt statistics
  2019-01-30 22:00   ` Thomas Gleixner
  2019-01-30 22:09     ` Waiman Long
@ 2019-01-31  3:46     ` Waiman Long
  1 sibling, 0 replies; 12+ messages in thread
From: Waiman Long @ 2019-01-31  3:46 UTC (permalink / raw)
  To: Thomas Gleixner, Andrew Morton
  Cc: LKML, Matthew Wilcox, Alexey Dobriyan, Kees Cook, linux-fsdevel,
	Davidlohr Bueso, Miklos Szeredi, Daniel Colascione, Dave Chinner,
	Randy Dunlap, Marc Zyngier

On 01/30/2019 05:00 PM, Thomas Gleixner wrote:
> On Wed, 30 Jan 2019, Andrew Morton wrote:
>> On Wed, 30 Jan 2019 13:31:30 +0100 Thomas Gleixner <tglx@linutronix.de> wrote:
>>
>>> Waiman reported that on large systems with a large number of interrupts the
>>> readout of /proc/stat takes a long time to sum up the interrupt
>>> statistics. In principle this is not a problem, but for unknown reasons
>>> some enterprise quality software reads /proc/stat at a high frequency.
>>>
>>> The reason for this is that interrupt statistics are accounted per CPU, so
>>> the /proc/stat logic has to sum up the interrupt stats for each interrupt.
>>>
>>> The following series addresses this by making the interrupt statistics code
>>> in the core generate the sum directly and by making the loop in the
>>> /proc/stat read function smarter.
>>>
>> Has the speedup been quantified?
> Waiman should be able to provide numbers
>
>
On a 4-socket IvyBridge-EX system (60-core 120-thread) with 3016 irqs, I
ran a test program that read /proc/stat 50,000 times. Before the patch,
the elapsed time was 18.436s (sys 18.380s). After the patch, it was
3.769s (sys 3.742s), an almost 80% reduction in execution time. It was
better than I expected. I like that.
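
The exact test program is not part of this thread; a minimal stand-in that
exercises the same path could look like this (run under time(1)):

/* Read /proc/stat repeatedly; not the actual program used above. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[65536];

	for (int i = 0; i < 50000; i++) {
		int fd = open("/proc/stat", O_RDONLY);

		if (fd < 0) {
			perror("open /proc/stat");
			return 1;
		}
		while (read(fd, buf, sizeof(buf)) > 0)
			;
		close(fd);
	}
	return 0;
}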

Cheers,
Longman



