All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] ring-buffer: speed up buffer resets by avoiding synchronize_rcu for each CPU
@ 2020-06-25  5:34 Nicholas Piggin
  2020-06-25 21:42 ` Anton Blanchard
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Nicholas Piggin @ 2020-06-25  5:34 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Nicholas Piggin, Paul McKenney, Anton Blanchard, linux-kernel

On a 144 thread system, `perf ftrace` takes about 20 seconds to start
up, due to calling synchronize_rcu() for each CPU.

  cat /proc/108560/stack
    0xc0003e7eb336f470
    __switch_to+0x2e0/0x480
    __wait_rcu_gp+0x20c/0x220
    synchronize_rcu+0x9c/0xc0
    ring_buffer_reset_cpu+0x88/0x2e0
    tracing_reset_online_cpus+0x84/0xe0
    tracing_open+0x1d4/0x1f0

On a system with 10x more threads, it starts to become an annoyance.

Batch these up so we disable all the per-cpu buffers first, then
synchronize_rcu() once, then reset each of the buffers. This brings
the time down to about 0.5s.

Cc: Paul McKenney <paulmck@kernel.org>
Cc: Anton Blanchard <anton@ozlabs.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 include/linux/ring_buffer.h |  1 +
 kernel/trace/ring_buffer.c  | 85 +++++++++++++++++++++++++++++++------
 kernel/trace/trace.c        |  4 +-
 3 files changed, 73 insertions(+), 17 deletions(-)

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index c76b2f3b3ac4..136ea0997e6d 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -143,6 +143,7 @@ bool ring_buffer_iter_dropped(struct ring_buffer_iter *iter);
 unsigned long ring_buffer_size(struct trace_buffer *buffer, int cpu);
 
 void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu);
+void ring_buffer_reset_online_cpus(struct trace_buffer *buffer);
 void ring_buffer_reset(struct trace_buffer *buffer);
 
 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index b8e1ca48be50..3f1fd02bd14a 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -270,6 +270,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
 #define for_each_buffer_cpu(buffer, cpu)		\
 	for_each_cpu(cpu, buffer->cpumask)
 
+#define for_each_online_buffer_cpu(buffer, cpu)		\
+	for_each_cpu_and(cpu, buffer->cpumask, cpu_online_mask)
+
 #define TS_SHIFT	27
 #define TS_MASK		((1ULL << TS_SHIFT) - 1)
 #define TS_DELTA_TEST	(~TS_MASK)
@@ -4484,6 +4487,26 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 	rb_head_page_activate(cpu_buffer);
 }
 
+/* Must have disabled the cpu buffer then done a synchronize_rcu */
+static void reset_disabled_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+
+	if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
+		goto out;
+
+	arch_spin_lock(&cpu_buffer->lock);
+
+	rb_reset_cpu(cpu_buffer);
+
+	arch_spin_unlock(&cpu_buffer->lock);
+
+ out:
+	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+}
+
 /**
  * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer
  * @buffer: The ring buffer to reset a per cpu buffer of
@@ -4492,7 +4515,6 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
-	unsigned long flags;
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return;
@@ -4503,24 +4525,42 @@ void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu)
 	/* Make sure all commits have finished */
 	synchronize_rcu();
 
-	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+	reset_disabled_cpu_buffer(cpu_buffer);
 
-	if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
-		goto out;
+	atomic_dec(&cpu_buffer->record_disabled);
+	atomic_dec(&cpu_buffer->resize_disabled);
+}
+EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
 
-	arch_spin_lock(&cpu_buffer->lock);
+/**
+ * ring_buffer_reset_online_cpus - reset the per CPU buffers of online CPUs
+ * @buffer: The ring buffer whose per CPU buffers are to be reset
+ *	    (only the buffers of currently online CPUs are reset)
+ */
+void ring_buffer_reset_online_cpus(struct trace_buffer *buffer)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	int cpu;
 
-	rb_reset_cpu(cpu_buffer);
+	for_each_online_buffer_cpu(buffer, cpu) {
+		cpu_buffer = buffer->buffers[cpu];
 
-	arch_spin_unlock(&cpu_buffer->lock);
+		atomic_inc(&cpu_buffer->resize_disabled);
+		atomic_inc(&cpu_buffer->record_disabled);
+	}
 
- out:
-	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+	/* Make sure all commits have finished */
+	synchronize_rcu();
 
-	atomic_dec(&cpu_buffer->record_disabled);
-	atomic_dec(&cpu_buffer->resize_disabled);
+	for_each_online_buffer_cpu(buffer, cpu) {
+		cpu_buffer = buffer->buffers[cpu];
+
+		reset_disabled_cpu_buffer(cpu_buffer);
+
+		atomic_dec(&cpu_buffer->record_disabled);
+		atomic_dec(&cpu_buffer->resize_disabled);
+	}
 }
-EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
 
 /**
  * ring_buffer_reset - reset a ring buffer
@@ -4528,10 +4568,27 @@ EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
  */
 void ring_buffer_reset(struct trace_buffer *buffer)
 {
+	struct ring_buffer_per_cpu *cpu_buffer;
 	int cpu;
 
-	for_each_buffer_cpu(buffer, cpu)
-		ring_buffer_reset_cpu(buffer, cpu);
+	for_each_buffer_cpu(buffer, cpu) {
+		cpu_buffer = buffer->buffers[cpu];
+
+		atomic_inc(&cpu_buffer->resize_disabled);
+		atomic_inc(&cpu_buffer->record_disabled);
+	}
+
+	/* Make sure all commits have finished */
+	synchronize_rcu();
+
+	for_each_buffer_cpu(buffer, cpu) {
+		cpu_buffer = buffer->buffers[cpu];
+
+		reset_disabled_cpu_buffer(cpu_buffer);
+
+		atomic_dec(&cpu_buffer->record_disabled);
+		atomic_dec(&cpu_buffer->resize_disabled);
+	}
 }
 EXPORT_SYMBOL_GPL(ring_buffer_reset);
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ec44b0e2a19c..9a26a1c875ae 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2003,7 +2003,6 @@ static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
 void tracing_reset_online_cpus(struct array_buffer *buf)
 {
 	struct trace_buffer *buffer = buf->buffer;
-	int cpu;
 
 	if (!buffer)
 		return;
@@ -2015,8 +2014,7 @@ void tracing_reset_online_cpus(struct array_buffer *buf)
 
 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
 
-	for_each_online_cpu(cpu)
-		ring_buffer_reset_cpu(buffer, cpu);
+	ring_buffer_reset_online_cpus(buffer);
 
 	ring_buffer_record_enable(buffer);
 }
-- 
2.23.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] ring-buffer: speed up buffer resets by avoiding synchronize_rcu for each CPU
  2020-06-25  5:34 [PATCH] ring-buffer: speed up buffer resets by avoiding synchronize_rcu for each CPU Nicholas Piggin
@ 2020-06-25 21:42 ` Anton Blanchard
  2020-06-29 15:35 ` Paul E. McKenney
  2020-06-29 22:16 ` Steven Rostedt
  2 siblings, 0 replies; 6+ messages in thread
From: Anton Blanchard @ 2020-06-25 21:42 UTC (permalink / raw)
  To: Nicholas Piggin; +Cc: Steven Rostedt, Paul McKenney, linux-kernel

Hi Nick,

> On a 144 thread system, `perf ftrace` takes about 20 seconds to start
> up, due to calling synchronize_rcu() for each CPU.
> 
>   cat /proc/108560/stack
>     0xc0003e7eb336f470
>     __switch_to+0x2e0/0x480
>     __wait_rcu_gp+0x20c/0x220
>     synchronize_rcu+0x9c/0xc0
>     ring_buffer_reset_cpu+0x88/0x2e0
>     tracing_reset_online_cpus+0x84/0xe0
>     tracing_open+0x1d4/0x1f0
> 
> On a system with 10x more threads, it starts to become an annoyance.
> 
> Batch these up so we disable all the per-cpu buffers first, then
> synchronize_rcu() once, then reset each of the buffers. This brings
> the time down to about 0.5s.

It's gone from somewhere more than 10 minutes (I gave up waiting) to
3 seconds. Nice work!

Tested-by: Anton Blanchard <anton@ozlabs.org>

Thanks,
Anton

> Cc: Paul McKenney <paulmck@kernel.org>
> Cc: Anton Blanchard <anton@ozlabs.org>
> Cc: Steven Rostedt <rostedt@goodmis.org>
> Cc: linux-kernel@vger.kernel.org
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>  include/linux/ring_buffer.h |  1 +
>  kernel/trace/ring_buffer.c  | 85
> +++++++++++++++++++++++++++++++------ kernel/trace/trace.c        |
> 4 +- 3 files changed, 73 insertions(+), 17 deletions(-)
> 
> diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
> index c76b2f3b3ac4..136ea0997e6d 100644
> --- a/include/linux/ring_buffer.h
> +++ b/include/linux/ring_buffer.h
> @@ -143,6 +143,7 @@ bool ring_buffer_iter_dropped(struct
> ring_buffer_iter *iter); unsigned long ring_buffer_size(struct
> trace_buffer *buffer, int cpu); 
>  void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu);
> +void ring_buffer_reset_online_cpus(struct trace_buffer *buffer);
>  void ring_buffer_reset(struct trace_buffer *buffer);
>  
>  #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
> diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
> index b8e1ca48be50..3f1fd02bd14a 100644
> --- a/kernel/trace/ring_buffer.c
> +++ b/kernel/trace/ring_buffer.c
> @@ -270,6 +270,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
>  #define for_each_buffer_cpu(buffer, cpu)		\
>  	for_each_cpu(cpu, buffer->cpumask)
>  
> +#define for_each_online_buffer_cpu(buffer, cpu)		\
> +	for_each_cpu_and(cpu, buffer->cpumask, cpu_online_mask)
> +
>  #define TS_SHIFT	27
>  #define TS_MASK		((1ULL << TS_SHIFT) - 1)
>  #define TS_DELTA_TEST	(~TS_MASK)
> @@ -4484,6 +4487,26 @@ rb_reset_cpu(struct ring_buffer_per_cpu
> *cpu_buffer) rb_head_page_activate(cpu_buffer);
>  }
>  
> +/* Must have disabled the cpu buffer then done a synchronize_rcu */
> +static void reset_disabled_cpu_buffer(struct ring_buffer_per_cpu
> *cpu_buffer) +{
> +	unsigned long flags;
> +
> +	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
> +
> +	if (RB_WARN_ON(cpu_buffer,
> local_read(&cpu_buffer->committing)))
> +		goto out;
> +
> +	arch_spin_lock(&cpu_buffer->lock);
> +
> +	rb_reset_cpu(cpu_buffer);
> +
> +	arch_spin_unlock(&cpu_buffer->lock);
> +
> + out:
> +	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
> +}
> +
>  /**
>   * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer
>   * @buffer: The ring buffer to reset a per cpu buffer of
> @@ -4492,7 +4515,6 @@ rb_reset_cpu(struct ring_buffer_per_cpu
> *cpu_buffer) void ring_buffer_reset_cpu(struct trace_buffer *buffer,
> int cpu) {
>  	struct ring_buffer_per_cpu *cpu_buffer =
> buffer->buffers[cpu];
> -	unsigned long flags;
>  
>  	if (!cpumask_test_cpu(cpu, buffer->cpumask))
>  		return;
> @@ -4503,24 +4525,42 @@ void ring_buffer_reset_cpu(struct
> trace_buffer *buffer, int cpu) /* Make sure all commits have finished
> */ synchronize_rcu();
>  
> -	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
> +	reset_disabled_cpu_buffer(cpu_buffer);
>  
> -	if (RB_WARN_ON(cpu_buffer,
> local_read(&cpu_buffer->committing)))
> -		goto out;
> +	atomic_dec(&cpu_buffer->record_disabled);
> +	atomic_dec(&cpu_buffer->resize_disabled);
> +}
> +EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
>  
> -	arch_spin_lock(&cpu_buffer->lock);
> +/**
> + * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer
> + * @buffer: The ring buffer to reset a per cpu buffer of
> + * @cpu: The CPU buffer to be reset
> + */
> +void ring_buffer_reset_online_cpus(struct trace_buffer *buffer)
> +{
> +	struct ring_buffer_per_cpu *cpu_buffer;
> +	int cpu;
>  
> -	rb_reset_cpu(cpu_buffer);
> +	for_each_online_buffer_cpu(buffer, cpu) {
> +		cpu_buffer = buffer->buffers[cpu];
>  
> -	arch_spin_unlock(&cpu_buffer->lock);
> +		atomic_inc(&cpu_buffer->resize_disabled);
> +		atomic_inc(&cpu_buffer->record_disabled);
> +	}
>  
> - out:
> -	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
> +	/* Make sure all commits have finished */
> +	synchronize_rcu();
>  
> -	atomic_dec(&cpu_buffer->record_disabled);
> -	atomic_dec(&cpu_buffer->resize_disabled);
> +	for_each_online_buffer_cpu(buffer, cpu) {
> +		cpu_buffer = buffer->buffers[cpu];
> +
> +		reset_disabled_cpu_buffer(cpu_buffer);
> +
> +		atomic_dec(&cpu_buffer->record_disabled);
> +		atomic_dec(&cpu_buffer->resize_disabled);
> +	}
>  }
> -EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
>  
>  /**
>   * ring_buffer_reset - reset a ring buffer
> @@ -4528,10 +4568,27 @@ EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
>   */
>  void ring_buffer_reset(struct trace_buffer *buffer)
>  {
> +	struct ring_buffer_per_cpu *cpu_buffer;
>  	int cpu;
>  
> -	for_each_buffer_cpu(buffer, cpu)
> -		ring_buffer_reset_cpu(buffer, cpu);
> +	for_each_buffer_cpu(buffer, cpu) {
> +		cpu_buffer = buffer->buffers[cpu];
> +
> +		atomic_inc(&cpu_buffer->resize_disabled);
> +		atomic_inc(&cpu_buffer->record_disabled);
> +	}
> +
> +	/* Make sure all commits have finished */
> +	synchronize_rcu();
> +
> +	for_each_buffer_cpu(buffer, cpu) {
> +		cpu_buffer = buffer->buffers[cpu];
> +
> +		reset_disabled_cpu_buffer(cpu_buffer);
> +
> +		atomic_dec(&cpu_buffer->record_disabled);
> +		atomic_dec(&cpu_buffer->resize_disabled);
> +	}
>  }
>  EXPORT_SYMBOL_GPL(ring_buffer_reset);
>  
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index ec44b0e2a19c..9a26a1c875ae 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -2003,7 +2003,6 @@ static void tracing_reset_cpu(struct
> array_buffer *buf, int cpu) void tracing_reset_online_cpus(struct
> array_buffer *buf) {
>  	struct trace_buffer *buffer = buf->buffer;
> -	int cpu;
>  
>  	if (!buffer)
>  		return;
> @@ -2015,8 +2014,7 @@ void tracing_reset_online_cpus(struct
> array_buffer *buf) 
>  	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
>  
> -	for_each_online_cpu(cpu)
> -		ring_buffer_reset_cpu(buffer, cpu);
> +	ring_buffer_reset_online_cpus(buffer);
>  
>  	ring_buffer_record_enable(buffer);
>  }


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] ring-buffer: speed up buffer resets by avoiding synchronize_rcu for each CPU
  2020-06-25  5:34 [PATCH] ring-buffer: speed up buffer resets by avoiding synchronize_rcu for each CPU Nicholas Piggin
  2020-06-25 21:42 ` Anton Blanchard
@ 2020-06-29 15:35 ` Paul E. McKenney
  2020-06-29 15:40   ` Steven Rostedt
  2020-06-29 22:16 ` Steven Rostedt
  2 siblings, 1 reply; 6+ messages in thread
From: Paul E. McKenney @ 2020-06-29 15:35 UTC (permalink / raw)
  To: Nicholas Piggin; +Cc: Steven Rostedt, Anton Blanchard, linux-kernel

On Thu, Jun 25, 2020 at 03:34:03PM +1000, Nicholas Piggin wrote:
> On a 144 thread system, `perf ftrace` takes about 20 seconds to start
> up, due to calling synchronize_rcu() for each CPU.
> 
>   cat /proc/108560/stack
>     0xc0003e7eb336f470
>     __switch_to+0x2e0/0x480
>     __wait_rcu_gp+0x20c/0x220
>     synchronize_rcu+0x9c/0xc0
>     ring_buffer_reset_cpu+0x88/0x2e0
>     tracing_reset_online_cpus+0x84/0xe0
>     tracing_open+0x1d4/0x1f0
> 
> On a system with 10x more threads, it starts to become an annoyance.
> 
> Batch these up so we disable all the per-cpu buffers first, then
> synchronize_rcu() once, then reset each of the buffers. This brings
> the time down to about 0.5s.
> 
> Cc: Paul McKenney <paulmck@kernel.org>
> Cc: Anton Blanchard <anton@ozlabs.org>
> Cc: Steven Rostedt <rostedt@goodmis.org>
> Cc: linux-kernel@vger.kernel.org
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>

Looks plausible from an RCU viewpoint:

Acked-by: Paul E. McKenney <paulmck@kernel.org>

> ---
>  include/linux/ring_buffer.h |  1 +
>  kernel/trace/ring_buffer.c  | 85 +++++++++++++++++++++++++++++++------
>  kernel/trace/trace.c        |  4 +-
>  3 files changed, 73 insertions(+), 17 deletions(-)
> 
> diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
> index c76b2f3b3ac4..136ea0997e6d 100644
> --- a/include/linux/ring_buffer.h
> +++ b/include/linux/ring_buffer.h
> @@ -143,6 +143,7 @@ bool ring_buffer_iter_dropped(struct ring_buffer_iter *iter);
>  unsigned long ring_buffer_size(struct trace_buffer *buffer, int cpu);
>  
>  void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu);
> +void ring_buffer_reset_online_cpus(struct trace_buffer *buffer);
>  void ring_buffer_reset(struct trace_buffer *buffer);
>  
>  #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
> diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
> index b8e1ca48be50..3f1fd02bd14a 100644
> --- a/kernel/trace/ring_buffer.c
> +++ b/kernel/trace/ring_buffer.c
> @@ -270,6 +270,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
>  #define for_each_buffer_cpu(buffer, cpu)		\
>  	for_each_cpu(cpu, buffer->cpumask)
>  
> +#define for_each_online_buffer_cpu(buffer, cpu)		\
> +	for_each_cpu_and(cpu, buffer->cpumask, cpu_online_mask)
> +
>  #define TS_SHIFT	27
>  #define TS_MASK		((1ULL << TS_SHIFT) - 1)
>  #define TS_DELTA_TEST	(~TS_MASK)
> @@ -4484,6 +4487,26 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
>  	rb_head_page_activate(cpu_buffer);
>  }
>  
> +/* Must have disabled the cpu buffer then done a synchronize_rcu */
> +static void reset_disabled_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
> +{
> +	unsigned long flags;
> +
> +	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
> +
> +	if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
> +		goto out;
> +
> +	arch_spin_lock(&cpu_buffer->lock);
> +
> +	rb_reset_cpu(cpu_buffer);
> +
> +	arch_spin_unlock(&cpu_buffer->lock);
> +
> + out:
> +	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
> +}
> +
>  /**
>   * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer
>   * @buffer: The ring buffer to reset a per cpu buffer of
> @@ -4492,7 +4515,6 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
>  void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu)
>  {
>  	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
> -	unsigned long flags;
>  
>  	if (!cpumask_test_cpu(cpu, buffer->cpumask))
>  		return;
> @@ -4503,24 +4525,42 @@ void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu)
>  	/* Make sure all commits have finished */
>  	synchronize_rcu();
>  
> -	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
> +	reset_disabled_cpu_buffer(cpu_buffer);
>  
> -	if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
> -		goto out;
> +	atomic_dec(&cpu_buffer->record_disabled);
> +	atomic_dec(&cpu_buffer->resize_disabled);
> +}
> +EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
>  
> -	arch_spin_lock(&cpu_buffer->lock);
> +/**
> + * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer
> + * @buffer: The ring buffer to reset a per cpu buffer of
> + * @cpu: The CPU buffer to be reset
> + */
> +void ring_buffer_reset_online_cpus(struct trace_buffer *buffer)
> +{
> +	struct ring_buffer_per_cpu *cpu_buffer;
> +	int cpu;
>  
> -	rb_reset_cpu(cpu_buffer);
> +	for_each_online_buffer_cpu(buffer, cpu) {
> +		cpu_buffer = buffer->buffers[cpu];
>  
> -	arch_spin_unlock(&cpu_buffer->lock);
> +		atomic_inc(&cpu_buffer->resize_disabled);
> +		atomic_inc(&cpu_buffer->record_disabled);
> +	}
>  
> - out:
> -	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
> +	/* Make sure all commits have finished */
> +	synchronize_rcu();
>  
> -	atomic_dec(&cpu_buffer->record_disabled);
> -	atomic_dec(&cpu_buffer->resize_disabled);
> +	for_each_online_buffer_cpu(buffer, cpu) {
> +		cpu_buffer = buffer->buffers[cpu];
> +
> +		reset_disabled_cpu_buffer(cpu_buffer);
> +
> +		atomic_dec(&cpu_buffer->record_disabled);
> +		atomic_dec(&cpu_buffer->resize_disabled);
> +	}
>  }
> -EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
>  
>  /**
>   * ring_buffer_reset - reset a ring buffer
> @@ -4528,10 +4568,27 @@ EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
>   */
>  void ring_buffer_reset(struct trace_buffer *buffer)
>  {
> +	struct ring_buffer_per_cpu *cpu_buffer;
>  	int cpu;
>  
> -	for_each_buffer_cpu(buffer, cpu)
> -		ring_buffer_reset_cpu(buffer, cpu);
> +	for_each_buffer_cpu(buffer, cpu) {
> +		cpu_buffer = buffer->buffers[cpu];
> +
> +		atomic_inc(&cpu_buffer->resize_disabled);
> +		atomic_inc(&cpu_buffer->record_disabled);
> +	}
> +
> +	/* Make sure all commits have finished */
> +	synchronize_rcu();
> +
> +	for_each_buffer_cpu(buffer, cpu) {
> +		cpu_buffer = buffer->buffers[cpu];
> +
> +		reset_disabled_cpu_buffer(cpu_buffer);
> +
> +		atomic_dec(&cpu_buffer->record_disabled);
> +		atomic_dec(&cpu_buffer->resize_disabled);
> +	}
>  }
>  EXPORT_SYMBOL_GPL(ring_buffer_reset);
>  
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index ec44b0e2a19c..9a26a1c875ae 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -2003,7 +2003,6 @@ static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
>  void tracing_reset_online_cpus(struct array_buffer *buf)
>  {
>  	struct trace_buffer *buffer = buf->buffer;
> -	int cpu;
>  
>  	if (!buffer)
>  		return;
> @@ -2015,8 +2014,7 @@ void tracing_reset_online_cpus(struct array_buffer *buf)
>  
>  	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
>  
> -	for_each_online_cpu(cpu)
> -		ring_buffer_reset_cpu(buffer, cpu);
> +	ring_buffer_reset_online_cpus(buffer);
>  
>  	ring_buffer_record_enable(buffer);
>  }
> -- 
> 2.23.0
> 

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] ring-buffer: speed up buffer resets by avoiding synchronize_rcu for each CPU
  2020-06-29 15:35 ` Paul E. McKenney
@ 2020-06-29 15:40   ` Steven Rostedt
  0 siblings, 0 replies; 6+ messages in thread
From: Steven Rostedt @ 2020-06-29 15:40 UTC (permalink / raw)
  To: Paul E. McKenney; +Cc: Nicholas Piggin, Anton Blanchard, linux-kernel

On Mon, 29 Jun 2020 08:35:11 -0700
"Paul E. McKenney" <paulmck@kernel.org> wrote:

> Looks plausible from an RCU viewpoint:
> 
> Acked-by: Paul E. McKenney <paulmck@kernel.org>

Thanks Nicholas, Anton and Paul,

I'll pull this in and start testing it.

-- Steve

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] ring-buffer: speed up buffer resets by avoiding synchronize_rcu for each CPU
  2020-06-25  5:34 [PATCH] ring-buffer: speed up buffer resets by avoiding synchronize_rcu for each CPU Nicholas Piggin
  2020-06-25 21:42 ` Anton Blanchard
  2020-06-29 15:35 ` Paul E. McKenney
@ 2020-06-29 22:16 ` Steven Rostedt
  2020-06-30  2:07   ` Nicholas Piggin
  2 siblings, 1 reply; 6+ messages in thread
From: Steven Rostedt @ 2020-06-29 22:16 UTC (permalink / raw)
  To: Nicholas Piggin; +Cc: Paul McKenney, Anton Blanchard, linux-kernel

On Thu, 25 Jun 2020 15:34:03 +1000
Nicholas Piggin <npiggin@gmail.com> wrote:

> Batch these up so we disable all the per-cpu buffers first, then
> synchronize_rcu() once, then reset each of the buffers. This brings
> the time down to about 0.5s.

After applying this patch, running tools/testing/selftests/ftracetest
went from 5 minutes and 35 seconds to 5 minutes 5 seconds to complete
on my 4 core (8 with hyperthreading) machine! That's almost a 10% drop!

Thanks, I'm definitely applying this for the next merge window.

-- Steve

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] ring-buffer: speed up buffer resets by avoiding synchronize_rcu for each CPU
  2020-06-29 22:16 ` Steven Rostedt
@ 2020-06-30  2:07   ` Nicholas Piggin
  0 siblings, 0 replies; 6+ messages in thread
From: Nicholas Piggin @ 2020-06-30  2:07 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: Anton Blanchard, linux-kernel, Paul McKenney

Excerpts from Steven Rostedt's message of June 30, 2020 8:16 am:
> On Thu, 25 Jun 2020 15:34:03 +1000
> Nicholas Piggin <npiggin@gmail.com> wrote:
> 
>> Batch these up so we disable all the per-cpu buffers first, then
>> synchronize_rcu() once, then reset each of the buffers. This brings
>> the time down to about 0.5s.
> 
> After applying this patch, running tools/testing/selftests/ftracetest
> went from 5 minutes and 35 seconds to 5 minutes 5 seconds to complete
> on my 4 core (8 with hyperthreading) machine! That's almost a 10% drop!
> 
> Thanks, I'm definitely applying this for the next merge window.

Cool, always good when a big system optimisation helps small ones as 
well. Thanks all.

Thanks,
Nick


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2020-06-30  2:07 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-25  5:34 [PATCH] ring-buffer: speed up buffer resets by avoiding synchronize_rcu for each CPU Nicholas Piggin
2020-06-25 21:42 ` Anton Blanchard
2020-06-29 15:35 ` Paul E. McKenney
2020-06-29 15:40   ` Steven Rostedt
2020-06-29 22:16 ` Steven Rostedt
2020-06-30  2:07   ` Nicholas Piggin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.