linux-kernel.vger.kernel.org archive mirror
* [for-next][PATCH 0/2] tracing: Use a temp buffer when filtering events
@ 2016-05-04 13:52 Steven Rostedt
  2016-05-04 13:52 ` [for-next][PATCH 1/2] tracing: Remove TRACE_EVENT_FL_USE_CALL_FILTER logic Steven Rostedt
  2016-05-04 13:52 ` [for-next][PATCH 2/2] tracing: Use temp buffer when filtering events Steven Rostedt
  0 siblings, 2 replies; 12+ messages in thread
From: Steven Rostedt @ 2016-05-04 13:52 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ingo Molnar, Andrew Morton

  git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace.git
for-next

Head SHA1: 0fc1b09ff1ff404ddf753f5ffa5cd0adc8fdcdc9


Steven Rostedt (Red Hat) (2):
      tracing: Remove TRACE_EVENT_FL_USE_CALL_FILTER logic
      tracing: Use temp buffer when filtering events

----
 include/linux/trace_events.h       |   3 -
 kernel/trace/trace.c               | 154 +++++++++++++++++++++++++++++++++++--
 kernel/trace/trace.h               |  19 ++++-
 kernel/trace/trace_events.c        |  10 +++
 kernel/trace/trace_events_filter.c |  77 +++++--------------
 5 files changed, 193 insertions(+), 70 deletions(-)


* [for-next][PATCH 1/2] tracing: Remove TRACE_EVENT_FL_USE_CALL_FILTER logic
  2016-05-04 13:52 [for-next][PATCH 0/2] tracing: Use a temp buffer when filtering events Steven Rostedt
@ 2016-05-04 13:52 ` Steven Rostedt
  2016-05-04 13:52 ` [for-next][PATCH 2/2] tracing: Use temp buffer when filtering events Steven Rostedt
  1 sibling, 0 replies; 12+ messages in thread
From: Steven Rostedt @ 2016-05-04 13:52 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ingo Molnar, Andrew Morton

[-- Attachment #1: 0001-tracing-Remove-TRACE_EVENT_FL_USE_CALL_FILTER-logic.patch --]
[-- Type: text/plain, Size: 5882 bytes --]

From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>

Nothing sets TRACE_EVENT_FL_USE_CALL_FILTER anymore. Remove it.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/trace_events.h       |  3 --
 kernel/trace/trace_events_filter.c | 71 ++++++--------------------------------
 2 files changed, 10 insertions(+), 64 deletions(-)

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 3111a1efdad6..ba6456302e34 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -214,7 +214,6 @@ enum {
 	TRACE_EVENT_FL_NO_SET_FILTER_BIT,
 	TRACE_EVENT_FL_IGNORE_ENABLE_BIT,
 	TRACE_EVENT_FL_WAS_ENABLED_BIT,
-	TRACE_EVENT_FL_USE_CALL_FILTER_BIT,
 	TRACE_EVENT_FL_TRACEPOINT_BIT,
 	TRACE_EVENT_FL_KPROBE_BIT,
 	TRACE_EVENT_FL_UPROBE_BIT,
@@ -229,7 +228,6 @@ enum {
  *  WAS_ENABLED   - Set and stays set when an event was ever enabled
  *                    (used for module unloading, if a module event is enabled,
  *                     it is best to clear the buffers that used it).
- *  USE_CALL_FILTER - For trace internal events, don't use file filter
  *  TRACEPOINT    - Event is a tracepoint
  *  KPROBE        - Event is a kprobe
  *  UPROBE        - Event is a uprobe
@@ -240,7 +238,6 @@ enum {
 	TRACE_EVENT_FL_NO_SET_FILTER	= (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT),
 	TRACE_EVENT_FL_IGNORE_ENABLE	= (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT),
 	TRACE_EVENT_FL_WAS_ENABLED	= (1 << TRACE_EVENT_FL_WAS_ENABLED_BIT),
-	TRACE_EVENT_FL_USE_CALL_FILTER	= (1 << TRACE_EVENT_FL_USE_CALL_FILTER_BIT),
 	TRACE_EVENT_FL_TRACEPOINT	= (1 << TRACE_EVENT_FL_TRACEPOINT_BIT),
 	TRACE_EVENT_FL_KPROBE		= (1 << TRACE_EVENT_FL_KPROBE_BIT),
 	TRACE_EVENT_FL_UPROBE		= (1 << TRACE_EVENT_FL_UPROBE_BIT),
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index b3f5051cd4e9..d1d27bf37a19 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -689,10 +689,7 @@ static void append_filter_err(struct filter_parse_state *ps,
 
 static inline struct event_filter *event_filter(struct trace_event_file *file)
 {
-	if (file->event_call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
-		return file->event_call->filter;
-	else
-		return file->filter;
+	return file->filter;
 }
 
 /* caller must hold event_mutex */
@@ -826,12 +823,7 @@ static void __free_preds(struct event_filter *filter)
 
 static void filter_disable(struct trace_event_file *file)
 {
-	struct trace_event_call *call = file->event_call;
-
-	if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
-		call->flags &= ~TRACE_EVENT_FL_FILTERED;
-	else
-		file->flags &= ~EVENT_FILE_FL_FILTERED;
+	file->flags &= ~EVENT_FILE_FL_FILTERED;
 }
 
 static void __free_filter(struct event_filter *filter)
@@ -883,13 +875,8 @@ static int __alloc_preds(struct event_filter *filter, int n_preds)
 
 static inline void __remove_filter(struct trace_event_file *file)
 {
-	struct trace_event_call *call = file->event_call;
-
 	filter_disable(file);
-	if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
-		remove_filter_string(call->filter);
-	else
-		remove_filter_string(file->filter);
+	remove_filter_string(file->filter);
 }
 
 static void filter_free_subsystem_preds(struct trace_subsystem_dir *dir,
@@ -906,15 +893,8 @@ static void filter_free_subsystem_preds(struct trace_subsystem_dir *dir,
 
 static inline void __free_subsystem_filter(struct trace_event_file *file)
 {
-	struct trace_event_call *call = file->event_call;
-
-	if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) {
-		__free_filter(call->filter);
-		call->filter = NULL;
-	} else {
-		__free_filter(file->filter);
-		file->filter = NULL;
-	}
+	__free_filter(file->filter);
+	file->filter = NULL;
 }
 
 static void filter_free_subsystem_filters(struct trace_subsystem_dir *dir,
@@ -1718,69 +1698,38 @@ fail:
 
 static inline void event_set_filtered_flag(struct trace_event_file *file)
 {
-	struct trace_event_call *call = file->event_call;
-
-	if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
-		call->flags |= TRACE_EVENT_FL_FILTERED;
-	else
-		file->flags |= EVENT_FILE_FL_FILTERED;
+	file->flags |= EVENT_FILE_FL_FILTERED;
 }
 
 static inline void event_set_filter(struct trace_event_file *file,
 				    struct event_filter *filter)
 {
-	struct trace_event_call *call = file->event_call;
-
-	if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
-		rcu_assign_pointer(call->filter, filter);
-	else
-		rcu_assign_pointer(file->filter, filter);
+	rcu_assign_pointer(file->filter, filter);
 }
 
 static inline void event_clear_filter(struct trace_event_file *file)
 {
-	struct trace_event_call *call = file->event_call;
-
-	if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
-		RCU_INIT_POINTER(call->filter, NULL);
-	else
-		RCU_INIT_POINTER(file->filter, NULL);
+	RCU_INIT_POINTER(file->filter, NULL);
 }
 
 static inline void
 event_set_no_set_filter_flag(struct trace_event_file *file)
 {
-	struct trace_event_call *call = file->event_call;
-
-	if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
-		call->flags |= TRACE_EVENT_FL_NO_SET_FILTER;
-	else
-		file->flags |= EVENT_FILE_FL_NO_SET_FILTER;
+	file->flags |= EVENT_FILE_FL_NO_SET_FILTER;
 }
 
 static inline void
 event_clear_no_set_filter_flag(struct trace_event_file *file)
 {
-	struct trace_event_call *call = file->event_call;
-
-	if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
-		call->flags &= ~TRACE_EVENT_FL_NO_SET_FILTER;
-	else
-		file->flags &= ~EVENT_FILE_FL_NO_SET_FILTER;
+	file->flags &= ~EVENT_FILE_FL_NO_SET_FILTER;
 }
 
 static inline bool
 event_no_set_filter_flag(struct trace_event_file *file)
 {
-	struct trace_event_call *call = file->event_call;
-
 	if (file->flags & EVENT_FILE_FL_NO_SET_FILTER)
 		return true;
 
-	if ((call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) &&
-	    (call->flags & TRACE_EVENT_FL_NO_SET_FILTER))
-		return true;
-
 	return false;
 }
 
-- 
2.8.0.rc3


* [for-next][PATCH 2/2] tracing: Use temp buffer when filtering events
  2016-05-04 13:52 [for-next][PATCH 0/2] tracing: Use a temp buffer when filtering events Steven Rostedt
  2016-05-04 13:52 ` [for-next][PATCH 1/2] tracing: Remove TRACE_EVENT_FL_USE_CALL_FILTER logic Steven Rostedt
@ 2016-05-04 13:52 ` Steven Rostedt
  2016-05-05 15:20   ` Alexei Starovoitov
  2020-08-28  7:53   ` Wen Gong
  1 sibling, 2 replies; 12+ messages in thread
From: Steven Rostedt @ 2016-05-04 13:52 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ingo Molnar, Andrew Morton, Alexei Starovoitov

[-- Attachment #1: 0002-tracing-Use-temp-buffer-when-filtering-events.patch --]
[-- Type: text/plain, Size: 12085 bytes --]

From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>

Filtering of events requires the data to be written to the ring buffer
before it can be decided whether to filter the event out or not. This is
because the filter's parameters are based on the record as it is written to
the ring buffer, and not on the parameters that are passed into the trace
functions.

The ftrace ring buffer is optimized for writing into the ring buffer and
committing. The discard procedure, used when filtering decides that an event
should be dropped, is much more heavyweight. Thus, using a temporary buffer
when filtering events can speed things up drastically.
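
In rough pseudo-code (a sketch of the existing flow only; the
fill_in_event_fields() helper is illustrative and stands in for the
generated tracepoint code, it is not a real kernel function), the
pre-patch path looks like this:

	event = ring_buffer_lock_reserve(buffer, len);
	entry = ring_buffer_event_data(event);
	fill_in_event_fields(entry);		/* e.g. common_preempt_count */

	if (!filter_match_preds(file->filter, entry))
		/* rejected by the filter: heavyweight ring buffer discard */
		ring_buffer_discard_commit(buffer, event);
	else
		ring_buffer_unlock_commit(buffer, event);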

Without a temp buffer we have:

 # trace-cmd start -p nop
 # perf stat -r 10 hackbench 50
       0.790706626 seconds time elapsed ( +-  0.71% )

 # trace-cmd start -e all
 # perf stat -r 10 hackbench 50
       1.566904059 seconds time elapsed ( +-  0.27% )

 # trace-cmd start -e all -f 'common_preempt_count==20'
 # perf stat -r 10 hackbench 50
       1.690598511 seconds time elapsed ( +-  0.19% )

 # trace-cmd start -e all -f 'common_preempt_count!=20'
 # perf stat -r 10 hackbench 50
       1.707486364 seconds time elapsed ( +-  0.30% )

The first run above is without any tracing, just to get a base figure.
hackbench takes ~0.79 seconds to run on the system.

The second run enables tracing all events where nothing is filtered. This
increases the time by 100% and hackbench takes 1.57 seconds to run.

The third run filters all events where the preempt count will equal "20"
(this should never happen), thus all events are discarded. This takes 1.69
seconds to run, which is 10% slower than just committing the events!

The last run enables all events with a filter that will commit all events;
this takes 1.70 seconds to run. The filtering overhead is approximately 10%.
Thus, the discard and commit of an event from the ring buffer may take about
the same time.
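
For reference, the overhead works out roughly as follows (arithmetic on the
runs above, rounded):

	1.690598511 / 1.566904059 ~= 1.08   (discard every event vs. no filter)
	1.707486364 / 1.566904059 ~= 1.09   (commit every event vs. no filter)

i.e. on the order of 10% whether the filtered events are discarded or
committed.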

With this patch, the numbers change:

 # trace-cmd start -p nop
 # perf stat -r 10 hackbench 50
       0.778233033 seconds time elapsed ( +-  0.38% )

 # trace-cmd start -e all
 # perf stat -r 10 hackbench 50
       1.582102692 seconds time elapsed ( +-  0.28% )

 # trace-cmd start -e all -f 'common_preempt_count==20'
 # perf stat -r 10 hackbench 50
       1.309230710 seconds time elapsed ( +-  0.22% )

 # trace-cmd start -e all -f 'common_preempt_count!=20'
 # perf stat -r 10 hackbench 50
       1.786001924 seconds time elapsed ( +-  0.20% )

The first run is again the base with no tracing.

The second run is all tracing with no filtering. It is a little slower, but
that may be well within the noise.

The third run shows that discarding all events only took 1.3 seconds. This
is a speed up of 23%! The discard is much faster than even the commit.

The one downside is shown in the last run. Events that are not discarded by
the filter will take longer to add; this is due to the extra copy of the
event.
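
In outline (a simplified sketch distilled from the diff below, not a
verbatim copy of it), the reserve path now tries a per-cpu temp page first,
and the commit and discard paths then check whether the event lives in that
page:

	/* reserve: only when the event may be soft-disabled or filtered */
	if ((file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
	    (event = this_cpu_read(trace_buffered_event))) {
		if (this_cpu_inc_return(trace_buffered_event_cnt) == 1) {
			trace_event_setup(event, type, flags, pc);
			event->array[0] = len;	/* stash the length */
			return event;		/* caller fills the temp page */
		}
		this_cpu_dec(trace_buffered_event_cnt);	/* nested use: fall back */
	}
	/* otherwise reserve directly in the ring buffer as before */

	/* commit: copy the whole event into the ring buffer in one shot */
	if (this_cpu_read(trace_buffered_event) == event)
		ring_buffer_write(buffer, event->array[0], &event->array[1]);
	else
		ring_buffer_unlock_commit(buffer, event);

	/* discard: simply release the temp page (a per-cpu counter decrement) */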

Cc: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c               | 154 +++++++++++++++++++++++++++++++++++--
 kernel/trace/trace.h               |  19 ++++-
 kernel/trace/trace_events.c        |  10 +++
 kernel/trace/trace_events_filter.c |  10 +++
 4 files changed, 185 insertions(+), 8 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c09e8ffadc73..8a4bd6b68a0b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -312,7 +312,7 @@ int call_filter_check_discard(struct trace_event_call *call, void *rec,
 {
 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
 	    !filter_match_preds(call->filter, rec)) {
-		ring_buffer_discard_commit(buffer, event);
+		__trace_event_discard_commit(buffer, event);
 		return 1;
 	}
 
@@ -1660,6 +1660,16 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
 }
 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
 
+static __always_inline void
+trace_event_setup(struct ring_buffer_event *event,
+		  int type, unsigned long flags, int pc)
+{
+	struct trace_entry *ent = ring_buffer_event_data(event);
+
+	tracing_generic_entry_update(ent, flags, pc);
+	ent->type = type;
+}
+
 struct ring_buffer_event *
 trace_buffer_lock_reserve(struct ring_buffer *buffer,
 			  int type,
@@ -1669,21 +1679,136 @@ trace_buffer_lock_reserve(struct ring_buffer *buffer,
 	struct ring_buffer_event *event;
 
 	event = ring_buffer_lock_reserve(buffer, len);
-	if (event != NULL) {
-		struct trace_entry *ent = ring_buffer_event_data(event);
+	if (event != NULL)
+		trace_event_setup(event, type, flags, pc);
+
+	return event;
+}
+
+DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
+DEFINE_PER_CPU(int, trace_buffered_event_cnt);
+static int trace_buffered_event_ref;
+
+/**
+ * trace_buffered_event_enable - enable buffering events
+ *
+ * When events are being filtered, it is quicker to use a temporary
+ * buffer to write the event data into if there's a likely chance
+ * that it will not be committed. The discard of the ring buffer
+ * is not as fast as committing, and is much slower than copying
+ * a commit.
+ *
+ * When an event is to be filtered, allocate per cpu buffers to
+ * write the event data into, and if the event is filtered and discarded
+ * it is simply dropped, otherwise, the entire data is to be committed
+ * in one shot.
+ */
+void trace_buffered_event_enable(void)
+{
+	struct ring_buffer_event *event;
+	struct page *page;
+	int cpu;
 
-		tracing_generic_entry_update(ent, flags, pc);
-		ent->type = type;
+	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
+
+	if (trace_buffered_event_ref++)
+		return;
+
+	for_each_tracing_cpu(cpu) {
+		page = alloc_pages_node(cpu_to_node(cpu),
+					GFP_KERNEL | __GFP_NORETRY, 0);
+		if (!page)
+			goto failed;
+
+		event = page_address(page);
+		memset(event, 0, sizeof(*event));
+
+		per_cpu(trace_buffered_event, cpu) = event;
+
+		preempt_disable();
+		if (cpu == smp_processor_id() &&
+		    this_cpu_read(trace_buffered_event) !=
+		    per_cpu(trace_buffered_event, cpu))
+			WARN_ON_ONCE(1);
+		preempt_enable();
 	}
 
-	return event;
+	return;
+ failed:
+	trace_buffered_event_disable();
+}
+
+static void enable_trace_buffered_event(void *data)
+{
+	/* Probably not needed, but do it anyway */
+	smp_rmb();
+	this_cpu_dec(trace_buffered_event_cnt);
+}
+
+static void disable_trace_buffered_event(void *data)
+{
+	this_cpu_inc(trace_buffered_event_cnt);
+}
+
+/**
+ * trace_buffered_event_disable - disable buffering events
+ *
+ * When a filter is removed, it is faster to not use the buffered
+ * events, and to commit directly into the ring buffer. Free up
+ * the temp buffers when there are no more users. This requires
+ * special synchronization with current events.
+ */
+void trace_buffered_event_disable(void)
+{
+	int cpu;
+
+	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
+
+	if (WARN_ON_ONCE(!trace_buffered_event_ref))
+		return;
+
+	if (--trace_buffered_event_ref)
+		return;
+
+	preempt_disable();
+	/* For each CPU, set the buffer as used. */
+	smp_call_function_many(tracing_buffer_mask,
+			       disable_trace_buffered_event, NULL, 1);
+	preempt_enable();
+
+	/* Wait for all current users to finish */
+	synchronize_sched();
+
+	for_each_tracing_cpu(cpu) {
+		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
+		per_cpu(trace_buffered_event, cpu) = NULL;
+	}
+	/*
+	 * Make sure trace_buffered_event is NULL before clearing
+	 * trace_buffered_event_cnt.
+	 */
+	smp_wmb();
+
+	preempt_disable();
+	/* Do the work on each cpu */
+	smp_call_function_many(tracing_buffer_mask,
+			       enable_trace_buffered_event, NULL, 1);
+	preempt_enable();
 }
 
 void
 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
 {
 	__this_cpu_write(trace_cmdline_save, true);
-	ring_buffer_unlock_commit(buffer, event);
+
+	/* If this is the temp buffer, we need to commit fully */
+	if (this_cpu_read(trace_buffered_event) == event) {
+		/* Length is in event->array[0] */
+		ring_buffer_write(buffer, event->array[0], &event->array[1]);
+		/* Release the temp buffer */
+		this_cpu_dec(trace_buffered_event_cnt);
+	} else
+		ring_buffer_unlock_commit(buffer, event);
 }
 
 static struct ring_buffer *temp_buffer;
@@ -1695,8 +1820,23 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
 			  unsigned long flags, int pc)
 {
 	struct ring_buffer_event *entry;
+	int val;
 
 	*current_rb = trace_file->tr->trace_buffer.buffer;
+
+	if ((trace_file->flags &
+	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
+	    (entry = this_cpu_read(trace_buffered_event))) {
+		/* Try to use the per cpu buffer first */
+		val = this_cpu_inc_return(trace_buffered_event_cnt);
+		if (val == 1) {
+			trace_event_setup(entry, type, flags, pc);
+			entry->array[0] = len;
+			return entry;
+		}
+		this_cpu_dec(trace_buffered_event_cnt);
+	}
+
 	entry = trace_buffer_lock_reserve(*current_rb,
 					 type, len, flags, pc);
 	/*
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 10156a09103f..5167c366d6b7 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1083,6 +1083,23 @@ static inline void trace_buffer_unlock_commit(struct trace_array *tr,
 	trace_buffer_unlock_commit_regs(tr, buffer, event, flags, pc, NULL);
 }
 
+DECLARE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
+DECLARE_PER_CPU(int, trace_buffered_event_cnt);
+void trace_buffered_event_disable(void);
+void trace_buffered_event_enable(void);
+
+static inline void
+__trace_event_discard_commit(struct ring_buffer *buffer,
+			     struct ring_buffer_event *event)
+{
+	if (this_cpu_read(trace_buffered_event) == event) {
+		/* Simply release the temp buffer */
+		this_cpu_dec(trace_buffered_event_cnt);
+		return;
+	}
+	ring_buffer_discard_commit(buffer, event);
+}
+
 /*
  * Helper function for event_trigger_unlock_commit{_regs}().
  * If there are event triggers attached to this event that requires
@@ -1111,7 +1128,7 @@ __event_trigger_test_discard(struct trace_event_file *file,
 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
 	     !filter_match_preds(file->filter, entry))) {
-		ring_buffer_discard_commit(buffer, event);
+		__trace_event_discard_commit(buffer, event);
 		return true;
 	}
 
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index da1eeb6190e3..4d006707b947 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -363,6 +363,7 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
 {
 	struct trace_event_call *call = file->event_call;
 	struct trace_array *tr = file->tr;
+	unsigned long file_flags = file->flags;
 	int ret = 0;
 	int disable;
 
@@ -445,6 +446,15 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
 		break;
 	}
 
+	/* Enable or disable use of trace_buffered_event */
+	if ((file_flags & EVENT_FILE_FL_SOFT_DISABLED) !=
+	    (file->flags & EVENT_FILE_FL_SOFT_DISABLED)) {
+		if (file->flags & EVENT_FILE_FL_SOFT_DISABLED)
+			trace_buffered_event_enable();
+		else
+			trace_buffered_event_disable();
+	}
+
 	return ret;
 }
 
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index d1d27bf37a19..9daa9b3bc6d9 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -823,7 +823,12 @@ static void __free_preds(struct event_filter *filter)
 
 static void filter_disable(struct trace_event_file *file)
 {
+	unsigned long old_flags = file->flags;
+
 	file->flags &= ~EVENT_FILE_FL_FILTERED;
+
+	if (old_flags != file->flags)
+		trace_buffered_event_disable();
 }
 
 static void __free_filter(struct event_filter *filter)
@@ -1698,7 +1703,12 @@ fail:
 
 static inline void event_set_filtered_flag(struct trace_event_file *file)
 {
+	unsigned long old_flags = file->flags;
+
 	file->flags |= EVENT_FILE_FL_FILTERED;
+
+	if (old_flags != file->flags)
+		trace_buffered_event_enable();
 }
 
 static inline void event_set_filter(struct trace_event_file *file,
-- 
2.8.0.rc3


* Re: [for-next][PATCH 2/2] tracing: Use temp buffer when filtering events
  2016-05-04 13:52 ` [for-next][PATCH 2/2] tracing: Use temp buffer when filtering events Steven Rostedt
@ 2016-05-05 15:20   ` Alexei Starovoitov
  2016-05-05 15:32     ` Steven Rostedt
  2020-08-28  7:53   ` Wen Gong
  1 sibling, 1 reply; 12+ messages in thread
From: Alexei Starovoitov @ 2016-05-05 15:20 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-kernel, Ingo Molnar, Andrew Morton, Alexei Starovoitov

On Wed, May 04, 2016 at 09:52:04AM -0400, Steven Rostedt wrote:
> From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
> 
> Filtering of events requires the data to be written to the ring buffer
> before it can be decided to filter or not. This is because the parameters of
> the filter are based on the result that is written to the ring buffer and
> not on the parameters that are passed into the trace functions.
> 
> The ftrace ring buffer is optimized for writing into the ring buffer and
> committing. The discard procedure used when filtering decides the event
> should be discarded is much more heavy weight. Thus, using a temporary
> filter when filtering events can speed things up drastically.
> 
> Without a temp buffer we have:
> 
>  # trace-cmd start -p nop
>  # perf stat -r 10 hackbench 50
>        0.790706626 seconds time elapsed ( +-  0.71% )
> 
>  # trace-cmd start -e all
>  # perf stat -r 10 hackbench 50
>        1.566904059 seconds time elapsed ( +-  0.27% )
> 
>  # trace-cmd start -e all -f 'common_preempt_count==20'
>  # perf stat -r 10 hackbench 50
>        1.690598511 seconds time elapsed ( +-  0.19% )
> 
>  # trace-cmd start -e all -f 'common_preempt_count!=20'
>  # perf stat -r 10 hackbench 50
>        1.707486364 seconds time elapsed ( +-  0.30% )
> 
> The first run above is without any tracing, just to get a based figure.
> hackbench takes ~0.79 seconds to run on the system.
> 
> The second run enables tracing all events where nothing is filtered. This
> increases the time by 100% and hackbench takes 1.57 seconds to run.
> 
> The third run filters all events where the preempt count will equal "20"
> (this should never happen) thus all events are discarded. This takes 1.69
> seconds to run. This is 10% slower than just committing the events!
> 
> The last run enables all events and filters where the filter will commit all
> events, and this takes 1.70 seconds to run. The filtering overhead is
> approximately 10%. Thus, the discard and commit of an event from the ring
> buffer may be about the same time.
> 
> With this patch, the numbers change:
> 
>  # trace-cmd start -p nop
>  # perf stat -r 10 hackbench 50
>        0.778233033 seconds time elapsed ( +-  0.38% )
> 
>  # trace-cmd start -e all
>  # perf stat -r 10 hackbench 50
>        1.582102692 seconds time elapsed ( +-  0.28% )
> 
>  # trace-cmd start -e all -f 'common_preempt_count==20'
>  # perf stat -r 10 hackbench 50
>        1.309230710 seconds time elapsed ( +-  0.22% )
> 
>  # trace-cmd start -e all -f 'common_preempt_count!=20'
>  # perf stat -r 10 hackbench 50
>        1.786001924 seconds time elapsed ( +-  0.20% )
> 
> The first run is again the base with no tracing.
> 
> The second run is all tracing with no filtering. It is a little slower, but
> that may be well within the noise.
> 
> The third run shows that discarding all events only took 1.3 seconds. This
> is a speed up of 23%! The discard is much faster than even the commit.
> 
> The one downside is shown in the last run. Events that are not discarded by
> the filter will take longer to add, this is due to the extra copy of the
> event.
> 
> Cc: Alexei Starovoitov <ast@kernel.org>
> Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
...
>  	*current_rb = trace_file->tr->trace_buffer.buffer;
> +
> +	if ((trace_file->flags &
> +	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
> +	    (entry = this_cpu_read(trace_buffered_event))) {
> +		/* Try to use the per cpu buffer first */
> +		val = this_cpu_inc_return(trace_buffered_event_cnt);
> +		if (val == 1) {
> +			trace_event_setup(entry, type, flags, pc);
> +			entry->array[0] = len;
> +			return entry;
> +		}
> +		this_cpu_dec(trace_buffered_event_cnt);

tricky :)
so the buffer is used only for non-recursive events.
If the 2nd event on the same cpu also needs filtering it will
be going through normal trace_buffer_lock_reserve() path,
but then it means such events will be out of order if both
are accepted, right?
Is that a concern or not?


* Re: [for-next][PATCH 2/2] tracing: Use temp buffer when filtering events
  2016-05-05 15:20   ` Alexei Starovoitov
@ 2016-05-05 15:32     ` Steven Rostedt
  2016-05-05 15:35       ` Steven Rostedt
  0 siblings, 1 reply; 12+ messages in thread
From: Steven Rostedt @ 2016-05-05 15:32 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: linux-kernel, Ingo Molnar, Andrew Morton, Alexei Starovoitov

On Thu, 5 May 2016 08:20:57 -0700
Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:


> tricky :)

Thanks ;-)

> so the buffer is used only for non-recursive events.
> If the 2nd event on the same cpu also needs filtering it will
> be going through normal trace_buffer_lock_reserve() path,
> but then it means such events will be out of order if both
> are accepted, right?
> Is that a concern or not?

Well, what is the order?

Think about it, what's the difference if the interrupt came in just
before the trace or just after? It still came in the same location with
respect to the normal flow of the code. The only difference is, where
we recorded it.

-- Steve


* Re: [for-next][PATCH 2/2] tracing: Use temp buffer when filtering events
  2016-05-05 15:32     ` Steven Rostedt
@ 2016-05-05 15:35       ` Steven Rostedt
  2016-05-05 15:40         ` Alexei Starovoitov
  0 siblings, 1 reply; 12+ messages in thread
From: Steven Rostedt @ 2016-05-05 15:35 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: linux-kernel, Ingo Molnar, Andrew Morton, Alexei Starovoitov

On Thu, 5 May 2016 11:32:51 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:

> Think about it, what's the difference if the interrupt came in just
> before the trace or just after? It still came in the same location with
> respect to the normal flow of the code. The only difference is, where
> we recorded it.

Also, if we used a temp buffer for each one, the same thing would
happen. The interrupt would be committed first before returning back to
the interrupted event. Perf does the same thing, but all the time.

-- Steve


* Re: [for-next][PATCH 2/2] tracing: Use temp buffer when filtering events
  2016-05-05 15:35       ` Steven Rostedt
@ 2016-05-05 15:40         ` Alexei Starovoitov
  0 siblings, 0 replies; 12+ messages in thread
From: Alexei Starovoitov @ 2016-05-05 15:40 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-kernel, Ingo Molnar, Andrew Morton, Alexei Starovoitov

On Thu, May 05, 2016 at 11:35:43AM -0400, Steven Rostedt wrote:
> On Thu, 5 May 2016 11:32:51 -0400
> Steven Rostedt <rostedt@goodmis.org> wrote:
> 
> > Think about it, what's the difference if the interrupt came in just
> > before the trace or just after? It still came in the same location with
> > respect to the normal flow of the code. The only difference is, where
> > we recorded it.
> 
> Also, if we used a temp buffer for each one, the same thing would
> happen. The interrupt would be committed first before returning back to
> the interrupted event. Perf does the same thing, but all the time.

yeah. good point. there is no actual 'order' here.
The whole thing looks good to me. 
Acked-by: Alexei Starovoitov <ast@kernel.org>


* Re: [for-next][PATCH 2/2] tracing: Use temp buffer when filtering events
  2016-05-04 13:52 ` [for-next][PATCH 2/2] tracing: Use temp buffer when filtering events Steven Rostedt
  2016-05-05 15:20   ` Alexei Starovoitov
@ 2020-08-28  7:53   ` Wen Gong
  2020-08-28 22:49     ` Steven Rostedt
  1 sibling, 1 reply; 12+ messages in thread
From: Wen Gong @ 2020-08-28  7:53 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-kernel, Ingo Molnar, Andrew Morton, Alexei Starovoitov,
	linux-kernel-owner, ath10k, ath11k, abhishekpandit, briannorris,
	drinkcat, tientzu

On 2016-05-04 21:52, Steven Rostedt wrote:
> From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
> 
> Filtering of events requires the data to be written to the ring buffer
> before it can be decided to filter or not. This is because the 
> parameters of
> the filter are based on the result that is written to the ring buffer 
> and
> not on the parameters that are passed into the trace functions.
> 
> The ftrace ring buffer is optimized for writing into the ring buffer 
> and
> committing. The discard procedure used when filtering decides the event
> should be discarded is much more heavy weight. Thus, using a temporary
> filter when filtering events can speed things up drastically.
> 
> Without a temp buffer we have:
> 
>  # trace-cmd start -p nop
>  # perf stat -r 10 hackbench 50
>        0.790706626 seconds time elapsed ( +-  0.71% )
> 
>  # trace-cmd start -e all
>  # perf stat -r 10 hackbench 50
>        1.566904059 seconds time elapsed ( +-  0.27% )
> 
>  # trace-cmd start -e all -f 'common_preempt_count==20'
>  # perf stat -r 10 hackbench 50
>        1.690598511 seconds time elapsed ( +-  0.19% )
> 
>  # trace-cmd start -e all -f 'common_preempt_count!=20'
>  # perf stat -r 10 hackbench 50
>        1.707486364 seconds time elapsed ( +-  0.30% )
> 
> The first run above is without any tracing, just to get a based figure.
> hackbench takes ~0.79 seconds to run on the system.
> 
> The second run enables tracing all events where nothing is filtered. 
> This
> increases the time by 100% and hackbench takes 1.57 seconds to run.
> 
> The third run filters all events where the preempt count will equal 
> "20"
> (this should never happen) thus all events are discarded. This takes 
> 1.69
> seconds to run. This is 10% slower than just committing the events!
> 
> The last run enables all events and filters where the filter will 
> commit all
> events, and this takes 1.70 seconds to run. The filtering overhead is
> approximately 10%. Thus, the discard and commit of an event from the 
> ring
> buffer may be about the same time.
> 
> With this patch, the numbers change:
> 
>  # trace-cmd start -p nop
>  # perf stat -r 10 hackbench 50
>        0.778233033 seconds time elapsed ( +-  0.38% )
> 
>  # trace-cmd start -e all
>  # perf stat -r 10 hackbench 50
>        1.582102692 seconds time elapsed ( +-  0.28% )
> 
>  # trace-cmd start -e all -f 'common_preempt_count==20'
>  # perf stat -r 10 hackbench 50
>        1.309230710 seconds time elapsed ( +-  0.22% )
> 
>  # trace-cmd start -e all -f 'common_preempt_count!=20'
>  # perf stat -r 10 hackbench 50
>        1.786001924 seconds time elapsed ( +-  0.20% )
> 
> The first run is again the base with no tracing.
> 
> The second run is all tracing with no filtering. It is a little slower, 
> but
> that may be well within the noise.
> 
> The third run shows that discarding all events only took 1.3 seconds. 
> This
> is a speed up of 23%! The discard is much faster than even the commit.
> 
> The one downside is shown in the last run. Events that are not 
> discarded by
> the filter will take longer to add, this is due to the extra copy of 
> the
> event.
> 
> Cc: Alexei Starovoitov <ast@kernel.org>
> Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
> ---
>  kernel/trace/trace.c               | 154 
> +++++++++++++++++++++++++++++++++++--
>  kernel/trace/trace.h               |  19 ++++-
>  kernel/trace/trace_events.c        |  10 +++
>  kernel/trace/trace_events_filter.c |  10 +++
>  4 files changed, 185 insertions(+), 8 deletions(-)
> 
...
> @@ -1695,8 +1820,23 @@ trace_event_buffer_lock_reserve(struct
> ring_buffer **current_rb,
>  			  unsigned long flags, int pc)
>  {
>  	struct ring_buffer_event *entry;
> +	int val;
> 
>  	*current_rb = trace_file->tr->trace_buffer.buffer;
> +
> +	if ((trace_file->flags &
> +	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
> +	    (entry = this_cpu_read(trace_buffered_event))) {
> +		/* Try to use the per cpu buffer first */
> +		val = this_cpu_inc_return(trace_buffered_event_cnt);
> +		if (val == 1) {
> +			trace_event_setup(entry, type, flags, pc);
> +			entry->array[0] = len;
> +			return entry;
> +		}
> +		this_cpu_dec(trace_buffered_event_cnt);
> +	}
> +
>  	entry = trace_buffer_lock_reserve(*current_rb,
>  					 type, len, flags, pc);
>  	/*
This patch's commit id is 0fc1b09ff1ff404ddf753f5ffa5cd0adc8fdcdc9, which is
upstream.

How big is the per-cpu buffer?
It seems to be initialized in trace_buffered_event_enable(), and it is only
1 page in size, as below:
void trace_buffered_event_enable(void)
{
...
	for_each_tracing_cpu(cpu) {
		page = alloc_pages_node(cpu_to_node(cpu),
					GFP_KERNEL | __GFP_NORETRY, 0);
If the size of the buffer to trace is more than 1 page, such as 46680 bytes,
it triggers a kernel crash/panic in my case while running trace-cmd.
After debugging, trace_file->flags in trace_event_buffer_lock_reserve() is
0x40b while running trace-cmd, and 0x403 while collecting the ftrace log.

Is there any way to disable this patch dynamically?


* Re: [for-next][PATCH 2/2] tracing: Use temp buffer when filtering events
  2020-08-28  7:53   ` Wen Gong
@ 2020-08-28 22:49     ` Steven Rostedt
  2020-08-28 22:54       ` Steven Rostedt
  0 siblings, 1 reply; 12+ messages in thread
From: Steven Rostedt @ 2020-08-28 22:49 UTC (permalink / raw)
  To: Wen Gong
  Cc: linux-kernel, Ingo Molnar, Andrew Morton, Alexei Starovoitov,
	linux-kernel-owner, ath10k, ath11k, abhishekpandit, briannorris,
	drinkcat, tientzu

On Fri, 28 Aug 2020 15:53:06 +0800
Wen Gong <wgong@codeaurora.org> wrote:

> this patch commit id is : 0fc1b09ff1ff404ddf753f5ffa5cd0adc8fdcdc9 which 
> has upstream.
> 
> how much size is the per cpu buffer?
> seems it is initilized in trace_buffered_event_enable,
> it is only 1 page size as below:
> void trace_buffered_event_enable(void)
> {
> ...
> 	for_each_tracing_cpu(cpu) {
> 		page = alloc_pages_node(cpu_to_node(cpu),
> 					GFP_KERNEL | __GFP_NORETRY, 0);
> If the size of buffer to trace is more than 1 page, such as 46680, then 
> it trigger kernel crash/panic in my case while run trace-cmd.
> After debugging, the trace_file->flags in 
> trace_event_buffer_lock_reserve is 0x40b while run trace-cmd, and it is 
> 0x403 while collecting ftrace log.
> 
> Is it have any operation to disable this patch dynamically?

It shouldn't be disabled; this is a bug that needs to be fixed.

Also, if an event is more than a page, it won't be saved in the ftrace
ring buffer, as events are limited by the page size minus the headers.

-- Steve


* Re: [for-next][PATCH 2/2] tracing: Use temp buffer when filtering events
  2020-08-28 22:49     ` Steven Rostedt
@ 2020-08-28 22:54       ` Steven Rostedt
  2020-08-28 22:55         ` Steven Rostedt
  0 siblings, 1 reply; 12+ messages in thread
From: Steven Rostedt @ 2020-08-28 22:54 UTC (permalink / raw)
  To: Wen Gong
  Cc: linux-kernel, Ingo Molnar, Andrew Morton, Alexei Starovoitov,
	linux-kernel-owner, ath10k, ath11k, abhishekpandit, briannorris,
	drinkcat, tientzu

On Fri, 28 Aug 2020 18:49:55 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:

> On Fri, 28 Aug 2020 15:53:06 +0800
> Wen Gong <wgong@codeaurora.org> wrote:
> 
> > this patch commit id is : 0fc1b09ff1ff404ddf753f5ffa5cd0adc8fdcdc9 which 
> > has upstream.
> > 
> > how much size is the per cpu buffer?
> > seems it is initilized in trace_buffered_event_enable,
> > it is only 1 page size as below:
> > void trace_buffered_event_enable(void)
> > {
> > ...
> > 	for_each_tracing_cpu(cpu) {
> > 		page = alloc_pages_node(cpu_to_node(cpu),
> > 					GFP_KERNEL | __GFP_NORETRY, 0);
> > If the size of buffer to trace is more than 1 page, such as 46680, then 
> > it trigger kernel crash/panic in my case while run trace-cmd.
> > After debugging, the trace_file->flags in 
> > trace_event_buffer_lock_reserve is 0x40b while run trace-cmd, and it is 
> > 0x403 while collecting ftrace log.
> > 
> > Is it have any operation to disable this patch dynamically?  
> 
> It shouldn't be disabled, this is a bug that needs to be fixed.
> 
> Also, if an event is more than a page, it wont be saved in the ftrace
> ring buffer, as events are limited by page size minus the headers.
> 

Untested (not even compiled, as I'm now on PTO) but does this patch
work for you?

-- Steve

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index f40d850ebabc..3a9b4422e7fc 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2598,7 +2598,7 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
 	    (entry = this_cpu_read(trace_buffered_event))) {
 		/* Try to use the per cpu buffer first */
 		val = this_cpu_inc_return(trace_buffered_event_cnt);
-		if (val == 1) {
+		if (val == 1 || (len > (PAGE_SIZE - 8))) {
 			trace_event_setup(entry, type, flags, pc);
 			entry->array[0] = len;
 			return entry;


* Re: [for-next][PATCH 2/2] tracing: Use temp buffer when filtering events
  2020-08-28 22:54       ` Steven Rostedt
@ 2020-08-28 22:55         ` Steven Rostedt
  2020-08-29 15:52           ` Wen Gong
  0 siblings, 1 reply; 12+ messages in thread
From: Steven Rostedt @ 2020-08-28 22:55 UTC (permalink / raw)
  To: Wen Gong
  Cc: linux-kernel, Ingo Molnar, Andrew Morton, Alexei Starovoitov,
	linux-kernel-owner, ath10k, ath11k, abhishekpandit, briannorris,
	drinkcat, tientzu

On Fri, 28 Aug 2020 18:54:50 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:

> On Fri, 28 Aug 2020 18:49:55 -0400
> Steven Rostedt <rostedt@goodmis.org> wrote:
> 
> > On Fri, 28 Aug 2020 15:53:06 +0800
> > Wen Gong <wgong@codeaurora.org> wrote:
> >   
> > > this patch commit id is : 0fc1b09ff1ff404ddf753f5ffa5cd0adc8fdcdc9 which 
> > > has upstream.
> > > 
> > > how much size is the per cpu buffer?
> > > seems it is initilized in trace_buffered_event_enable,
> > > it is only 1 page size as below:
> > > void trace_buffered_event_enable(void)
> > > {
> > > ...
> > > 	for_each_tracing_cpu(cpu) {
> > > 		page = alloc_pages_node(cpu_to_node(cpu),
> > > 					GFP_KERNEL | __GFP_NORETRY, 0);
> > > If the size of buffer to trace is more than 1 page, such as 46680, then 
> > > it trigger kernel crash/panic in my case while run trace-cmd.
> > > After debugging, the trace_file->flags in 
> > > trace_event_buffer_lock_reserve is 0x40b while run trace-cmd, and it is 
> > > 0x403 while collecting ftrace log.
> > > 
> > > Is it have any operation to disable this patch dynamically?    
> > 
> > It shouldn't be disabled, this is a bug that needs to be fixed.
> > 
> > Also, if an event is more than a page, it wont be saved in the ftrace
> > ring buffer, as events are limited by page size minus the headers.
> >   
> 
> Untested (not even compiled, as I'm now on PTO) but does this patch
> work for you?
> 
> -- Steve
> 
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index f40d850ebabc..3a9b4422e7fc 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -2598,7 +2598,7 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
>  	    (entry = this_cpu_read(trace_buffered_event))) {
>  		/* Try to use the per cpu buffer first */
>  		val = this_cpu_inc_return(trace_buffered_event_cnt);
> -		if (val == 1) {
> +		if (val == 1 || (len > (PAGE_SIZE - 8))) {

That was supposed to be:

		if (val == 1 && (len < (PAGE_SIZE - 8))) {

-- Steve

>  			trace_event_setup(entry, type, flags, pc);
>  			entry->array[0] = len;
>  			return entry;



* Re: [for-next][PATCH 2/2] tracing: Use temp buffer when filtering events
  2020-08-28 22:55         ` Steven Rostedt
@ 2020-08-29 15:52           ` Wen Gong
  0 siblings, 0 replies; 12+ messages in thread
From: Wen Gong @ 2020-08-29 15:52 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-kernel, Ingo Molnar, Andrew Morton, Alexei Starovoitov,
	linux-kernel-owner, ath10k, ath11k, abhishekpandit, briannorris,
	drinkcat, tientzu

On 2020-08-29 06:55, Steven Rostedt wrote:
> On Fri, 28 Aug 2020 18:54:50 -0400
> Steven Rostedt <rostedt@goodmis.org> wrote:
> 
>> On Fri, 28 Aug 2020 18:49:55 -0400
>> Steven Rostedt <rostedt@goodmis.org> wrote:
>> 
>> > On Fri, 28 Aug 2020 15:53:06 +0800
>> > Wen Gong <wgong@codeaurora.org> wrote:
>> >
>> > > this patch commit id is : 0fc1b09ff1ff404ddf753f5ffa5cd0adc8fdcdc9 which
>> > > has upstream.
>> > >
>> > > how much size is the per cpu buffer?
>> > > seems it is initilized in trace_buffered_event_enable,
>> > > it is only 1 page size as below:
>> > > void trace_buffered_event_enable(void)
>> > > {
>> > > ...
>> > > 	for_each_tracing_cpu(cpu) {
>> > > 		page = alloc_pages_node(cpu_to_node(cpu),
>> > > 					GFP_KERNEL | __GFP_NORETRY, 0);
>> > > If the size of buffer to trace is more than 1 page, such as 46680, then
>> > > it trigger kernel crash/panic in my case while run trace-cmd.
>> > > After debugging, the trace_file->flags in
>> > > trace_event_buffer_lock_reserve is 0x40b while run trace-cmd, and it is
>> > > 0x403 while collecting ftrace log.
>> > >
>> > > Is it have any operation to disable this patch dynamically?
>> >
>> > It shouldn't be disabled, this is a bug that needs to be fixed.
>> >
>> > Also, if an event is more than a page, it wont be saved in the ftrace
>> > ring buffer, as events are limited by page size minus the headers.
>> >
>> 
>> Untested (not even compiled, as I'm now on PTO) but does this patch
>> work for you?
>> 
>> -- Steve
>> 
>> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
>> index f40d850ebabc..3a9b4422e7fc 100644
>> --- a/kernel/trace/trace.c
>> +++ b/kernel/trace/trace.c
>> @@ -2598,7 +2598,7 @@ trace_event_buffer_lock_reserve(struct 
>> trace_buffer **current_rb,
>>  	    (entry = this_cpu_read(trace_buffered_event))) {
>>  		/* Try to use the per cpu buffer first */
>>  		val = this_cpu_inc_return(trace_buffered_event_cnt);
>> -		if (val == 1) {
>> +		if (val == 1 || (len > (PAGE_SIZE - 8))) {
> 
> That was suppose to be:
> 
> 		if (val == 1 && (len < (PAGE_SIZE - 8))) {
> 
> -- Steve
Thanks Steve!

With a change like this, I think it will fix my crash issue.
Will you commit this change?
> 
>>  			trace_event_setup(entry, type, flags, pc);
>>  			entry->array[0] = len;
>>  			return entry;


Thread overview: 12 messages
2016-05-04 13:52 [for-next][PATCH 0/2] tracing: Use a temp buffer when filtering events Steven Rostedt
2016-05-04 13:52 ` [for-next][PATCH 1/2] tracing: Remove TRACE_EVENT_FL_USE_CALL_FILTER logic Steven Rostedt
2016-05-04 13:52 ` [for-next][PATCH 2/2] tracing: Use temp buffer when filtering events Steven Rostedt
2016-05-05 15:20   ` Alexei Starovoitov
2016-05-05 15:32     ` Steven Rostedt
2016-05-05 15:35       ` Steven Rostedt
2016-05-05 15:40         ` Alexei Starovoitov
2020-08-28  7:53   ` Wen Gong
2020-08-28 22:49     ` Steven Rostedt
2020-08-28 22:54       ` Steven Rostedt
2020-08-28 22:55         ` Steven Rostedt
2020-08-29 15:52           ` Wen Gong
