linux-kernel.vger.kernel.org archive mirror
* [RFC PATCH 0/5] Add "func_no_repeats" tracing option
@ 2021-03-04  9:01 Yordan Karadzhov (VMware)
  2021-03-04  9:01 ` [RFC PATCH 1/5] tracing: Define new ftrace event "func_repeats" Yordan Karadzhov (VMware)
                   ` (4 more replies)
  0 siblings, 5 replies; 9+ messages in thread
From: Yordan Karadzhov (VMware) @ 2021-03-04  9:01 UTC (permalink / raw)
  To: linux-kernel; +Cc: tglx, peterz, rostedt, Yordan Karadzhov (VMware)

The new option for function tracing aims to save space on the ring
buffer and to make it more readable in the case when a single function
is called a number of times consecutively:

	while (cond)
		do_func();

Instead of having an identical record for each call of the function,
we will record only the first call, followed by an event showing the
number of repeats.
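
The idea, as a rough userspace sketch (illustrative only; the real
implementation is in the patches below and the addresses used here are
made up):

	#include <stdio.h>

	/*
	 * Remember the last (ip, parent_ip) pair, count identical consecutive
	 * calls and flush the counter as a single "repeats" record as soon as
	 * a different function shows up.
	 */
	struct repeat_state {
		unsigned long ip;
		unsigned long parent_ip;
		unsigned long count;
	};

	static void record_call(struct repeat_state *st,
				unsigned long ip, unsigned long parent_ip)
	{
		if (st->ip == ip && st->parent_ip == parent_ip) {
			st->count++;		/* repeated call: nothing is written */
			return;
		}
		if (st->count)			/* flush the pending repeats first */
			printf("%#lx <-%#lx (repeats: %lu)\n",
			       st->ip, st->parent_ip, st->count);
		st->ip = ip;
		st->parent_ip = parent_ip;
		st->count = 0;
		printf("%#lx <-%#lx\n", ip, parent_ip);	/* first call is recorded as usual */
	}

	int main(void)
	{
		struct repeat_state st = { 0, 0, 0 };
		int i;

		record_call(&st, 0x1000, 0x2000);	/* do_func() called from the loop */
		for (i = 0; i < 127; i++)
			record_call(&st, 0x1000, 0x2000);
		record_call(&st, 0x3000, 0x2000);	/* a different call flushes the count */
		return 0;
	}

Running it prints one line for the first call and a single
"(repeats: 127)" line instead of 127 further identical lines.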

Yordan Karadzhov (VMware) (5):
  tracing: Define new ftrace event "func_repeats"
  tracing: Add "last_func_repeats" to struct trace_array
  tracing: Add method for recording "func_repeats" events
  tracing: Unify the logic for function tracing options
  tracing: Add "func_no_repeats" option for function tracing

 kernel/trace/trace.c           |  22 ++++
 kernel/trace/trace.h           |  24 ++++
 kernel/trace/trace_entries.h   |  16 +++
 kernel/trace/trace_functions.c | 219 +++++++++++++++++++++++++++++----
 kernel/trace/trace_output.c    |  44 +++++++
 5 files changed, 298 insertions(+), 27 deletions(-)

-- 
2.25.1



* [RFC PATCH 1/5] tracing: Define new ftrace event "func_repeats"
  2021-03-04  9:01 [RFC PATCH 0/5] Add "func_no_repeats" tracing option Yordan Karadzhov (VMware)
@ 2021-03-04  9:01 ` Yordan Karadzhov (VMware)
  2021-03-04 16:38   ` Steven Rostedt
  2021-03-04  9:01 ` [RFC PATCH 2/5] tracing: Add "last_func_repeats" to struct trace_array Yordan Karadzhov (VMware)
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 9+ messages in thread
From: Yordan Karadzhov (VMware) @ 2021-03-04  9:01 UTC (permalink / raw)
  To: linux-kernel; +Cc: tglx, peterz, rostedt, Yordan Karadzhov (VMware)

The event aims to consolidate the function tracing record in the cases
when a single function is called a number of times consecutively.

	while (cond)
		do_func();

This may happen in various scenarios (busy waiting, for example).
The new ftrace event can be used to show such repeated function calls
with a single event and to save space on the ring buffer.
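
For example, a burst of identical calls would then show up in the
trace as a single line roughly like (illustrative output, without the
usual task/CPU/timestamp columns):

	___might_sleep <-gc_worker (repeats: 127)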

Signed-off-by: Yordan Karadzhov (VMware) <y.karadz@gmail.com>
---
 kernel/trace/trace.h         |  3 +++
 kernel/trace/trace_entries.h | 16 +++++++++++++
 kernel/trace/trace_output.c  | 44 ++++++++++++++++++++++++++++++++++++
 3 files changed, 63 insertions(+)

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index a6446c03cfbc..2be4a56879de 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -45,6 +45,7 @@ enum trace_type {
 	TRACE_BPUTS,
 	TRACE_HWLAT,
 	TRACE_RAW_DATA,
+	TRACE_FUNC_REPEATS,
 
 	__TRACE_LAST_TYPE,
 };
@@ -441,6 +442,8 @@ extern void __ftrace_bad_type(void);
 			  TRACE_GRAPH_ENT);		\
 		IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry,	\
 			  TRACE_GRAPH_RET);		\
+		IF_ASSIGN(var, ent, struct func_repeats_entry,		\
+			  TRACE_FUNC_REPEATS);				\
 		__ftrace_bad_type();					\
 	} while (0)
 
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index 4547ac59da61..8007f9b6417f 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -338,3 +338,19 @@ FTRACE_ENTRY(hwlat, hwlat_entry,
 		 __entry->nmi_total_ts,
 		 __entry->nmi_count)
 );
+
+FTRACE_ENTRY(func_repeats, func_repeats_entry,
+
+	TRACE_FUNC_REPEATS,
+
+	F_STRUCT(
+		__field(	unsigned long,	ip	)
+		__field(	unsigned long,	pip	)
+		__field(	unsigned long,	count	)
+	),
+
+	F_printk(" %ps <-%ps\t(repeats:%lu)",
+		 (void *)__entry->ip,
+		 (void *)__entry->pip,
+		 __entry->count)
+);
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 61255bad7e01..af6b066972e9 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -1373,6 +1373,49 @@ static struct trace_event trace_raw_data_event = {
 	.funcs		= &trace_raw_data_funcs,
 };
 
+static enum print_line_t
+trace_func_repeats_raw(struct trace_iterator *iter, int flags,
+			 struct trace_event *event)
+{
+	struct func_repeats_entry *field;
+	struct trace_seq *s = &iter->seq;
+
+	trace_assign_type(field, iter->ent);
+
+	trace_seq_printf(s, "%lu %lu %li\n",
+			 field->pip,
+			 field->ip,
+			 field->count);
+
+	return trace_handle_return(s);
+}
+
+static enum print_line_t
+trace_func_repeats_print(struct trace_iterator *iter, int flags,
+			 struct trace_event *event)
+{
+	struct func_repeats_entry *field;
+	struct trace_seq *s = &iter->seq;
+
+	trace_assign_type(field, iter->ent);
+
+	seq_print_ip_sym(s, field->ip, flags);
+	trace_seq_puts(s, " <-");
+	seq_print_ip_sym(s, field->pip, flags);
+	trace_seq_printf(s, " (repeats: %li)\n", field->count);
+
+	return trace_handle_return(s);
+}
+
+static struct trace_event_functions trace_func_repeats_funcs = {
+	.trace		= trace_func_repeats_print,
+	.raw		= trace_func_repeats_raw,
+};
+
+static struct trace_event trace_func_repeats_event = {
+	.type	 	= TRACE_FUNC_REPEATS,
+	.funcs		= &trace_func_repeats_funcs,
+};
 
 static struct trace_event *events[] __initdata = {
 	&trace_fn_event,
@@ -1385,6 +1428,7 @@ static struct trace_event *events[] __initdata = {
 	&trace_print_event,
 	&trace_hwlat_event,
 	&trace_raw_data_event,
+	&trace_func_repeats_event,
 	NULL
 };
 
-- 
2.25.1



* [RFC PATCH 2/5] tracing: Add "last_func_repeats" to struct trace_array
  2021-03-04  9:01 [RFC PATCH 0/5] Add "func_no_repeats" tracing option Yordan Karadzhov (VMware)
  2021-03-04  9:01 ` [RFC PATCH 1/5] tracing: Define new ftrace event "func_repeats" Yordan Karadzhov (VMware)
@ 2021-03-04  9:01 ` Yordan Karadzhov (VMware)
  2021-03-04  9:01 ` [RFC PATCH 3/5] tracing: Add method for recording "func_repeats" events Yordan Karadzhov (VMware)
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 9+ messages in thread
From: Yordan Karadzhov (VMware) @ 2021-03-04  9:01 UTC (permalink / raw)
  To: linux-kernel; +Cc: tglx, peterz, rostedt, Yordan Karadzhov (VMware)

The field is used to keep track of the consecutive (on the same CPU) calls
of a single function. This information is needed in order to consolidate
the function tracing record in the cases when a single function is called
a number of times consecutively.

Signed-off-by: Yordan Karadzhov (VMware) <y.karadz@gmail.com>
---
 kernel/trace/trace.c |  1 +
 kernel/trace/trace.h | 17 +++++++++++++++++
 2 files changed, 18 insertions(+)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index e295c413580e..5f5fa08c0644 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -8895,6 +8895,7 @@ static int __remove_instance(struct trace_array *tr)
 	ftrace_clear_pids(tr);
 	ftrace_destroy_function_files(tr);
 	tracefs_remove(tr->dir);
+	free_percpu(tr->last_func_repeats);
 	free_trace_buffers(tr);
 
 	for (i = 0; i < tr->nr_topts; i++) {
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 2be4a56879de..09bf12c038f4 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -262,6 +262,16 @@ struct cond_snapshot {
 	cond_update_fn_t		update;
 };
 
+/*
+ * struct trace_func_repeats - used to keep track of the consecutive
+ * (on the same CPU) calls of a single function.
+ */
+struct trace_func_repeats {
+	unsigned long ip;
+	unsigned long parent_ip;
+	unsigned long count;
+};
+
 /*
  * The trace array - an array of per-CPU trace arrays. This is the
  * highest level data structure that individual tracers deal with.
@@ -358,8 +368,15 @@ struct trace_array {
 #ifdef CONFIG_TRACER_SNAPSHOT
 	struct cond_snapshot	*cond_snapshot;
 #endif
+	struct trace_func_repeats	__percpu *last_func_repeats;
 };
 
+static inline struct trace_func_repeats *
+tracer_alloc_func_repeats(struct trace_array *tr)
+{
+	return tr->last_func_repeats = alloc_percpu(struct trace_func_repeats);
+}
+
 enum {
 	TRACE_ARRAY_FL_GLOBAL	= (1 << 0)
 };
-- 
2.25.1



* [RFC PATCH 3/5] tracing: Add method for recording "func_repeats" events
  2021-03-04  9:01 [RFC PATCH 0/5] Add "func_no_repeats" tracing option Yordan Karadzhov (VMware)
  2021-03-04  9:01 ` [RFC PATCH 1/5] tracing: Define new ftrace event "func_repeats" Yordan Karadzhov (VMware)
  2021-03-04  9:01 ` [RFC PATCH 2/5] tracing: Add "last_func_repeats" to struct trace_array Yordan Karadzhov (VMware)
@ 2021-03-04  9:01 ` Yordan Karadzhov (VMware)
  2021-03-04  9:01 ` [RFC PATCH 4/5] tracing: Unify the logic for function tracing options Yordan Karadzhov (VMware)
  2021-03-04  9:01 ` [RFC PATCH 5/5] tracing: Add "func_no_repeats" option for function tracing Yordan Karadzhov (VMware)
  4 siblings, 0 replies; 9+ messages in thread
From: Yordan Karadzhov (VMware) @ 2021-03-04  9:01 UTC (permalink / raw)
  To: linux-kernel; +Cc: tglx, peterz, rostedt, Yordan Karadzhov (VMware)

This patch only provides the implementation of the method.
Later we will use it in combination with a new option for
function tracing.

Signed-off-by: Yordan Karadzhov (VMware) <y.karadz@gmail.com>
---
 kernel/trace/trace.c | 21 +++++++++++++++++++++
 kernel/trace/trace.h |  4 ++++
 2 files changed, 25 insertions(+)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 5f5fa08c0644..5c62fda666af 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3109,6 +3109,27 @@ static void ftrace_trace_userstack(struct trace_array *tr,
 
 #endif /* CONFIG_STACKTRACE */
 
+void trace_last_func_repeats(struct trace_array *tr,
+			     struct trace_func_repeats *last_info,
+			     unsigned int trace_ctx)
+{
+	struct trace_buffer *buffer = tr->array_buffer.buffer;
+	struct func_repeats_entry *entry;
+	struct ring_buffer_event *event;
+
+	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
+					    sizeof(*entry), trace_ctx);
+	if (!event)
+		return;
+
+	entry = ring_buffer_event_data(event);
+	entry->ip = last_info->ip;
+	entry->pip = last_info->parent_ip;
+	entry->count = last_info->count;
+
+	__buffer_unlock_commit(buffer, event);
+}
+
 /* created for use with alloc_percpu */
 struct trace_buffer_struct {
 	int nesting;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 09bf12c038f4..0ef823bb9594 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -696,6 +696,10 @@ static inline void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
 }
 #endif /* CONFIG_STACKTRACE */
 
+void trace_last_func_repeats(struct trace_array *tr,
+			     struct trace_func_repeats *last_info,
+			     unsigned int trace_ctx);
+
 extern u64 ftrace_now(int cpu);
 
 extern void trace_find_cmdline(int pid, char comm[]);
-- 
2.25.1



* [RFC PATCH 4/5] tracing: Unify the logic for function tracing options
  2021-03-04  9:01 [RFC PATCH 0/5] Add "func_no_repeats" tracing option Yordan Karadzhov (VMware)
                   ` (2 preceding siblings ...)
  2021-03-04  9:01 ` [RFC PATCH 3/5] tracing: Add method for recording "func_repeats" events Yordan Karadzhov (VMware)
@ 2021-03-04  9:01 ` Yordan Karadzhov (VMware)
  2021-03-04  9:01 ` [RFC PATCH 5/5] tracing: Add "func_no_repeats" option for function tracing Yordan Karadzhov (VMware)
  4 siblings, 0 replies; 9+ messages in thread
From: Yordan Karadzhov (VMware) @ 2021-03-04  9:01 UTC (permalink / raw)
  To: linux-kernel; +Cc: tglx, peterz, rostedt, Yordan Karadzhov (VMware)

Currently the logic for dealing with the options for function tracing
has two different implementations. One is used when we set the flags
(in "static int func_set_flag()") and another is used when we initialize
the tracer (in "static int function_trace_init()"). Those two
implementations are meant to do essentially the same thing and neither
is very convenient for adding new options. In this patch we add a
helper function that provides a single implementation of the option
handling logic and makes it easy to add new options.

Signed-off-by: Yordan Karadzhov (VMware) <y.karadz@gmail.com>
---
 kernel/trace/trace_functions.c | 66 ++++++++++++++++++++--------------
 1 file changed, 40 insertions(+), 26 deletions(-)

diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index f93723ca66bc..6c912eb0508a 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -27,13 +27,17 @@ function_trace_call(unsigned long ip, unsigned long parent_ip,
 static void
 function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
 			  struct ftrace_ops *op, struct ftrace_regs *fregs);
+static ftrace_func_t select_trace_function(u32 flags_val);
 static struct tracer_flags func_flags;
 
 /* Our option */
 enum {
+	TRACE_FUNC_NO_OPTS	= 0x0, /* No flags set. */
 	TRACE_FUNC_OPT_STACK	= 0x1,
 };
 
+#define TRACE_FUNC_OPT_MASK	(TRACE_FUNC_OPT_STACK)
+
 int ftrace_allocate_ftrace_ops(struct trace_array *tr)
 {
 	struct ftrace_ops *ops;
@@ -97,12 +101,9 @@ static int function_trace_init(struct trace_array *tr)
 	if (!tr->ops)
 		return -ENOMEM;
 
-	/* Currently only the global instance can do stack tracing */
-	if (tr->flags & TRACE_ARRAY_FL_GLOBAL &&
-	    func_flags.val & TRACE_FUNC_OPT_STACK)
-		func = function_stack_trace_call;
-	else
-		func = function_trace_call;
+	func = select_trace_function(func_flags.val);
+	if (!func)
+		return -EINVAL;
 
 	ftrace_init_array_ops(tr, func);
 
@@ -205,6 +206,18 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
 	local_irq_restore(flags);
 }
 
+static ftrace_func_t select_trace_function(u32 flags_val)
+{
+	switch (flags_val & TRACE_FUNC_OPT_MASK) {
+	case TRACE_FUNC_NO_OPTS:
+		return function_trace_call;
+	case TRACE_FUNC_OPT_STACK:
+		return function_stack_trace_call;
+	default:
+		return NULL;
+	}
+}
+
 static struct tracer_opt func_opts[] = {
 #ifdef CONFIG_STACKTRACE
 	{ TRACER_OPT(func_stack_trace, TRACE_FUNC_OPT_STACK) },
@@ -213,7 +226,7 @@ static struct tracer_opt func_opts[] = {
 };
 
 static struct tracer_flags func_flags = {
-	.val = 0, /* By default: all flags disabled */
+	.val = TRACE_FUNC_NO_OPTS, /* By default: all flags disabled */
 	.opts = func_opts
 };
 
@@ -235,30 +248,31 @@ static struct tracer function_trace;
 static int
 func_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
 {
-	switch (bit) {
-	case TRACE_FUNC_OPT_STACK:
-		/* do nothing if already set */
-		if (!!set == !!(func_flags.val & TRACE_FUNC_OPT_STACK))
-			break;
+	ftrace_func_t func;
+	u32 new_flags_val;
 
-		/* We can change this flag when not running. */
-		if (tr->current_trace != &function_trace)
-			break;
+	/* Do nothing if already set. */
+	if (!!set == !!(func_flags.val & bit))
+		return 0;
 
-		unregister_ftrace_function(tr->ops);
+	/* We can change this flag only when not running. */
+	if (tr->current_trace != &function_trace)
+		return 0;
 
-		if (set) {
-			tr->ops->func = function_stack_trace_call;
-			register_ftrace_function(tr->ops);
-		} else {
-			tr->ops->func = function_trace_call;
-			register_ftrace_function(tr->ops);
-		}
+	new_flags_val = (func_flags.val & ~(1UL << (bit - 1)));
+	new_flags_val |= (set << (bit - 1));
 
-		break;
-	default:
+	func = select_trace_function(new_flags_val);
+	if (!func)
 		return -EINVAL;
-	}
+
+	/* Check if there's anything to change. */
+	if (tr->ops->func == func)
+		return 0;
+
+	unregister_ftrace_function(tr->ops);
+	tr->ops->func = func;
+	register_ftrace_function(tr->ops);
 
 	return 0;
 }
-- 
2.25.1



* [RFC PATCH 5/5] tracing: Add "func_no_repeats" option for function tracing
  2021-03-04  9:01 [RFC PATCH 0/5] Add "func_no_repeats" tracing option Yordan Karadzhov (VMware)
                   ` (3 preceding siblings ...)
  2021-03-04  9:01 ` [RFC PATCH 4/5] tracing: Unify the logic for function tracing options Yordan Karadzhov (VMware)
@ 2021-03-04  9:01 ` Yordan Karadzhov (VMware)
  4 siblings, 0 replies; 9+ messages in thread
From: Yordan Karadzhov (VMware) @ 2021-03-04  9:01 UTC (permalink / raw)
  To: linux-kernel; +Cc: tglx, peterz, rostedt, Yordan Karadzhov (VMware)

If the option is activated, the function tracing record gets
consolidated in the cases when a single function is called a number
of times consecutively. Instead of having an identical record for
each call of the function, we will record only the first call,
followed by an event showing the number of repeats.
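
The option can then be exercised like this (an illustrative shell
session; it assumes the option file under tracefs gets the name given
to TRACER_OPT() in the patch below):

	# cd /sys/kernel/tracing
	# echo function > current_tracer
	# echo 1 > options/func-no-repeats
	# cat trace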

Signed-off-by: Yordan Karadzhov (VMware) <y.karadz@gmail.com>

---
 kernel/trace/trace_functions.c | 157 ++++++++++++++++++++++++++++++++-
 1 file changed, 154 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 6c912eb0508a..fbf60ff93ffb 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -27,16 +27,28 @@ function_trace_call(unsigned long ip, unsigned long parent_ip,
 static void
 function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
 			  struct ftrace_ops *op, struct ftrace_regs *fregs);
+static void
+function_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip,
+			       struct ftrace_ops *op, struct ftrace_regs *fregs);
+static void
+function_stack_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip,
+				     struct ftrace_ops *op,
+				     struct ftrace_regs *fregs);
 static ftrace_func_t select_trace_function(u32 flags_val);
 static struct tracer_flags func_flags;
 
 /* Our option */
 enum {
-	TRACE_FUNC_NO_OPTS	= 0x0, /* No flags set. */
-	TRACE_FUNC_OPT_STACK	= 0x1,
+
+	TRACE_FUNC_NO_OPTS		= 0x0, /* No flags set. */
+	TRACE_FUNC_OPT_STACK		= 0x1,
+	TRACE_FUNC_OPT_NO_REPEATS	= 0x2,
+
+	/* Update this to next highest bit. */
+	TRACE_FUNC_OPT_HIGHEST_BIT	= 0x4
 };
 
-#define TRACE_FUNC_OPT_MASK	(TRACE_FUNC_OPT_STACK)
+#define TRACE_FUNC_OPT_MASK	(TRACE_FUNC_OPT_HIGHEST_BIT - 1)
 
 int ftrace_allocate_ftrace_ops(struct trace_array *tr)
 {
@@ -90,6 +102,17 @@ void ftrace_destroy_function_files(struct trace_array *tr)
 	ftrace_free_ftrace_ops(tr);
 }
 
+static bool handle_func_repeats(struct trace_array *tr, u32 flags_val)
+{
+	if (!tr->last_func_repeats &&
+	    (flags_val & TRACE_FUNC_OPT_NO_REPEATS)) {
+		if (!tracer_alloc_func_repeats(tr))
+			return false;
+	}
+
+	return true;
+}
+
 static int function_trace_init(struct trace_array *tr)
 {
 	ftrace_func_t func;
@@ -105,6 +128,9 @@ static int function_trace_init(struct trace_array *tr)
 	if (!func)
 		return -EINVAL;
 
+	if (!handle_func_repeats(tr, func_flags.val))
+		return -ENOMEM;
+
 	ftrace_init_array_ops(tr, func);
 
 	tr->array_buffer.cpu = raw_smp_processor_id();
@@ -206,6 +232,123 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
 	local_irq_restore(flags);
 }
 
+static inline bool is_repeat(struct trace_func_repeats *last_info,
+			     unsigned long ip, unsigned long parent_ip)
+{
+	if (last_info->ip == ip &&
+	    last_info->parent_ip == parent_ip) {
+		last_info->count++;
+		return true;
+	}
+
+	return false;
+}
+
+static inline void process_repeats(struct trace_array *tr,
+				   unsigned long ip, unsigned long parent_ip,
+				   struct trace_func_repeats *last_info,
+				   unsigned int trace_ctx)
+{
+	if (last_info->count) {
+		trace_last_func_repeats(tr, last_info, trace_ctx);
+		last_info->count = 0;
+	}
+
+	last_info->ip = ip;
+	last_info->parent_ip = parent_ip;
+}
+
+static void
+function_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip,
+			       struct ftrace_ops *op,
+			       struct ftrace_regs *fregs)
+{
+	struct trace_func_repeats *last_info;
+	struct trace_array *tr = op->private;
+	struct trace_array_cpu *data;
+	unsigned int trace_ctx;
+	unsigned long flags;
+	int bit;
+	int cpu;
+
+	if (unlikely(!tr->function_enabled))
+		return;
+
+	bit = ftrace_test_recursion_trylock(ip, parent_ip);
+	if (bit < 0)
+		return;
+
+	preempt_disable_notrace();
+
+	cpu = smp_processor_id();
+	data = per_cpu_ptr(tr->array_buffer.data, cpu);
+	if (atomic_read(&data->disabled))
+		goto out;
+
+	/*
+	 * An interrupt may happen at any place here. But as far as I can see,
+	 * the only damage that this can cause is to mess up the repetition
+	 * counter without valuable data being lost.
+	 * TODO: think about a solution that is better than just hoping to be
+	 * lucky.
+	 */
+	last_info = per_cpu_ptr(tr->last_func_repeats, cpu);
+	if (is_repeat(last_info, ip, parent_ip))
+		goto out;
+
+	local_save_flags(flags);
+	trace_ctx = tracing_gen_ctx_flags(flags);
+	process_repeats(tr, ip, parent_ip, last_info, trace_ctx);
+
+	trace_function(tr, ip, parent_ip, trace_ctx);
+
+out:
+	ftrace_test_recursion_unlock(bit);
+	preempt_enable_notrace();
+}
+
+static void
+function_stack_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip,
+				     struct ftrace_ops *op,
+				     struct ftrace_regs *fregs)
+{
+	struct trace_func_repeats *last_info;
+	struct trace_array *tr = op->private;
+	struct trace_array_cpu *data;
+	unsigned long flags;
+	long disabled;
+	int cpu;
+	unsigned int trace_ctx;
+
+	if (unlikely(!tr->function_enabled))
+		return;
+
+	/*
+	 * Need to use raw, since this must be called before the
+	 * recursive protection is performed.
+	 */
+	local_irq_save(flags);
+	cpu = raw_smp_processor_id();
+	data = per_cpu_ptr(tr->array_buffer.data, cpu);
+	disabled = atomic_inc_return(&data->disabled);
+
+	if (likely(disabled == 1)) {
+		last_info = per_cpu_ptr(tr->last_func_repeats, cpu);
+		if (is_repeat(last_info, ip, parent_ip))
+			goto out;
+
+		trace_ctx = tracing_gen_ctx_flags(flags);
+		process_repeats(tr, ip, parent_ip, last_info, trace_ctx);
+
+		trace_function(tr, ip, parent_ip, trace_ctx);
+		__trace_stack(tr, trace_ctx, STACK_SKIP);
+	}
+
+ out:
+	atomic_dec(&data->disabled);
+	local_irq_restore(flags);
+}
+
 static ftrace_func_t select_trace_function(u32 flags_val)
 {
 	switch (flags_val & TRACE_FUNC_OPT_MASK) {
@@ -213,6 +356,10 @@ static ftrace_func_t select_trace_function(u32 flags_val)
 		return function_trace_call;
 	case TRACE_FUNC_OPT_STACK:
 		return function_stack_trace_call;
+	case TRACE_FUNC_OPT_NO_REPEATS:
+		return function_no_repeats_trace_call;
+	case TRACE_FUNC_OPT_STACK | TRACE_FUNC_OPT_NO_REPEATS:
+		return function_stack_no_repeats_trace_call;
 	default:
 		return NULL;
 	}
@@ -222,6 +369,7 @@ static struct tracer_opt func_opts[] = {
 #ifdef CONFIG_STACKTRACE
 	{ TRACER_OPT(func_stack_trace, TRACE_FUNC_OPT_STACK) },
 #endif
+	{ TRACER_OPT(func-no-repeats, TRACE_FUNC_OPT_NO_REPEATS) },
 	{ } /* Always set a last empty entry */
 };
 
@@ -270,6 +418,9 @@ func_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
 	if (tr->ops->func == func)
 		return 0;
 
+	if (!handle_func_repeats(tr, new_flags_val))
+		return -ENOMEM;
+
 	unregister_ftrace_function(tr->ops);
 	tr->ops->func = func;
 	register_ftrace_function(tr->ops);
-- 
2.25.1



* Re: [RFC PATCH 1/5] tracing: Define new ftrace event "func_repeats"
  2021-03-04  9:01 ` [RFC PATCH 1/5] tracing: Define new ftrace event "func_repeats" Yordan Karadzhov (VMware)
@ 2021-03-04 16:38   ` Steven Rostedt
  2021-03-08 14:57     ` Yordan Karadzhov (VMware)
  0 siblings, 1 reply; 9+ messages in thread
From: Steven Rostedt @ 2021-03-04 16:38 UTC (permalink / raw)
  To: Yordan Karadzhov (VMware); +Cc: linux-kernel, tglx, peterz

On Thu,  4 Mar 2021 11:01:37 +0200
"Yordan Karadzhov (VMware)" <y.karadz@gmail.com> wrote:

Thanks Yordan for doing this!

I have some comments below.

> diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
> index 4547ac59da61..8007f9b6417f 100644
> --- a/kernel/trace/trace_entries.h
> +++ b/kernel/trace/trace_entries.h
> @@ -338,3 +338,19 @@ FTRACE_ENTRY(hwlat, hwlat_entry,
>  		 __entry->nmi_total_ts,
>  		 __entry->nmi_count)
>  );
> +
> +FTRACE_ENTRY(func_repeats, func_repeats_entry,
> +
> +	TRACE_FUNC_REPEATS,
> +
> +	F_STRUCT(
> +		__field(	unsigned long,	ip	)
> +		__field(	unsigned long,	pip	)
> +		__field(	unsigned long,	count	)
> +	),
> +
> +	F_printk(" %ps <-%ps\t(repeats:%lu)",
> +		 (void *)__entry->ip,
> +		 (void *)__entry->pip,
> +		 __entry->count)

After playing with this a little, I realized that we should also store the
last timestamp as well. I think that would be interesting information.

           <...>-37      [004] ...1  2022.303820: gc_worker <-process_one_work
           <...>-37      [004] ...1  2022.303820: ___might_sleep <-gc_worker
           <...>-37      [004] ...1  2022.303831: ___might_sleep <-gc_worker (repeats: 127)
           <...>-37      [004] ...1  2022.303831: queue_delayed_work_on <-process_one_work

The above shows that __might_sleep() was called 128 times, but what I don't
get from the above, is when that last call was made. You'll see that the
timestamp for the repeat output is the same as the next function shown
(queue_delayed_work_on()). But the timestamp for the last call to
__might_sleep() is lost, and the repeat event ends up being written when
it is detected that there are no more repeats.

If we had:

           <...>-37      [004] ...1  2022.303820: gc_worker <-process_one_work
           <...>-37      [004] ...1  2022.303820: ___might_sleep <-gc_worker
           <...>-37      [004] ...1  2022.303831: ___might_sleep <-gc_worker (last ts: 2022.303828 repeats: 127)
           <...>-37      [004] ...1  2022.303831: queue_delayed_work_on <-process_one_work

We would know the last time __might_sleep was called.

That is, not only should we save the ip and pip in the trace_func_repeats
structure, but we should also be storing the last time stamp of the last
function event that repeated. Otherwise the above looks like the last
__might_sleep called above happened when the queue_delayed_work_on
happened, where that may not be the case.

-- Steve




* Re: [RFC PATCH 1/5] tracing: Define new ftrace event "func_repeats"
  2021-03-04 16:38   ` Steven Rostedt
@ 2021-03-08 14:57     ` Yordan Karadzhov (VMware)
  2021-03-08 18:15       ` Steven Rostedt
  0 siblings, 1 reply; 9+ messages in thread
From: Yordan Karadzhov (VMware) @ 2021-03-08 14:57 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: linux-kernel, tglx, peterz


On 4.03.21 18:38, Steven Rostedt wrote:
> On Thu,  4 Mar 2021 11:01:37 +0200
> "Yordan Karadzhov (VMware)" <y.karadz@gmail.com> wrote:
> 
> Thanks Yordan for doing this!
> 
> I have some comments below.
> 

Hi Steven,

Thank you very much for looking into this!

Your suggestion makes perfect sense. I only have one clarifying question 
below.

>> diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
>> index 4547ac59da61..8007f9b6417f 100644
>> --- a/kernel/trace/trace_entries.h
>> +++ b/kernel/trace/trace_entries.h
>> @@ -338,3 +338,19 @@ FTRACE_ENTRY(hwlat, hwlat_entry,
>>   		 __entry->nmi_total_ts,
>>   		 __entry->nmi_count)
>>   );
>> +
>> +FTRACE_ENTRY(func_repeats, func_repeats_entry,
>> +
>> +	TRACE_FUNC_REPEATS,
>> +
>> +	F_STRUCT(
>> +		__field(	unsigned long,	ip	)
>> +		__field(	unsigned long,	pip	)
>> +		__field(	unsigned long,	count	)
>> +	),
>> +
>> +	F_printk(" %ps <-%ps\t(repeats:%lu)",
>> +		 (void *)__entry->ip,
>> +		 (void *)__entry->pip,
>> +		 __entry->count)
> 
> After playing with this a little, I realized that we should also store the
> last timestamp as well. I think that would be interesting information.
> 
>             <...>-37      [004] ...1  2022.303820: gc_worker <-process_one_work
>             <...>-37      [004] ...1  2022.303820: ___might_sleep <-gc_worker
>             <...>-37      [004] ...1  2022.303831: ___might_sleep <-gc_worker (repeats: 127)
>             <...>-37      [004] ...1  2022.303831: queue_delayed_work_on <-process_one_work
> 
> The above shows that __might_sleep() was called 128 times, but what I don't
> get from the above, is when that last call was made. You'll see that the
> timestamp for the repeat output is the same as the next function shown
> (queue_delayed_work_on()). But the timestamp for the last call to
> __might_sleep() is lost, and the repeat event ends up being written when
> it is detected that there are no more repeats.
> 
> If we had:
> 
>             <...>-37      [004] ...1  2022.303820: gc_worker <-process_one_work
>             <...>-37      [004] ...1  2022.303820: ___might_sleep <-gc_worker
>             <...>-37      [004] ...1  2022.303831: ___might_sleep <-gc_worker (last ts: 2022.303828 repeats: 127)
>             <...>-37      [004] ...1  2022.303831: queue_delayed_work_on <-process_one_work
> 
> We would know the last time __might_sleep was called.
> 
> That is, not only should we save the ip and pip in the trace_func_repeats
> structure, but we should also be storing the last time stamp of the last
> function event that repeated. Otherwise the above looks like the last
> __might_sleep called above happened when the queue_delayed_work_on
> happened, where that may not be the case.

If we store the last timestamp, this means we will need to use an 
additional 64 bits on the buffer every time we record the "func_repeats" 
event. This looks like overkill to me.
Can we store only the duration of the repeats (the difference between 
the timestamps)? This way we can use less memory at the price of having 
one extra arithmetic operation.
An alternative approach would be to store only the least-significant 
bits of the timestamp.
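
Just to illustrate what I have in mind (only a sketch; the struct, the 
helper and the field sizes below are made up and not part of the patch):

	struct func_repeats_entry_sketch {
		unsigned long	ip;
		unsigned long	pip;
		unsigned int	count;
		unsigned int	delta_ts;	/* ts(last repeat) - ts(first call), in ns */
	};

	static void fill_delta(struct func_repeats_entry_sketch *entry,
			       unsigned long long first_ts,
			       unsigned long long last_ts)
	{
		/* the one extra arithmetic operation at record time */
		entry->delta_ts = (unsigned int)(last_ts - first_ts);
	}

Printing the absolute value back then costs just one extra addition.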

What do you think?

Best regards,
Yordan

> 
> -- Steve
> 
> 


* Re: [RFC PATCH 1/5] tracing: Define new ftrace event "func_repeats"
  2021-03-08 14:57     ` Yordan Karadzhov (VMware)
@ 2021-03-08 18:15       ` Steven Rostedt
  0 siblings, 0 replies; 9+ messages in thread
From: Steven Rostedt @ 2021-03-08 18:15 UTC (permalink / raw)
  To: Yordan Karadzhov (VMware); +Cc: linux-kernel, tglx, peterz

On Mon, 8 Mar 2021 16:57:29 +0200
"Yordan Karadzhov (VMware)" <y.karadz@gmail.com> wrote:

> > If we had:
> > 
> >             <...>-37      [004] ...1  2022.303820: gc_worker <-process_one_work
> >             <...>-37      [004] ...1  2022.303820: ___might_sleep <-gc_worker
> >             <...>-37      [004] ...1  2022.303831: ___might_sleep <-gc_worker (last ts: 2022.303828 repeats: 127)
> >             <...>-37      [004] ...1  2022.303831: queue_delayed_work_on <-process_one_work
> > 
> > We would know the last time __might_sleep was called.
> > 
> > That is, not only should we save the ip and pip in the trace_func_repeats
> > structure, but we should also be storing the last time stamp of the last
> > function event that repeated. Otherwise the above looks like the last
> > __might_sleep called above happened when the queue_delayed_work_on
> > happened, where that may not be the case.  
> 
> If we store the last timestamp, this means we will need to use 
> additional 64b on the buffer, every time we record the "func_repeats" 
> event. This looks like an overkill to me.
> Can we store only the duration of the repeats (the difference between 
> the timestamp)? This way we can use less memory at the price of having 
> one extra arithmetic operation.
> Alternative approach can be to store only the least-significant bits of 
> the timestamp.
> 
> What do you think?

I like the way you're thinking ;-)

Let's take a look at the current event sizes.

The function event is defined as:

name: function
ID: 1
format:
	field:unsigned short common_type;	offset:0;	size:2;	signed:0;
	field:unsigned char common_flags;	offset:2;	size:1;	signed:0;
	field:unsigned char common_preempt_count;	offset:3;	size:1;	signed:0;
	field:int common_pid;	offset:4;	size:4;	signed:1;

	field:unsigned long ip;	offset:8;	size:8;	signed:0;
	field:unsigned long parent_ip;	offset:16;	size:8;	signed:0;

Showing that its total size is 24 bytes (offset:16 + size:8)

and your current repeat event has:

ID: 17
format:
	field:unsigned short common_type;	offset:0;	size:2;	signed:0;
	field:unsigned char common_flags;	offset:2;	size:1;	signed:0;
	field:unsigned char common_preempt_count;	offset:3;	size:1;	signed:0;
	field:int common_pid;	offset:4;	size:4;	signed:1;

	field:unsigned long ip;	offset:8;	size:8;	signed:0;
	field:unsigned long pip;	offset:16;	size:8;	signed:0;
	field:unsigned long count;	offset:24;	size:8;	signed:0;

Which is 32 bytes. Adding another 8 would make it 40. That is bigger than
one function event, but still smaller than two, and even adding just 4
bytes would still be noticeably more than a single function event.

Since this event is new, we could even do another trick (one that the
preemptirq events do):

ID: 434
format:
	field:unsigned short common_type;	offset:0;	size:2;	signed:0;
	field:unsigned char common_flags;	offset:2;	size:1;	signed:0;
	field:unsigned char common_preempt_count;	offset:3;	size:1;	signed:0;
	field:int common_pid;	offset:4;	size:4;	signed:1;

	field:s32 caller_offs;	offset:8;	size:4;	signed:1;
	field:s32 parent_offs;	offset:12;	size:4;	signed:1;

print fmt: "caller=%pS parent=%pS", (void *)((unsigned long)(_stext) + REC->caller_offs), (void *)((unsigned long)(_stext) + REC->parent_offs)

The (_stext) is defined in /proc/kallsyms:

# grep stext /proc/kallsyms 
ffffffff8d000000 T _stext

And the offsets are relative to that; we could do the same thing here.

	field:s32 ip;	offset:8;	size:4;	signed:1;
	field:s32 pip;	offset:12;	size:4;	signed:1;
	field:s16 count;	offset:20;	size:2;	signed:1;
	field:u16 top_ts;	offset:22;	size:2; signed:0;
	field:u32 bottom_ts;	offset:24;	size:4; signed:0;

That puts count down to 2 bytes. Having more than 64K repeats is
probably not going to happen, and if it does, we could inject this more
than once ;-)

And as all events must be 4 byte aligned, we could use 6 bytes for the
offset, giving us: 2^40 bits instead of just 2^32 bits, that is, making the
time go from 4 seconds (2^32 nanoseconds) to 18 minutes (2^40 nanoseconds).

I would grab the time stamp after saving the event to make sure that it
doesn't go backwards. It may be a little racy (if an interrupt comes in
between), but it's still a "best effort" approach.
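
Putting the value back together at print time would then be something
like (hypothetical, using the u16 top_ts / u32 bottom_ts fields
sketched above):

	u64 ts_offs = ((u64)entry->top_ts << 32) | entry->bottom_ts;

and the last repeat's time stamp is recovered relative to the time
stamp of the func_repeats event itself.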

-- Steve



