* [PATCH 0/2] ftrace: updates to tip
@ 2009-01-16  0:40 Steven Rostedt
  2009-01-16  0:40 ` [PATCH 1/2] ftrace: fix trace_output Steven Rostedt
                   ` (3 more replies)
  0 siblings, 4 replies; 29+ messages in thread
From: Steven Rostedt @ 2009-01-16  0:40 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ingo Molnar, Andrew Morton, Frederic Weisbecker, Chris Mason


Ingo,

The first patch is critical, and needs to stay with trace_output.c.
Not that critical, though, since trace_output.c is not in mainline yet.

The second patch gives the ability to stack trace functions.
I've been leery about adding this and still keep it a separate
option from the "stacktrace" that already exists. This is because
when enabled with no filtering, the lag between typing and seeing
what is typed can be up to 10 seconds or more.

I made the function stack trace an option attached to the function
tracer, so it must be enabled after the function tracer has been
set. This still needs to be updated in ftrace.txt.

The following patches are in:

  git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-2.6-trace.git

    branch: tip/devel


Lai Jiangshan (1):
      ftrace: fix trace_output

Steven Rostedt (1):
      ftrace: add stack trace to function tracer

----
 kernel/trace/trace.c           |   26 ++++++++----
 kernel/trace/trace.h           |    7 +++
 kernel/trace/trace_functions.c |   84 ++++++++++++++++++++++++++++++++++++++++
 kernel/trace/trace_output.c    |   65 +++++++++++++++----------------
 kernel/trace/trace_output.h    |    4 +-
 5 files changed, 141 insertions(+), 45 deletions(-)

-- 


* [PATCH 1/2] ftrace: fix trace_output
  2009-01-16  0:40 [PATCH 0/2] ftrace: updates to tip Steven Rostedt
@ 2009-01-16  0:40 ` Steven Rostedt
  2009-01-16  1:40   ` Andrew Morton
  2009-01-16  0:40 ` [PATCH 2/2] ftrace: add stack trace to function tracer Steven Rostedt
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 29+ messages in thread
From: Steven Rostedt @ 2009-01-16  0:40 UTC (permalink / raw)
  To: linux-kernel
  Cc: Ingo Molnar, Andrew Morton, Frederic Weisbecker, Chris Mason,
	Lai Jiangshan, Steven Rostedt

[-- Attachment #1: 0001-ftrace-fix-trace_output.patch --]
[-- Type: text/plain, Size: 5132 bytes --]

From: Lai Jiangshan <laijs@cn.fujitsu.com>

Impact: fix bug for handling partial line

trace_seq_printf(), seq_print_userip_objs(), ... return
0          -- partial line was written
other(>0)  -- success

duplicate output is also removed in trace_print_raw().

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
 kernel/trace/trace_output.c |   65 ++++++++++++++++++++----------------------
 kernel/trace/trace_output.h |    4 +-
 2 files changed, 33 insertions(+), 36 deletions(-)

diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index df0c25c..4e3ad36 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -440,9 +440,9 @@ trace_fn_raw(struct trace_seq *s, struct trace_entry *entry, int flags)
 
 	trace_assign_type(field, entry);
 
-	if (trace_seq_printf(s, "%x %x\n",
-			     field->ip,
-			     field->parent_ip))
+	if (!trace_seq_printf(s, "%x %x\n",
+			      field->ip,
+			      field->parent_ip))
 		return TRACE_TYPE_PARTIAL_LINE;
 
 	return 0;
@@ -497,14 +497,14 @@ trace_ctxwake_print(struct trace_seq *s, struct trace_entry *entry, int flags,
 	T = task_state_char(field->next_state);
 	S = task_state_char(field->prev_state);
 	comm = trace_find_cmdline(field->next_pid);
-	if (trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
-			     field->prev_pid,
-			     field->prev_prio,
-			     S, delim,
-			     field->next_cpu,
-			     field->next_pid,
-			     field->next_prio,
-			     T, comm))
+	if (!trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
+			      field->prev_pid,
+			      field->prev_prio,
+			      S, delim,
+			      field->next_cpu,
+			      field->next_pid,
+			      field->next_prio,
+			      T, comm))
 		return TRACE_TYPE_PARTIAL_LINE;
 
 	return 0;
@@ -534,14 +534,14 @@ trace_ctxwake_raw(struct trace_seq *s, struct trace_entry *entry, int flags,
 	if (!S)
 		task_state_char(field->prev_state);
 	T = task_state_char(field->next_state);
-	if (trace_seq_printf(s, "%d %d %c %d %d %d %c\n",
-			     field->prev_pid,
-			     field->prev_prio,
-			     S,
-			     field->next_cpu,
-			     field->next_pid,
-			     field->next_prio,
-			     T))
+	if (!trace_seq_printf(s, "%d %d %c %d %d %d %c\n",
+			      field->prev_pid,
+			      field->prev_prio,
+			      S,
+			      field->next_cpu,
+			      field->next_pid,
+			      field->next_prio,
+			      T))
 		return TRACE_TYPE_PARTIAL_LINE;
 
 	return 0;
@@ -639,10 +639,10 @@ trace_special_print(struct trace_seq *s, struct trace_entry *entry, int flags)
 
 	trace_assign_type(field, entry);
 
-	if (trace_seq_printf(s, "# %ld %ld %ld\n",
-			     field->arg1,
-			     field->arg2,
-			     field->arg3))
+	if (!trace_seq_printf(s, "# %ld %ld %ld\n",
+			      field->arg1,
+			      field->arg2,
+			      field->arg3))
 		return TRACE_TYPE_PARTIAL_LINE;
 
 	return 0;
@@ -697,13 +697,13 @@ trace_stack_print(struct trace_seq *s, struct trace_entry *entry, int flags)
 
 	for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
 		if (i) {
-			if (trace_seq_puts(s, " <= "))
+			if (!trace_seq_puts(s, " <= "))
 				goto partial;
 
-			if (seq_print_ip_sym(s, field->caller[i], flags))
+			if (!seq_print_ip_sym(s, field->caller[i], flags))
 				goto partial;
 		}
-		if (trace_seq_puts(s, "\n"))
+		if (!trace_seq_puts(s, "\n"))
 			goto partial;
 	}
 
@@ -731,10 +731,10 @@ trace_user_stack_print(struct trace_seq *s, struct trace_entry *entry,
 
 	trace_assign_type(field, entry);
 
-	if (seq_print_userip_objs(field, s, flags))
+	if (!seq_print_userip_objs(field, s, flags))
 		goto partial;
 
-	if (trace_seq_putc(s, '\n'))
+	if (!trace_seq_putc(s, '\n'))
 		goto partial;
 
 	return 0;
@@ -760,10 +760,10 @@ trace_print_print(struct trace_seq *s, struct trace_entry *entry, int flags)
 
 	trace_assign_type(field, entry);
 
-	if (seq_print_ip_sym(s, field->ip, flags))
+	if (!seq_print_ip_sym(s, field->ip, flags))
 		goto partial;
 
-	if (trace_seq_printf(s, ": %s", field->buf))
+	if (!trace_seq_printf(s, ": %s", field->buf))
 		goto partial;
 
 	return 0;
@@ -779,10 +779,7 @@ trace_print_raw(struct trace_seq *s, struct trace_entry *entry, int flags)
 
 	trace_assign_type(field, entry);
 
-	if (seq_print_ip_sym(s, field->ip, flags))
-		goto partial;
-
-	if (trace_seq_printf(s, "# %lx %s", field->ip, field->buf))
+	if (!trace_seq_printf(s, "# %lx %s", field->ip, field->buf))
 		goto partial;
 
 	return 0;
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
index ecab4ea..b2c1461 100644
--- a/kernel/trace/trace_output.h
+++ b/kernel/trace/trace_output.h
@@ -45,14 +45,14 @@ trace_nop_print(struct trace_seq *s, struct trace_entry *entry, int flags);
 #define SEQ_PUT_FIELD_RET(s, x)				\
 do {							\
 	if (!trace_seq_putmem(s, &(x), sizeof(x)))	\
-		return 0;				\
+		return TRACE_TYPE_PARTIAL_LINE;		\
 } while (0)
 
 #define SEQ_PUT_HEX_FIELD_RET(s, x)			\
 do {							\
 	BUILD_BUG_ON(sizeof(x) > MAX_MEMHEX_BYTES);	\
 	if (!trace_seq_putmem_hex(s, &(x), sizeof(x)))	\
-		return 0;				\
+		return TRACE_TYPE_PARTIAL_LINE;		\
 } while (0)
 
 #endif
-- 
1.5.6.5

-- 


* [PATCH 2/2] ftrace: add stack trace to function tracer
  2009-01-16  0:40 [PATCH 0/2] ftrace: updates to tip Steven Rostedt
  2009-01-16  0:40 ` [PATCH 1/2] ftrace: fix trace_output Steven Rostedt
@ 2009-01-16  0:40 ` Steven Rostedt
  2009-01-16  0:48   ` Steven Rostedt
                     ` (2 more replies)
  2009-01-16 11:23 ` [PATCH 0/2] ftrace: updates to tip Ingo Molnar
  2009-01-16 21:47 ` Chris Mason
  3 siblings, 3 replies; 29+ messages in thread
From: Steven Rostedt @ 2009-01-16  0:40 UTC (permalink / raw)
  To: linux-kernel
  Cc: Ingo Molnar, Andrew Morton, Frederic Weisbecker, Chris Mason,
	Steven Rostedt

[-- Attachment #1: 0002-ftrace-add-stack-trace-to-function-tracer.patch --]
[-- Type: text/plain, Size: 6403 bytes --]

From: Steven Rostedt <srostedt@redhat.com>

Impact: new feature to stack trace any function

Chris Mason asked about being able to pick and choose a function
and get a stack trace from it. This feature enables his request.

 # echo io_schedule > /debug/tracing/set_ftrace_filter
 # echo function > /debug/tracing/current_tracer
 # echo func_stack_trace > /debug/tracing/trace_options

Produces the following in /debug/tracing/trace:

       kjournald-702   [001]   135.673060: io_schedule <-sync_buffer
       kjournald-702   [002]   135.673671:
 <= sync_buffer
 <= __wait_on_bit
 <= out_of_line_wait_on_bit
 <= __wait_on_buffer
 <= sync_dirty_buffer
 <= journal_commit_transaction
 <= kjournald

Note, be careful about turning this on without filtering the functions.
You may find that you have a 10 second lag between typing and seeing
what you typed. This is why the stack trace for the function tracer
does not use the same stack_trace flag as the other tracers use.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
 kernel/trace/trace.c           |   26 ++++++++----
 kernel/trace/trace.h           |    7 +++
 kernel/trace/trace_functions.c |   84 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 108 insertions(+), 9 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index dcb757f..3c54cb1 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -835,10 +835,10 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data,
 		trace_function(tr, data, ip, parent_ip, flags, pc);
 }
 
-static void ftrace_trace_stack(struct trace_array *tr,
-			       struct trace_array_cpu *data,
-			       unsigned long flags,
-			       int skip, int pc)
+static void __ftrace_trace_stack(struct trace_array *tr,
+				 struct trace_array_cpu *data,
+				 unsigned long flags,
+				 int skip, int pc)
 {
 #ifdef CONFIG_STACKTRACE
 	struct ring_buffer_event *event;
@@ -846,9 +846,6 @@ static void ftrace_trace_stack(struct trace_array *tr,
 	struct stack_trace trace;
 	unsigned long irq_flags;
 
-	if (!(trace_flags & TRACE_ITER_STACKTRACE))
-		return;
-
 	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
 					 &irq_flags);
 	if (!event)
@@ -869,12 +866,23 @@ static void ftrace_trace_stack(struct trace_array *tr,
 #endif
 }
 
+static void ftrace_trace_stack(struct trace_array *tr,
+			       struct trace_array_cpu *data,
+			       unsigned long flags,
+			       int skip, int pc)
+{
+	if (!(trace_flags & TRACE_ITER_STACKTRACE))
+		return;
+
+	__ftrace_trace_stack(tr, data, flags, skip, pc);
+}
+
 void __trace_stack(struct trace_array *tr,
 		   struct trace_array_cpu *data,
 		   unsigned long flags,
-		   int skip)
+		   int skip, int pc)
 {
-	ftrace_trace_stack(tr, data, flags, skip, preempt_count());
+	__ftrace_trace_stack(tr, data, flags, skip, pc);
 }
 
 static void ftrace_trace_userstack(struct trace_array *tr,
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 79c8721..bf39a36 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -457,6 +457,11 @@ void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
 void update_max_tr_single(struct trace_array *tr,
 			  struct task_struct *tsk, int cpu);
 
+void __trace_stack(struct trace_array *tr,
+		   struct trace_array_cpu *data,
+		   unsigned long flags,
+		   int skip, int pc);
+
 extern cycle_t ftrace_now(int cpu);
 
 #ifdef CONFIG_FUNCTION_TRACER
@@ -467,6 +472,8 @@ void tracing_stop_function_trace(void);
 # define tracing_stop_function_trace()		do { } while (0)
 #endif
 
+extern int ftrace_function_enabled;
+
 #ifdef CONFIG_CONTEXT_SWITCH_TRACER
 typedef void
 (*tracer_switch_func_t)(void *private,
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 9236d7e..3a5fa08 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -16,6 +16,8 @@
 
 #include "trace.h"
 
+static struct trace_array	*func_trace;
+
 static void start_function_trace(struct trace_array *tr)
 {
 	tr->cpu = get_cpu();
@@ -34,6 +36,7 @@ static void stop_function_trace(struct trace_array *tr)
 
 static int function_trace_init(struct trace_array *tr)
 {
+	func_trace = tr;
 	start_function_trace(tr);
 	return 0;
 }
@@ -48,12 +51,93 @@ static void function_trace_start(struct trace_array *tr)
 	tracing_reset_online_cpus(tr);
 }
 
+static void
+function_stack_trace_call(unsigned long ip, unsigned long parent_ip)
+{
+	struct trace_array *tr = func_trace;
+	struct trace_array_cpu *data;
+	unsigned long flags;
+	long disabled;
+	int cpu;
+	int pc;
+
+	if (unlikely(!ftrace_function_enabled))
+		return;
+
+	/*
+	 * Need to use raw, since this must be called before the
+	 * recursive protection is performed.
+	 */
+	local_irq_save(flags);
+	cpu = raw_smp_processor_id();
+	data = tr->data[cpu];
+	disabled = atomic_inc_return(&data->disabled);
+
+	if (likely(disabled == 1)) {
+		pc = preempt_count();
+		/*
+		 * skip over 5 funcs:
+		 *    __ftrace_trace_stack,
+		 *    __trace_stack,
+		 *    function_stack_trace_call
+		 *    ftrace_list_func
+		 *    ftrace_call
+		 */
+		__trace_stack(tr, data, flags, 5, pc);
+	}
+
+	atomic_dec(&data->disabled);
+	local_irq_restore(flags);
+}
+
+static struct ftrace_ops trace_stack_ops __read_mostly =
+{
+	.func = function_stack_trace_call,
+};
+
+/* Our two options */
+enum {
+	TRACE_FUNC_OPT_STACK = 0x1,
+};
+
+static struct tracer_opt func_opts[] = {
+#ifdef CONFIG_STACKTRACE
+	{ TRACER_OPT(func_stack_trace, TRACE_FUNC_OPT_STACK) },
+#endif
+	{ } /* Always set a last empty entry */
+};
+
+static struct tracer_flags func_flags = {
+	.val = 0, /* By default: all flags disabled */
+	.opts = func_opts
+};
+
+static int func_set_flag(u32 old_flags, u32 bit, int set)
+{
+	if (bit == TRACE_FUNC_OPT_STACK) {
+		/* do nothing if already set */
+		if (!!set == !!(func_flags.val & TRACE_FUNC_OPT_STACK))
+			return 0;
+
+		if (set)
+			register_ftrace_function(&trace_stack_ops);
+		else
+			unregister_ftrace_function(&trace_stack_ops);
+
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
 static struct tracer function_trace __read_mostly =
 {
 	.name	     = "function",
 	.init	     = function_trace_init,
 	.reset	     = function_trace_reset,
 	.start	     = function_trace_start,
+	.flags		= &func_flags,
+	.set_flag	= func_set_flag,
 #ifdef CONFIG_FTRACE_SELFTEST
 	.selftest    = trace_selftest_startup_function,
 #endif
-- 
1.5.6.5

-- 


* Re: [PATCH 2/2] ftrace: add stack trace to function tracer
  2009-01-16  0:40 ` [PATCH 2/2] ftrace: add stack trace to function tracer Steven Rostedt
@ 2009-01-16  0:48   ` Steven Rostedt
  2009-01-16 10:08   ` Frédéric Weisbecker
  2009-01-16 11:19   ` Ingo Molnar
  2 siblings, 0 replies; 29+ messages in thread
From: Steven Rostedt @ 2009-01-16  0:48 UTC (permalink / raw)
  To: linux-kernel
  Cc: Ingo Molnar, Andrew Morton, Frederic Weisbecker, Chris Mason,
	Steven Rostedt


On Thu, 15 Jan 2009, Steven Rostedt wrote:
> 
> Produces the following in /debug/tracing/trace:
> 
>        kjournald-702   [001]   135.673060: io_schedule <-sync_buffer
>        kjournald-702   [002]   135.673671:
>  <= sync_buffer
>  <= __wait_on_bit
>  <= out_of_line_wait_on_bit
>  <= __wait_on_buffer
>  <= sync_dirty_buffer
>  <= journal_commit_transaction
>  <= kjournald

Note, the above is from two different CPUs. This is because I did not want
to mess up the current function trace with more branches. I took the easy
way out. I simply registered another tracer with the function tracer to dump
out the trace.

I have an idea how to fix it. I'll work on a solution now.

-- Steve



* Re: [PATCH 1/2] ftrace: fix trace_output
  2009-01-16  0:40 ` [PATCH 1/2] ftrace: fix trace_output Steven Rostedt
@ 2009-01-16  1:40   ` Andrew Morton
  2009-01-16  1:46     ` Steven Rostedt
  0 siblings, 1 reply; 29+ messages in thread
From: Andrew Morton @ 2009-01-16  1:40 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-kernel, Ingo Molnar, Frederic Weisbecker, Chris Mason,
	Lai Jiangshan, Steven Rostedt

On Thu, 15 Jan 2009 19:40:51 -0500 Steven Rostedt <rostedt@goodmis.org> wrote:

> From: Lai Jiangshan <laijs@cn.fujitsu.com>
> 
> Impact: fix bug for handling partial line
> 
> trace_seq_printf(), seq_print_userip_objs(), ... return
> 0          -- partial line was written
> other(>0)  -- success
> 
> duplicate output is also removed in trace_print_raw().
> 
> Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
> ---
>  kernel/trace/trace_output.c |   65 ++++++++++++++++++++----------------------
>  kernel/trace/trace_output.h |    4 +-
>  2 files changed, 33 insertions(+), 36 deletions(-)
> 
> diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
> index df0c25c..4e3ad36 100644
> --- a/kernel/trace/trace_output.c
> +++ b/kernel/trace/trace_output.c
> @@ -440,9 +440,9 @@ trace_fn_raw(struct trace_seq *s, struct trace_entry *entry, int flags)
>  
>  	trace_assign_type(field, entry);
>  
> -	if (trace_seq_printf(s, "%x %x\n",
> -			     field->ip,
> -			     field->parent_ip))
> +	if (!trace_seq_printf(s, "%x %x\n",

"%lx %lx", surely?

If trace_seq_printf() had the attribute(printf) thing, it would have warned.
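
Something like this is meant -- a sketch, not the exact declaration in
the tree:

	extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
		__attribute__ ((format (printf, 2, 3)));

With that annotation, gcc checks the variadic arguments against the
format string, so an unsigned long passed to "%x" would trigger a
-Wformat warning.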




* Re: [PATCH 1/2] ftrace: fix trace_output
  2009-01-16  1:40   ` Andrew Morton
@ 2009-01-16  1:46     ` Steven Rostedt
  0 siblings, 0 replies; 29+ messages in thread
From: Steven Rostedt @ 2009-01-16  1:46 UTC (permalink / raw)
  To: Andrew Morton
  Cc: linux-kernel, Ingo Molnar, Frederic Weisbecker, Chris Mason,
	Lai Jiangshan, Steven Rostedt


On Thu, 15 Jan 2009, Andrew Morton wrote:
> > 
> > diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
> > index df0c25c..4e3ad36 100644
> > --- a/kernel/trace/trace_output.c
> > +++ b/kernel/trace/trace_output.c
> > @@ -440,9 +440,9 @@ trace_fn_raw(struct trace_seq *s, struct trace_entry *entry, int flags)
> >  
> >  	trace_assign_type(field, entry);
> >  
> > -	if (trace_seq_printf(s, "%x %x\n",
> > -			     field->ip,
> > -			     field->parent_ip))
> > +	if (!trace_seq_printf(s, "%x %x\n",
> 
> "%lx %lx", surely?
> 
> If trace_seq_printf() had the attribute(printf) thing, it would have warned.

Who needs automation when we have an akpm ;-)

Will update.

Thanks,

-- Steve



* Re: [PATCH 2/2] ftrace: add stack trace to function tracer
  2009-01-16  0:40 ` [PATCH 2/2] ftrace: add stack trace to function tracer Steven Rostedt
  2009-01-16  0:48   ` Steven Rostedt
@ 2009-01-16 10:08   ` Frédéric Weisbecker
  2009-01-16 14:28     ` Steven Rostedt
  2009-01-16 11:19   ` Ingo Molnar
  2 siblings, 1 reply; 29+ messages in thread
From: Frédéric Weisbecker @ 2009-01-16 10:08 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-kernel, Ingo Molnar, Andrew Morton, Chris Mason, Steven Rostedt

Hi Steven,

2009/1/16 Steven Rostedt <rostedt@goodmis.org>:
> +static struct tracer_flags func_flags = {
> +       .val = 0, /* By default: all flags disabled */
> +       .opts = func_opts
> +};
> +
> +static int func_set_flag(u32 old_flags, u32 bit, int set)
> +{
> +       if (bit == TRACE_FUNC_OPT_STACK) {
> +               /* do nothing if already set */
> +               if (!!set == !!(func_flags.val & TRACE_FUNC_OPT_STACK))
> +                       return 0;


This is just a small thing, but set will always be 1 or 0. And if you want
the current value of your options, you can use old_flags :-)
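
A minimal sketch of that, reusing the func_set_flag() from the patch
(untested; only the condition changes):

	static int func_set_flag(u32 old_flags, u32 bit, int set)
	{
		if (bit == TRACE_FUNC_OPT_STACK) {
			/* old_flags already holds the current value */
			if (!!set == !!(old_flags & TRACE_FUNC_OPT_STACK))
				return 0;

			if (set)
				register_ftrace_function(&trace_stack_ops);
			else
				unregister_ftrace_function(&trace_stack_ops);

			return 0;
		}

		return -EINVAL;
	}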

Would you prefer I send a patch to not call the set_flag callback
when the option set/unset by the user is already set/unset?


> +               if (set)
> +                       register_ftrace_function(&trace_stack_ops);
> +               else
> +                       unregister_ftrace_function(&trace_stack_ops);
> +
> +               return 0;
> +       }
> +
> +       return -EINVAL;
> +}
> +
>  static struct tracer function_trace __read_mostly =
>  {
>        .name        = "function",
>        .init        = function_trace_init,
>        .reset       = function_trace_reset,
>        .start       = function_trace_start,
> +       .flags          = &func_flags,
> +       .set_flag       = func_set_flag,
>  #ifdef CONFIG_FTRACE_SELFTEST
>        .selftest    = trace_selftest_startup_function,
>  #endif
> --
> 1.5.6.5
>
> --
>


* Re: [PATCH 2/2] ftrace: add stack trace to function tracer
  2009-01-16  0:40 ` [PATCH 2/2] ftrace: add stack trace to function tracer Steven Rostedt
  2009-01-16  0:48   ` Steven Rostedt
  2009-01-16 10:08   ` Frédéric Weisbecker
@ 2009-01-16 11:19   ` Ingo Molnar
  2 siblings, 0 replies; 29+ messages in thread
From: Ingo Molnar @ 2009-01-16 11:19 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-kernel, Andrew Morton, Frederic Weisbecker, Chris Mason,
	Steven Rostedt


* Steven Rostedt <rostedt@goodmis.org> wrote:

> From: Steven Rostedt <srostedt@redhat.com>
> 
> Impact: new feature to stack trace any function
> 
> Chris Mason asked about being able to pick and choose a function
> and get a stack trace from it. This feature enables his request.
> 
>  # echo io_schedule > /debug/tracing/set_ftrace_filter
>  # echo function > /debug/tracing/current_tracer
>  # echo func_stack_trace > /debug/tracing/trace_options
> 
> Produces the following in /debug/tracing/trace:
> 
>        kjournald-702   [001]   135.673060: io_schedule <-sync_buffer
>        kjournald-702   [002]   135.673671:
>  <= sync_buffer
>  <= __wait_on_bit
>  <= out_of_line_wait_on_bit
>  <= __wait_on_buffer
>  <= sync_dirty_buffer
>  <= journal_commit_transaction
>  <= kjournald

neat!

	Ingo


* Re: [PATCH 0/2] ftrace: updates to tip
  2009-01-16  0:40 [PATCH 0/2] ftrace: updates to tip Steven Rostedt
  2009-01-16  0:40 ` [PATCH 1/2] ftrace: fix trace_output Steven Rostedt
  2009-01-16  0:40 ` [PATCH 2/2] ftrace: add stack trace to function tracer Steven Rostedt
@ 2009-01-16 11:23 ` Ingo Molnar
  2009-01-16 13:08   ` Frédéric Weisbecker
  2009-01-16 14:32   ` Steven Rostedt
  2009-01-16 21:47 ` Chris Mason
  3 siblings, 2 replies; 29+ messages in thread
From: Ingo Molnar @ 2009-01-16 11:23 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-kernel, Andrew Morton, Frederic Weisbecker, Chris Mason


* Steven Rostedt <rostedt@goodmis.org> wrote:

> Ingo,
> 
> The first patch is critical, and needs to stay with trace_output.c Not 
> that critical since trace_output.c is not in mainline yet.
> 
> The second patch gives the ability to stack trace functions. I've been 
> leery about adding this and still keep it a separate option from the 
> "stacktrace" that already exists. This is because when enabled with no 
> filtering, the lag between typing and seeing what is typed can be up to 
> 10 seconds or more.

Btw., is this true even if frame pointers are enabled? When frame pointers 
are disabled we scan the full kernel stack => that can be quite slow if 
every kernel function is traced ;-)

> I made the function stack trace an option attached to the function 
> tracer, so it must be enabled after the function tracer has been set. 
> This still needs to be updated in ftrace.txt.

maybe we could drive this via the filter API? Something like:

   echo "*btrfs*:stacktrace" >> set_filter_functions

Would automatically mean that those functions will all generate 
stacktraces too. Note how safe this API is by default: the filter is used 
for a narrow scope of functions anyway. To get it for all kernel functions 
one would have to do:

   echo "*:stacktrace" >> set_filter_functions

Which one cannot do accidentally.

What do you think?

> The following patches are in:
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-2.6-trace.git
> 
>     branch: tip/devel
> 
> 
> Lai Jiangshan (1):
>       ftrace: fix trace_output
> 
> Steven Rostedt (1):
>       ftrace: add stack trace to function tracer
> 
> ----
>  kernel/trace/trace.c           |   26 ++++++++----
>  kernel/trace/trace.h           |    7 +++
>  kernel/trace/trace_functions.c |   84 ++++++++++++++++++++++++++++++++++++++++
>  kernel/trace/trace_output.c    |   65 +++++++++++++++----------------
>  kernel/trace/trace_output.h    |    4 +-
>  5 files changed, 141 insertions(+), 45 deletions(-)

Pulled into tip/tracing/ftrace, thanks Steve!

	Ingo


* Re: [PATCH 0/2] ftrace: updates to tip
  2009-01-16 11:23 ` [PATCH 0/2] ftrace: updates to tip Ingo Molnar
@ 2009-01-16 13:08   ` Frédéric Weisbecker
  2009-01-16 13:10     ` Ingo Molnar
  2009-01-16 14:32   ` Steven Rostedt
  1 sibling, 1 reply; 29+ messages in thread
From: Frédéric Weisbecker @ 2009-01-16 13:08 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Steven Rostedt, linux-kernel, Andrew Morton, Chris Mason

2009/1/16 Ingo Molnar <mingo@elte.hu>:
>
> * Steven Rostedt <rostedt@goodmis.org> wrote:
>
>> Ingo,
>>
>> The first patch is critical, and needs to stay with trace_output.c Not
>> that critical since trace_output.c is not in mainline yet.
>>
>> The second patch gives the ability to stack trace functions. I've been
>> leery about adding this and still keep it a separate option from the
>> "stacktrace" that already exists. This is because when enabled with no
>> filtering, the lag between typing and seeing what is typed can be up to
>> 10 seconds or more.
>
> Btw., is this true even if frame pointers are enabled? When frame pointers
> are disabled we scan the full kernel stack => that can be quite slow if
> every kernel function is traced ;-)


Hi Ingo,

The function tracer selects Frame pointers :-)

Frederic.


>> I made the function stack trace an option attached to the function
>> tracer, so it must be enabled after the function tracer has been set.
>> This still needs to be updated in ftrace.txt.
>
> maybe we could drive this via the filter API? Something like:
>
>   echo "*btrfs*:stacktrace" >> set_filter_functions
>
> Would automatically mean that those functions will all generate
> stacktraces too. Note how safe this API is by default: the filter is used
> for a narrow scope of functions anyway. To get it for all kernel functions
> one would have to do:
>
>   echo "*:stacktrace" >> set_filter_functions
>
> Which one cannot do accidentally.
>
> What do you think?
>
>> The following patches are in:
>>
>>   git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-2.6-trace.git
>>
>>     branch: tip/devel
>>
>>
>> Lai Jiangshan (1):
>>       ftrace: fix trace_output
>>
>> Steven Rostedt (1):
>>       ftrace: add stack trace to function tracer
>>
>> ----
>>  kernel/trace/trace.c           |   26 ++++++++----
>>  kernel/trace/trace.h           |    7 +++
>>  kernel/trace/trace_functions.c |   84 ++++++++++++++++++++++++++++++++++++++++
>>  kernel/trace/trace_output.c    |   65 +++++++++++++++----------------
>>  kernel/trace/trace_output.h    |    4 +-
>>  5 files changed, 141 insertions(+), 45 deletions(-)
>
> Pulled into tip/tracing/ftrace, thanks Steve!
>
>        Ingo
>


* Re: [PATCH 0/2] ftrace: updates to tip
  2009-01-16 13:08   ` Frédéric Weisbecker
@ 2009-01-16 13:10     ` Ingo Molnar
  0 siblings, 0 replies; 29+ messages in thread
From: Ingo Molnar @ 2009-01-16 13:10 UTC (permalink / raw)
  To: Frédéric Weisbecker
  Cc: Steven Rostedt, linux-kernel, Andrew Morton, Chris Mason


* Frédéric Weisbecker <fweisbec@gmail.com> wrote:

> 2009/1/16 Ingo Molnar <mingo@elte.hu>:
> >
> > * Steven Rostedt <rostedt@goodmis.org> wrote:
> >
> >> Ingo,
> >>
> >> The first patch is critical, and needs to stay with trace_output.c Not
> >> that critical since trace_output.c is not in mainline yet.
> >>
> >> The second patch gives the ability to stack trace functions. I've been
> >> leery about adding this and still keep it a separate option from the
> >> "stacktrace" that already exists. This is because when enabled with no
> >> filtering, the lag between typing and seeing what is typed can be up to
> >> 10 seconds or more.
> >
> > Btw., is this true even if frame pointers are enabled? When frame pointers
> > are disabled we scan the full kernel stack => that can be quite slow if
> > every kernel function is traced ;-)
> 
> 
> Hi Ingo,
> 
> The function tracer selects Frame pointers :-)

doh, i even added that to begin with ;-)

Life is short but a coder's memory is even shorter.

	Ingo


* Re: [PATCH 2/2] ftrace: add stack trace to function tracer
  2009-01-16 10:08   ` Frédéric Weisbecker
@ 2009-01-16 14:28     ` Steven Rostedt
  0 siblings, 0 replies; 29+ messages in thread
From: Steven Rostedt @ 2009-01-16 14:28 UTC (permalink / raw)
  To: Frédéric Weisbecker
  Cc: linux-kernel, Ingo Molnar, Andrew Morton, Chris Mason, Steven Rostedt



On Fri, 16 Jan 2009, Frédéric Weisbecker wrote:

> Hi Steven,
> 
> 2009/1/16 Steven Rostedt <rostedt@goodmis.org>:
> > +static struct tracer_flags func_flags = {
> > +       .val = 0, /* By default: all flags disabled */
> > +       .opts = func_opts
> > +};
> > +
> > +static int func_set_flag(u32 old_flags, u32 bit, int set)
> > +{
> > +       if (bit == TRACE_FUNC_OPT_STACK) {
> > +               /* do nothing if already set */
> > +               if (!!set == !!(func_flags.val & TRACE_FUNC_OPT_STACK))
> > +                       return 0;
> 
> 
> This is just a small thing but set will always be 1 or 0. And if you want
> the current val of your options, you can take old_flags :-)

I never assume something is set to 0 or 1 ;-)

But I did overlook the "old_flags". Yeah that would be better to use.

> 
> Would you prefer I send a patch to not call the set_flag callback
> when the option set/unset by the user is already set/unset ?

Hmm, good question. That's the way I do it, but would that make sense for
others?  Probably would be a good idea. We only want to know when the
flag changes.

-- Steve



* Re: [PATCH 0/2] ftrace: updates to tip
  2009-01-16 11:23 ` [PATCH 0/2] ftrace: updates to tip Ingo Molnar
  2009-01-16 13:08   ` Frédéric Weisbecker
@ 2009-01-16 14:32   ` Steven Rostedt
  2009-01-16 15:21     ` Ingo Molnar
  1 sibling, 1 reply; 29+ messages in thread
From: Steven Rostedt @ 2009-01-16 14:32 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: linux-kernel, Andrew Morton, Frederic Weisbecker, Chris Mason


On Fri, 16 Jan 2009, Ingo Molnar wrote:

> 
> * Steven Rostedt <rostedt@goodmis.org> wrote:
> 
> > Ingo,
> > 
> > The first patch is critical, and needs to stay with trace_output.c Not 
> > that critical since trace_output.c is not in mainline yet.
> > 
> > The second patch gives the ability to stack trace functions. I've been 
> > leery about adding this and still keep it a separate option from the 
> > "stacktrace" that already exists. This is because when enabled with no 
> > filtering, the lag between typing and seeing what is typed can be up to 
> > 10 seconds or more.
> 
> Btw., is this true even if frame pointers are enabled? When frame pointers 
> are disabled we scan the full kernel stack => that can be quite slow if 
> every kernel function is traced ;-)

After making my latest changes, I see a half-second lag. I don't know
why it was so slow before. But I do have all the debug features enabled,
which would also slow things down quite a bit (lockdep, et al.)

> 
> > I made the function stack trace an option attached to the function 
> > tracer, so it must be enabled after the function tracer has been set. 
> > This still needs to be updated in ftrace.txt.
> 
> maybe we could drive this via the filter API? Something like:
> 
>    echo "*btrfs*:stacktrace" >> set_filter_functions
> 
> Would automatically mean that those functions will all generate 
> stacktraces too. Note how safe this API is by default: the filter is used 
> for a narrow scope of functions anyway. To get it for all kernel functions
> one would have to do:
> 
>    echo "*:stacktrace" >> set_filter_functions
> 
> Which one cannot do accidentally.
> 
> What do you think?

Now that I see only a 1/2 sec lag, do you still think this is necessary?

Maybe I should go back and see why it was so bad before?

But I do notice that not all functions produce a valid stack trace. Maybe 
it would be better to add that api :-?

-- Steve



* Re: [PATCH 0/2] ftrace: updates to tip
  2009-01-16 14:32   ` Steven Rostedt
@ 2009-01-16 15:21     ` Ingo Molnar
  2009-01-16 15:53       ` Steven Rostedt
  0 siblings, 1 reply; 29+ messages in thread
From: Ingo Molnar @ 2009-01-16 15:21 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-kernel, Andrew Morton, Frederic Weisbecker, Chris Mason


* Steven Rostedt <rostedt@goodmis.org> wrote:

> > maybe we could drive this via the filter API? Something like:
> > 
> >    echo "*btrfs*:stacktrace" >> set_filter_functions
> > 
> > Would automatically mean that those functions will all generate 
> > stacktraces too. Note how safe this API is by default: the filter is used 
> > for a narrow scope of functions anyway. To get it for all kernel functions 
> > one would have to do:
> > 
> >    echo "*:stacktrace" >> set_filter_functions
> > 
> > Which one cannot do accidentally.
> > 
> > What do you think?
> 
> Now that I see only a 1/2 sec lag, do you still think this is necessary?
> 
> Maybe I should go back and see why it was so bad before?
> 
> But I do notice that not all functions produce a valid stack trace. 
> Maybe it would be better to add that api :-?

yes - i think that API would be more intuitive, and that way people could 
mix more interesting functions (with stack traces) with less important 
functions (no stack traces).

	Ingo


* Re: [PATCH 0/2] ftrace: updates to tip
  2009-01-16 15:21     ` Ingo Molnar
@ 2009-01-16 15:53       ` Steven Rostedt
  2009-01-16 16:02         ` Ingo Molnar
  0 siblings, 1 reply; 29+ messages in thread
From: Steven Rostedt @ 2009-01-16 15:53 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: linux-kernel, Andrew Morton, Frederic Weisbecker, Chris Mason


On Fri, 16 Jan 2009, Ingo Molnar wrote:
> > 
> > But I do notice that not all functions produce a valid stack trace. 
> > Maybe it would be better to add that api :-?
> 
> yes - i think that API would be more intuitive, and that way people could 
> mix more interesting functions (with stack traces) with less important 
> functions (no stack traces).

Ah, that's the point. We can't mix and match on these. Either all 
functions that are traced do the stack trace, or none do. This is where 
the new api may be confusing.  We can register a function to be traced via 
the function pointer, and we can pick which functions to trace, but we can 
not separate out different functions for different traces.

-- Steve



* Re: [PATCH 0/2] ftrace: updates to tip
  2009-01-16 15:53       ` Steven Rostedt
@ 2009-01-16 16:02         ` Ingo Molnar
  2009-01-16 16:03           ` Ingo Molnar
  2009-01-16 16:22           ` Steven Rostedt
  0 siblings, 2 replies; 29+ messages in thread
From: Ingo Molnar @ 2009-01-16 16:02 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-kernel, Andrew Morton, Frederic Weisbecker, Chris Mason


* Steven Rostedt <rostedt@goodmis.org> wrote:

> 
> On Fri, 16 Jan 2009, Ingo Molnar wrote:
> > > 
> > > But I do notice that not all functions produce a valid stack trace. 
> > > Maybe it would be better to add that api :-?
> > 
> > yes - i think that API would be more intuitive, and that way people could 
> > mix more interesting functions (with stack traces) with less important 
> > functions (no stack traces).
> 
> Ah, that's the point. We can't mix and match on these. Either all 
> functions that are traced do the stack trace, or none do. This is where 
> the new api may be confusing. [...]

that would be confusing indeed.

> [...] We can register a function to be traced via the function pointer, 
> and we can pick which functions to trace, but we can not separate out 
> different functions for different traces.

Why not? We could have a hash of all traced functions with metadata 
attached. Since patching functions in/out would be a relatively rare 
operation, this would be a highly efficient read-mostly hash.
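
Roughly something like this -- a purely hypothetical sketch, all names
made up (hash_long() is from linux/hash.h):

	#define FUNC_HASH_BITS	10

	struct func_meta {
		unsigned long		ip;	/* function entry address */
		unsigned long		flags;	/* e.g. "emit a stacktrace" */
		struct func_meta	*next;	/* hash chain */
	};

	static struct func_meta *func_hash[1 << FUNC_HASH_BITS];

	static struct func_meta *func_meta_lookup(unsigned long ip)
	{
		struct func_meta *meta;

		for (meta = func_hash[hash_long(ip, FUNC_HASH_BITS)];
		     meta; meta = meta->next)
			if (meta->ip == ip)
				return meta;
		return NULL;
	}

Since updates (filter changes) would be rare, the lookup side could run
locklessly under RCU.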

	Ingo


* Re: [PATCH 0/2] ftrace: updates to tip
  2009-01-16 16:02         ` Ingo Molnar
@ 2009-01-16 16:03           ` Ingo Molnar
  2009-01-16 16:22           ` Steven Rostedt
  1 sibling, 0 replies; 29+ messages in thread
From: Ingo Molnar @ 2009-01-16 16:03 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-kernel, Andrew Morton, Frederic Weisbecker, Chris Mason


* Ingo Molnar <mingo@elte.hu> wrote:

> > [...] We can register a function to be traced via the function 
> > pointer, and we can pick which functions to trace, but we can not 
> > separate out different functions for different traces.
> 
> Why not? We could have a hash of all traced functions with metadata 
> attached. Since patching functions in/out would be a relatively rare 
> operation, this would be a highly efficient read-mostly hash.

We could build that hash when registering the tracer. Via recordmcount we 
have advance knowledge of all the functions that may be traced.

	Ingo


* Re: [PATCH 0/2] ftrace: updates to tip
  2009-01-16 16:02         ` Ingo Molnar
  2009-01-16 16:03           ` Ingo Molnar
@ 2009-01-16 16:22           ` Steven Rostedt
  2009-01-16 16:30             ` Ingo Molnar
  1 sibling, 1 reply; 29+ messages in thread
From: Steven Rostedt @ 2009-01-16 16:22 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: linux-kernel, Andrew Morton, Frederic Weisbecker, Chris Mason


On Fri, 16 Jan 2009, Ingo Molnar wrote:
> * Steven Rostedt <rostedt@goodmis.org> wrote:

> > [...] We can register a function to be traced via the function pointer, 
> > and we can pick which functions to trace, but we can not separate out 
> > different functions for different traces.
> 
> Why not? We could have a hash of all traced functions with metadata 
> attached. Since patching functions in/out would be a relatively rare 
> operation, this would be a highly efficient read-mostly hash.

I was afraid you would say that :-)

Yes, we could add something to do this. It would take some thought on a 
good api and implementation. This is not an easy task by any means. We 
need a way to map between a function call and a tracer, where a function 
call can call more than one tracer.

It would probably be best for the tracer to have its own hash. This way
the tracer could do a lookup to determine whether it should record or not.
When filtering on a function, the filter applies to every callback that
is registered. For instance, the stack_tracer hooks into all functions to
test the size of the stack. When the function tracer is hooked in, it does
not notice that a stack tracer is there.

But we could design it with a "function[:tracer]" api to
set_ftrace_filter, where if you specify a ":tracer" it will only affect
those functions. In fact, we could have those functions be the only ones
that call multiple tracers. Right now, if only one function is added, the
dynamic tracer will have the mcount call that function directly. If you
add a second function (in the case of the stack tracer and function tracer
running simultaneously) it will change mcount to call an internal function
that calls each registered function. With a new design, we could have some
functions call a single function directly, or call the list. We could have
the list check the hash to determine which function to call, but that may
be difficult.
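
In that scheme the internal list function might consult per-function
metadata before each callback. Again purely hypothetical, building on
the hash sketched earlier -- the mask fields do not exist today:

	static void
	ftrace_list_func(unsigned long ip, unsigned long parent_ip)
	{
		struct ftrace_ops *op;
		struct func_meta *meta = func_meta_lookup(ip);

		for (op = ftrace_list; op != &ftrace_list_end; op = op->next) {
			/* skip tracers that did not select this function */
			if (meta && !(meta->tracer_mask & op->mask))
				continue;
			op->func(ip, parent_ip);
		}
	}

The catch is that the lookup would run on every single traced call.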

-- Steve



* Re: [PATCH 0/2] ftrace: updates to tip
  2009-01-16 16:22           ` Steven Rostedt
@ 2009-01-16 16:30             ` Ingo Molnar
  2009-01-16 22:59               ` Ingo Molnar
  0 siblings, 1 reply; 29+ messages in thread
From: Ingo Molnar @ 2009-01-16 16:30 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-kernel, Andrew Morton, Frederic Weisbecker, Chris Mason


* Steven Rostedt <rostedt@goodmis.org> wrote:

> On Fri, 16 Jan 2009, Ingo Molnar wrote:
> > * Steven Rostedt <rostedt@goodmis.org> wrote:
> 
> > > [...] We can register a function to be traced via the function pointer, 
> > > and we can pick which functions to trace, but we can not separate out 
> > > different functions for different traces.
> > 
> > Why not? We could have a hash of all traced functions with metadata 
> > attached. Since patching functions in/out would be a relatively rare 
> > operation, this would be a highly efficient read-mostly hash.
> 
> I was afraid you would say that :-)
> 
> Yes, we could add something to do this. It would take some thought on a 
> good api, and implementation. This is not an easy task by any means. We 
> need a way to map between a function call and a tracer, where a function 
> call can call more than one tracer.

Note that some other things could be consolidated under per function 
metadata: for example set_graph_function - which really is a per function 
attribute too. Plus a lot of new things would be enabled i think.

	Ingo


* Re: [PATCH 0/2] ftrace: updates to tip
  2009-01-16  0:40 [PATCH 0/2] ftrace: updates to tip Steven Rostedt
                   ` (2 preceding siblings ...)
  2009-01-16 11:23 ` [PATCH 0/2] ftrace: updates to tip Ingo Molnar
@ 2009-01-16 21:47 ` Chris Mason
  2009-01-16 22:26   ` Ingo Molnar
  3 siblings, 1 reply; 29+ messages in thread
From: Chris Mason @ 2009-01-16 21:47 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-kernel, Ingo Molnar, Andrew Morton, Frederic Weisbecker

[-- Attachment #1: Type: text/plain, Size: 3849 bytes --]

On Thu, 2009-01-15 at 19:40 -0500, Steven Rostedt wrote:
> Ingo,
> 
> The first patch is critical, and needs to stay with trace_output.c
> Not that critical since trace_output.c is not in mainline yet.
> 
> The second patch gives the ability to stack trace functions.
> I've been leery about adding this and still keep it a separate
> option from the "stacktrace" that already exists. This is because
> when enabled with no filtering, the lag between typing and seeing
> what is typed can be up to 10 seconds or more.
> 

I mostly asked for this because I often try to find the most common
reason a given function is called, and oprofile isn't always a great way
to catch it.  systemtap can do it too if you can get systemtap to work
against your current devel kernel, but there are limits on how much
memory it'll use.

I've attached some simple python code that parses the output of the
function stack tracer, it makes some dumb assumptions about the format
but isn't a bad proof of concept.  The first such assumption is that
you're only filtering on a single function.

Here is some example output, trying to find the most common causes of
native_smp_send_reschedule() during a btrfs dbench run.

It relates to the Oracle OLTP thread because oracle heavily uses IPC
semaphores to trigger wakeups of processes as various events finish.
I'd bet that try_to_wakeup is the most common cause of the reschedule
calls there as well.

For btrfs, the btree lock mutexes come back into the profile yet again.
It would be interesting to change the spinning mutex code to look for
spinners and skip the wakeup on unlock, but that's a different thread
entirely.

The short version is: thanks Steve, this is really cool!

12058 hits: 
 <= check_preempt_wakeup
 <= try_to_wake_up
 <= wake_up_process
 <= __mutex_unlock_slowpath
 <= mutex_unlock
 <= btrfs_tree_unlock
 <= unlock_up
 ===========

11678 hits: 
 <= check_preempt_curr_idle
 <= try_to_wake_up
 <= wake_up_process
 <= __mutex_unlock_slowpath
 <= mutex_unlock
 <= btrfs_tree_unlock
 <= unlock_up
 ===========

1569 hits: 
 <= check_preempt_wakeup
 <= try_to_wake_up
 <= default_wake_function
 <= autoremove_wake_function
 <= wake_bit_function
 <= __wake_up_common
 <= __wake_up
 ===========

1505 hits: 
 <= check_preempt_curr_idle
 <= try_to_wake_up
 <= default_wake_function
 <= autoremove_wake_function
 <= wake_bit_function
 <= __wake_up_common
 <= __wake_up
 ===========

1377 hits: # the output isn't perfectly parsable, or I'm dumb
 <= 0
 <= 0
 <= 0
 <= 0
 <= 0
 <= 0
 <= 0
 ===========

1194 hits: 
 <= check_preempt_curr_idle
 <= try_to_wake_up
 <= wake_up_process
 <= btrfs_queue_worker
 <= schedule_bio
 <= btrfs_map_bio
 <= __btrfs_submit_bio_done
 ===========

1108 hits: 
 <= check_preempt_curr_idle
 <= try_to_wake_up
 <= default_wake_function
 <= autoremove_wake_function
 <= __wake_up_common
 <= __wake_up
 <= cache_drop_leaf_ref
 ===========

935 hits: 
 <= check_preempt_wakeup
 <= try_to_wake_up
 <= wake_up_process
 <= btrfs_queue_worker
 <= schedule_bio
 <= btrfs_map_bio
 <= __btrfs_submit_bio_done
 ===========

856 hits: 
 <= check_preempt_curr_idle
 <= try_to_wake_up
 <= wake_up_process
 <= btrfs_queue_worker
 <= btrfs_wq_submit_bio
 <= btrfs_submit_bio_hook
 <= submit_one_bio
 ===========

502 hits: 
 <= check_preempt_wakeup
 <= try_to_wake_up
 <= wake_up_process
 <= btrfs_queue_worker
 <= btrfs_wq_submit_bio
 <= btrfs_submit_bio_hook
 <= submit_one_bio
 ===========

442 hits: 
 <= check_preempt_curr_idle
 <= try_to_wake_up
 <= wake_up_process
 <= __mutex_unlock_slowpath
 <= mutex_unlock
 <= btrfs_tree_unlock
 <= btrfs_release_path
 ===========

327 hits: 
 <= check_preempt_wakeup
 <= try_to_wake_up
 <= default_wake_function
 <= autoremove_wake_function
 <= __wake_up_common
 <= __wake_up
 <= cache_drop_leaf_ref
 ===========


... output continues for a bit



[-- Attachment #2: trace-histogram --]
[-- Type: text/x-python, Size: 1638 bytes --]

#!/usr/bin/env python3
#
# this is a fairly trivial parser of the ftrace function stack trace output
# the idea is to find the most common stack traces that lead to calling
# a given function, even those that don't use much CPU time and won't
# show up in other profilers.
#
# It assumes ftrace output looks like this:
#       some text including the function name
# <= func
# <= func2
# <= func3
#       some text including the function name
# <= func4
# <= func5
# <= func6
#
# The traces above would generate two histogram entries, one for
# func,func2,func3 and one for func4,func5,func6
#

import sys, signal

stacks = {}

last = []
printed = False

def summary(signum, frame):
    global printed

    printed = True
    count = {}

    # group identical stacks by the number of hits they got
    for lines, c in stacks.items():
        count.setdefault(c, []).append(lines + ' ===========\n')

    # print the 50 most common stacks, highest hit count first
    limit = 50
    for x in sorted(count.keys(), reverse=True):
        print("%d hits: \n%s" % (x, "".join(count[x])))
        limit -= 1
        if limit <= 0:
            break
    sys.exit(0)

signal.signal(signal.SIGTERM, summary)
signal.signal(signal.SIGINT, summary)

while True:
    try:
        l = sys.stdin.readline()
    except:
        break

    if not l:
        break

    if l[0] == '#':
        continue
    if not l.startswith(' <='):
        if last:
            lines = ''.join(last)
            val = stacks.get(lines, 0) + 1
            stacks[lines] = val
        # we don't put the header line into the trace because that might
        # have timestamp info in it.
        last = []
    else:
        last.append(l)

if not printed:
    summary(None, None)


* Re: [PATCH 0/2] ftrace: updates to tip
  2009-01-16 21:47 ` Chris Mason
@ 2009-01-16 22:26   ` Ingo Molnar
  2009-01-17  1:30     ` Chris Mason
  2009-01-17  2:38     ` Arnaldo Carvalho de Melo
  0 siblings, 2 replies; 29+ messages in thread
From: Ingo Molnar @ 2009-01-16 22:26 UTC (permalink / raw)
  To: Chris Mason
  Cc: Steven Rostedt, linux-kernel, Andrew Morton, Frederic Weisbecker


* Chris Mason <chris.mason@oracle.com> wrote:

> On Thu, 2009-01-15 at 19:40 -0500, Steven Rostedt wrote:
> > Ingo,
> > 
> > The first patch is critical, and needs to stay with trace_output.c
> > Not that critical since trace_output.c is not in mainline yet.
> > 
> > The second patch gives the ability to stack trace functions.
> > I've been leery about adding this and still keep it a separate
> > option from the "stacktrace" that already exists. This is because
> > when enabled with no filtering, the lag between typing and seeing
> > what is typed can be up to 10 seconds or more.
> > 
> 
> I mostly asked for this because I often try to find the most common
> reason a given function is called, and oprofile isn't always a great way
> to catch it.  systemtap can do it too if you can get systemtap to work
> against your current devel kernel, but there are limits on how much
> memory it'll use.
> 
> I've attached some simple python code that parses the output of the
> function stack tracer, it makes some dumb assumptions about the format
> but isn't a bad proof of concept.  The first such assumption is that
> you're only filtering on a single function.
> 
> Here is some example output, trying to find the most common causes of
> native_smp_send_reschedule() during a btrfs dbench run.
> 
> It relates to the Oracle OLTP thread because oracle heavily uses IPC
> semaphores to trigger wakeups of processes as various events finish.
> I'd bet that try_to_wakeup is the most common cause of the reschedule
> calls there as well.
> 
> For btrfs, the btree lock mutexes come back into the profile yet again.
> It would be interesting to change the spinning mutex code to look for
> spinners and skip the wakeup on unlock, but that's a different thread
> entirely.
> 
> The short version is: thanks Steve, this is really cool!
> 
> 12058 hits: 
>  <= check_preempt_wakeup
>  <= try_to_wake_up
>  <= wake_up_process
>  <= __mutex_unlock_slowpath
>  <= mutex_unlock
>  <= btrfs_tree_unlock
>  <= unlock_up
>  ===========

Cool! We've got scripts/tracing/ [with one Python script in it already] - 
so if this is tidied up to be generally useful we could put it there.

The other thing is that there's the statistics framework of ftrace, being 
worked on by Frederic and Steve. That tries to handle and provide 
higher-order summaries/"views" of plain traces, like histograms and counts 
- provided by the kernel.

Maybe the above type of multi-dimensional-stack-trace based histogram 
could fit into the statistics framework too?

	Ingo


* Re: [PATCH 0/2] ftrace: updates to tip
  2009-01-16 16:30             ` Ingo Molnar
@ 2009-01-16 22:59               ` Ingo Molnar
  2009-01-17  1:14                 ` Steven Rostedt
  0 siblings, 1 reply; 29+ messages in thread
From: Ingo Molnar @ 2009-01-16 22:59 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-kernel, Andrew Morton, Frederic Weisbecker, Chris Mason


* Ingo Molnar <mingo@elte.hu> wrote:

> > I was afraid you would say that :-)
> > 
> > Yes, we could add something to do this. It would take some thought on 
> > a good api, and implementation. This is not an easy task by any means. 
> > We need a way to map between a function call and a tracer, where a 
> > function call can call more than one tracer.
> 
> Note that some other things could be consolidated under per function 
> metadata: for example set_graph_function - which really is a per 
> function attribute too. Plus a lot of new things would be enabled i 
> think.

a few other things that could be per-function attributes:

 - Using function trace 'limits': trace a function 50 times and don't trace 
   it afterwards. Each invocation of that function decreases the
   remaining-count by one. For example:

     echo 'btrfs_join_transaction:#2' >> set_ftrace_filter

   Would specify that we generate two trace entries of
   btrfs_join_transaction(), then stop tracing this function.

 - Using function-triggered tracing: a function could be specified (via a 
   filter format extension) to act as a 'start tracing' trigger. Another 
   extension would be 'stop tracing' trigger.

   For example:

      echo 'btrfs_join_transaction:+' >> set_ftrace_filter
      echo 'btrfs_commit_transaction:-' >> set_ftrace_filter

   The '+' is a start-tracing trigger condition, the '-' is a stop-tracing 
   trigger condition. All function calls between btrfs_join_transaction() 
   and btrfs_commit_transaction() would be traced.

The two could be combined - to generate the trace of a single btrfs 
transaction, one could do:

      echo 0 > tracing_enabled
      echo 'btrfs_join_transaction:+#1' >> set_ftrace_filter
      echo 'btrfs_commit_transaction:-#1' >> set_ftrace_filter
      echo 1 > tracing_enabled

Other extensions are possible too:

 - Trace length triggers. For example one could do:

      echo 'btrfs_join_transaction:+*#10' >> set_ftrace_filter

   To trace 10 function calls [allowed by current filter settings] after
   the first btrfs_join_transaction() call - and stop tracing after those 
   10 trace entries.

This would allow the creation of "surgical" one-time traces - of events 
and functions one is specifically interested in.

	Ingo


* Re: [PATCH 0/2] ftrace: updates to tip
  2009-01-16 22:59               ` Ingo Molnar
@ 2009-01-17  1:14                 ` Steven Rostedt
  2009-01-17 13:48                   ` Frederic Weisbecker
  2009-01-17 22:34                   ` Ingo Molnar
  0 siblings, 2 replies; 29+ messages in thread
From: Steven Rostedt @ 2009-01-17  1:14 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: linux-kernel, Andrew Morton, Frederic Weisbecker, Chris Mason


On Fri, 16 Jan 2009, Ingo Molnar wrote:

> 
> * Ingo Molnar <mingo@elte.hu> wrote:
> 
> > > I was afraid you would say that :-)
> > > 
> > > Yes, we could add something to do this. It would take some thought on 
> > > a good api, and implementation. This is not an easy task by any means. 
> > > We need a way to map between a function call and a tracer, where a 
> > > function call can call more than one tracer.
> > 
> > Note that some other things could be consolidated under per function 
> > metadata: for example set_graph_function - which really is a per 
> > function attribute too. Plus a lot of new things would be enabled i 
> > think.
> 
> a few other things that could be per-function attributes:
> 
>  - Using function trace 'limits': trace a function 50 times and don't trace 
>    it afterwards. Each invocation of that function decreases the
>    remaining-count by one. For example:
> 
>      echo 'btrfs_join_transaction:#2' >> set_ftrace_filter
> 
>    Would specify that we generate two trace entries of
>    btrfs_join_transaction(), then stop tracing this function.
> 
>  - Using function-triggered tracing: a function could be specified (via a 
>    filter format extension) to act as a 'start tracing' trigger. Another 
>    extension would be 'stop tracing' trigger.
> 
>    For example:
> 
>       echo 'btrfs_join_transaction:+' >> set_ftrace_filter
>       echo 'btrfs_commit_transaction:-' >> set_ftrace_filter
> 
>    The '+' is a start-tracing trigger condition, the '-' is a stop-tracing 
>    trigger condition. All function calls between btrfs_join_transaction() 
>    and btrfs_commit_transaction() would be traced.
> 
> The two could be combined - to generate the trace of a single btrfs 
> transaction, one could do:
> 
>       echo 0 > tracing_enabled
>       echo 'btrfs_join_transaction:+#1' >> set_ftrace_filter
>       echo 'btrfs_commit_transaction:-#1' >> set_ftrace_filter
>       echo 1 > tracing_enabled
> 
> Other extensions are possible too:
> 
>  - Trace length triggers. For example one could do:
> 
>       echo 'btrfs_join_transaction:+*#10' >> set_ftrace_filter
> 
>    To trace 10 function calls [allowed by current filter settings] after
>    the first btrfs_join_transaction() call - and stop tracing after those 
>    10 trace entries.
> 
> This would allow the creation of "surgical" one-time traces - of events 
> and functions one is specifically interested in.

This all sounds great, but it would add a lot of conditionals into an 
extremely hot function-tracer path. Remember, we are not modifying the 
calls to mcount to call a registered function directly. All functions 
being traced must call the same function. The reason is that mcount is not 
a normal C function: it does not behave the same as other functions, and 
must be implemented in assembly (as you already know ;-) The dynamic 
tracer calls into a trampoline that calls the registered function. There 
is only one trampoline, so only one function gets called on a trace. We 
can at most enable or disable a given function. We cannot change what 
that function calls (without implementing it for every arch).

This means any conditional you add must be evaluated for all traced 
functions, and that puts an overhead onto the whole system.
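
In C terms the constraint looks roughly like this (the real entry code is
assembly, and ftrace_call_handler is just a made-up stand-in for what the
trampoline effectively does; ftrace_func_t, ftrace_trace_function and
ftrace_stub are the real names):

	/* the one global callback pointer the trampoline dispatches to */
	ftrace_func_t ftrace_trace_function = ftrace_stub;

	/* conceptually this runs for *every* traced function, so any
	 * per-function conditional added here is paid by all of them */
	static void ftrace_call_handler(unsigned long ip, unsigned long parent_ip)
	{
		ftrace_trace_function(ip, parent_ip);
	}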

Now we can register multiple functions to be called by a traced function, 
or pick and choose which function will be called by a traced function 
depending on what option was asked for. But all traced functions will 
still call the same code. We can start experimenting, but I would be 
more keen on seeing how this will be used by developers. Chris gave a 
great example of how he would use his feature. Doing what you ask would 
require a rewrite of most of the code. I'd hate to do that and have only 
2 or 3 people using it :-/

-- Steve


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/2] ftrace: updates to tip
  2009-01-16 22:26   ` Ingo Molnar
@ 2009-01-17  1:30     ` Chris Mason
  2009-01-17  2:38     ` Arnaldo Carvalho de Melo
  1 sibling, 0 replies; 29+ messages in thread
From: Chris Mason @ 2009-01-17  1:30 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Steven Rostedt, linux-kernel, Andrew Morton, Frederic Weisbecker

On Fri, 2009-01-16 at 23:26 +0100, Ingo Molnar wrote:
> * Chris Mason <chris.mason@oracle.com> wrote:

> > 
> > The short version is: thanks Steve, this is really cool!
> > 
> > 12058 hits: 
> >  <= check_preempt_wakeup
> >  <= try_to_wake_up
> >  <= wake_up_process
> >  <= __mutex_unlock_slowpath
> >  <= mutex_unlock
> >  <= btrfs_tree_unlock
> >  <= unlock_up
> >  ===========
> 
> Cool! We've got scripts/tracing/ [with one Python script in it already] - 
> so if this is tidied up to be generally useful we could put it there.
> 

For it to be generally useful, I think it would need a way to pull out
a marker for which function is the start of the stack trace.  Some
parse-friendly tags would be great.
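
Something like this would do (purely illustrative, not what the tracer
prints today):

	## stack-begin: native_smp_send_reschedule
	 <= check_preempt_wakeup
	 <= try_to_wake_up
	## stack-end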

Other than that, I'd add some magic to actually start the tracing for a
given function.  Are there other things people would like it to do?

> The other thing is that there's the statistics framework of ftrace, being 
> worked on by Frederic and Steve. That tries to handle and provide 
> higher-order summaries/"views" of plain traces, like histograms and counts 
> - provided by the kernel.

Ok, I tried to keep it simple because I was hoping others had plans as
well ;)

> 
> Maybe the above type of multi-dimensional-stack-trace based histogram 
> could fit into the statistics framework too?

I'd hope so; it's just a question of how to keep it clear and usable.

-chris



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/2] ftrace: updates to tip
  2009-01-16 22:26   ` Ingo Molnar
  2009-01-17  1:30     ` Chris Mason
@ 2009-01-17  2:38     ` Arnaldo Carvalho de Melo
  2009-01-19 13:31       ` Ingo Molnar
  1 sibling, 1 reply; 29+ messages in thread
From: Arnaldo Carvalho de Melo @ 2009-01-17  2:38 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Chris Mason, Steven Rostedt, linux-kernel, Andrew Morton,
	Frederic Weisbecker

Em Fri, Jan 16, 2009 at 11:26:00PM +0100, Ingo Molnar escreveu:
> 
> * Chris Mason <chris.mason@oracle.com> wrote:
> 
> > On Thu, 2009-01-15 at 19:40 -0500, Steven Rostedt wrote:
> > > Ingo,
> > > 
> > > The first patch is critical, and needs to stay with trace_output.c
> > > Not that critical since trace_output.c is not in mainline yet.
> > > 
> > > The second patch gives the ability to stack trace functions.
> > > I've been leery about adding this and still keep it a separate
> > > option from the "stacktrace" that already exists. This is because
> > > when enabled with no filtering, the lag between typing and seeing
> > > what is typed can be up to 10 seconds or more.
> > > 
> > 
> > I mostly asked for this because I often try to find the most common
> > reason a given function is called, and oprofile isn't always a great way
> > to catch it.  systemtap can do it too if you can get systemtap to work
> > against your current devel kernel, but there are limits on how much
> > memory it'll use.
> > 
> > I've attached some simple Python code that parses the output of the
> > function stack tracer; it makes some dumb assumptions about the format
> > but isn't a bad proof of concept.  The first such assumption is that
> > you're only filtering on a single function.
> > 
> > Here is some example output, trying to find the most common causes of
> > native_smp_send_reschedule() during a btrfs dbench run.
> > 
> > It relates to the Oracle OLTP thread because oracle heavily uses IPC
> > semaphores to trigger wakeups of processes as various events finish.
> > I'd bet that try_to_wake_up() is the most common cause of the reschedule
> > calls there as well.
> > 
> > For btrfs, the btree lock mutexes come back into the profile yet again.
> > It would be interesting to change the spinning mutex code to look for
> > spinners and skip the wakeup on unlock, but that's a different thread
> > entirely.
> > 
> > The short version is: thanks Steve, this is really cool!
> > 
> > 12058 hits: 
> >  <= check_preempt_wakeup
> >  <= try_to_wake_up
> >  <= wake_up_process
> >  <= __mutex_unlock_slowpath
> >  <= mutex_unlock
> >  <= btrfs_tree_unlock
> >  <= unlock_up
> >  ===========
> 
> Cool! We've got scripts/tracing/ [with one Python script in it already] - 
> so if this is tidied up to be generally useful we could put it there.
> 
> The other thing is that there's the statistics framework of ftrace, being 
> worked on by Frederic and Steve. That tries to handle and provide 
> higher-order summaries/"views" of plain traces, like histograms and counts 
> - provided by the kernel.

I plan to use that to provide, in blkftrace, something similar
(identical, even?) to what blktrace produces when it stops a tracing
session:

  8,0    1     3767     2.180288230 31765  C   W 126227253 + 8 [0]
^CCPU0 (8,0):
 Reads Queued:           0,        0KiB	 Writes Queued:         141, 564KiB
 Read Dispatches:        0,        0KiB	 Write Dispatches:        0, 0KiB
 Reads Requeued:         0		 Writes Requeued:         0
 Reads Completed:        0,        0KiB	 Writes Completed:        0, 0KiB
 Read Merges:            0,        0KiB	 Write Merges:           45, 180KiB
 Read depth:             1        	 Write depth:             2
 IO unplugs:             2        	 Timer unplugs:           0
CPU1 (8,0):
 Reads Queued:           2,        8KiB	 Writes Queued:         679, 2,716KiB
 Read Dispatches:        2,        8KiB	 Write Dispatches:      598, 3,280KiB
 Reads Requeued:         0		 Writes Requeued:         0
 Reads Completed:        2,        8KiB	 Writes Completed:      598, 3,280KiB
 Read Merges:            0,        0KiB	 Write Merges:          177, 708KiB
 Read depth:             1        	 Write depth:             2
 IO unplugs:             3        	 Timer unplugs:           0

Total (8,0):
 Reads Queued:           2,        8KiB	 Writes Queued:         820, 3,280KiB
 Read Dispatches:        2,        8KiB	 Write Dispatches:      598, 3,280KiB
 Reads Requeued:         0		 Writes Requeued:         0
 Reads Completed:        2,        8KiB	 Writes Completed:      598, 3,280KiB
 Read Merges:            0,        0KiB	 Write Merges:          222, 888KiB
 IO unplugs:             5        	 Timer unplugs:           0

Throughput (R/W): 3KiB/s / 1,504KiB/s
Events (8,0): 4,289 entries
Skips: 0 forward (0 -   0.0%)
[root@doppio ~]# 

:-)

- Arnaldo

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/2] ftrace: updates to tip
  2009-01-17  1:14                 ` Steven Rostedt
@ 2009-01-17 13:48                   ` Frederic Weisbecker
  2009-01-17 22:34                   ` Ingo Molnar
  1 sibling, 0 replies; 29+ messages in thread
From: Frederic Weisbecker @ 2009-01-17 13:48 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: Ingo Molnar, linux-kernel, Andrew Morton, Chris Mason

On Fri, Jan 16, 2009 at 08:14:25PM -0500, Steven Rostedt wrote:
> 
> On Fri, 16 Jan 2009, Ingo Molnar wrote:
> 
> > 
> > * Ingo Molnar <mingo@elte.hu> wrote:
> > 
> > > > I was afraid you would say that :-)
> > > > 
> > > > Yes, we could add something to do this. It would take some thought on 
> > > > a good API and implementation. This is not an easy task by any means. 
> > > > We need a way to map between a function call and a tracer, where a 
> > > > function call can call more than one tracer.
> > > 
> > > Note that some other things could be consolidated under per-function 
> > > metadata: for example set_graph_function - which really is a per-function 
> > > attribute too. Plus a lot of new things would be enabled, I think.
> > 
> > a few other things that could be per-function attributes:
> > 
> >  - Using function trace 'limits': trace a function 50 times and don't trace 
> >    it afterwards. Each invocation of that function decreases the
> >    remaining-count by one. For example:
> > 
> >      echo 'btrfs_join_transaction:#2' >> set_ftrace_filter
> > 
> >    Would specify that we generate two trace entries of
> >    btrfs_join_transaction(), then stop tracing this function.
> > 
> >  - Using function-triggered tracing: a function could be specified (via a 
> >    filter format extension) to act as a 'start tracing' trigger. Another 
> >    extension would be 'stop tracing' trigger.
> > 
> >    For example:
> > 
> >       echo 'btrfs_join_transaction:+' >> set_ftrace_filter
> >       echo 'btrfs_commit_transaction:-' >> set_ftrace_filter
> > 
> >    The '+' is a start-tracing trigger condition, the '-' is a stop-tracing 
> >    trigger condition. All function calls between btrfs_join_transaction() 
> >    and btrfs_commit_transaction() would be traced.
> > 
> > The two could be combined - to generate the trace of a single btrfs 
> > transaction, one could do:
> > 
> >       echo 0 > tracing_enabled
> >       echo 'btrfs_join_transaction:+#1' >> set_ftrace_filter
> >       echo 'btrfs_commit_transaction:-#1' >> set_ftrace_filter
> >       echo 1 > tracing_enabled
> > 
> > Other extensions are possible too:
> > 
> >  - Trace-length triggers. For example, one could do:
> > 
> >       echo 'btrfs_join_transaction:+*#10' >> set_ftrace_filter
> > 
> >    To trace 10 function calls [allowed by current filter settings] after
> >    the first btrfs_join_transaction() call - and stop tracing after those 
> >    10 trace entries.
> > 
> > This would allow the creation of "surgical" one-time traces - of events 
> > and functions one is specifically interested in.
> 
> This all sounds great, but it would add a lot of conditionals into an 
> extremely hot function-tracer path. Remember, we are not modifying the 
> calls to mcount to call a registered function directly. All functions 
> being traced must call the same function. The reason is that mcount is not 
> a normal C function: it does not behave the same as other functions, and 
> must be implemented in assembly (as you already know ;-) The dynamic 
> tracer calls into a trampoline that calls the registered function. There 
> is only one trampoline, so only one function gets called on a trace. We 
> can at most enable or disable a given function. We cannot change what 
> that function calls (without implementing it for every arch).
> 
> This means any conditional you add must be evaluated for all traced 
> functions, and that puts an overhead onto the whole system.
> 
> Now we can register multiple functions to be called by a traced function, 
> or pick and choose which function will be called by a traced function 
> depending on what option was asked for. But all traced functions will 
> still call the same code.


We could use a kind of ftrace_filter thing which would be a list of callbacks
to call, depending on the options set.

In the best case this would add only one condition to check per traced
function.
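
Something like this, say (just a sketch with made-up names; locking/RCU
omitted):

	struct ftrace_callback {
		struct list_head	list;
		ftrace_func_t		func;
	};

	static LIST_HEAD(ftrace_callbacks);

	static void ftrace_list_handler(unsigned long ip, unsigned long parent_ip)
	{
		struct ftrace_callback *cb;

		/* the single extra condition is the list-empty check */
		list_for_each_entry(cb, &ftrace_callbacks, list)
			cb->func(ip, parent_ip);
	}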



> We can start experimenting, but I would be 
> more keen on seeing how this will be used by developers. Chris gave a 
> great example of how he would use his feature. Doing what you ask would 
> require a rewrite of most of the code. I'd hate to do that and have only 
> 2 or 3 people using it :-/
> 
> -- Steve
> 


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/2] ftrace: updates to tip
  2009-01-17  1:14                 ` Steven Rostedt
  2009-01-17 13:48                   ` Frederic Weisbecker
@ 2009-01-17 22:34                   ` Ingo Molnar
  2009-01-18  7:27                     ` Ingo Molnar
  1 sibling, 1 reply; 29+ messages in thread
From: Ingo Molnar @ 2009-01-17 22:34 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-kernel, Andrew Morton, Frederic Weisbecker, Chris Mason


* Steven Rostedt <rostedt@goodmis.org> wrote:

> > This would allow the creation of "surgical" one-time traces - of 
> > events and functions one is specifically interested in.
> 
> This all sounds great, but it would add a lot of conditionals into an 
> extremely hot function-tracer path. [...]

Why would we need more than exactly one unlikely() condition to handle all 
of this?
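
I.e. something like this (a sketch; trigger_active and ftrace_run_triggers
are made up):

	static void ftrace_call_handler(unsigned long ip, unsigned long parent_ip)
	{
		/* the one test on the hotpath */
		if (unlikely(atomic_read(&trigger_active)))
			ftrace_run_triggers(ip, parent_ip);

		ftrace_trace_function(ip, parent_ip);
	}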

	Ingo

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/2] ftrace: updates to tip
  2009-01-17 22:34                   ` Ingo Molnar
@ 2009-01-18  7:27                     ` Ingo Molnar
  0 siblings, 0 replies; 29+ messages in thread
From: Ingo Molnar @ 2009-01-18  7:27 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-kernel, Andrew Morton, Frederic Weisbecker, Chris Mason


* Ingo Molnar <mingo@elte.hu> wrote:

> 
> * Steven Rostedt <rostedt@goodmis.org> wrote:
> 
> > > This would allow the creation of "surgical" one-time traces - of 
> > > events and functions one is specifically interested in.
> > 
> > This all sounds great, but it would add a lot of conditionals into an 
> > extremely hot function-tracer path. [...]
> 
> Why would we need more than exactly one unlikely() condition to handle 
> all of this?

We could even do it with zero branches, burdening none of the hotpaths of 
existing functionality. Hm?
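
For example (a sketch, with made-up helper names): registering a trigger
could swap the one callback pointer the trampoline calls, so the plain
path keeps no extra test at all:

	/* installed only while a trigger is registered */
	static void ftrace_trigger_handler(unsigned long ip, unsigned long parent_ip)
	{
		ftrace_run_triggers(ip, parent_ip);
		function_trace_call(ip, parent_ip);	/* the plain handler */
	}

	/* on trigger registration: */
	ftrace_trace_function = ftrace_trigger_handler;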

	Ingo

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/2] ftrace: updates to tip
  2009-01-17  2:38     ` Arnaldo Carvalho de Melo
@ 2009-01-19 13:31       ` Ingo Molnar
  0 siblings, 0 replies; 29+ messages in thread
From: Ingo Molnar @ 2009-01-19 13:31 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Chris Mason, Steven Rostedt,
	linux-kernel, Andrew Morton, Frederic Weisbecker


* Arnaldo Carvalho de Melo <acme@redhat.com> wrote:

> Em Fri, Jan 16, 2009 at 11:26:00PM +0100, Ingo Molnar escreveu:
> > 
> > * Chris Mason <chris.mason@oracle.com> wrote:
> > 
> > > On Thu, 2009-01-15 at 19:40 -0500, Steven Rostedt wrote:
> > > > Ingo,
> > > > 
> > > > The first patch is critical, and needs to stay with trace_output.c
> > > > Not that critical since trace_output.c is not in mainline yet.
> > > > 
> > > > The second patch gives the ability to stack trace functions.
> > > > I've been leery about adding this and still keep it a separate
> > > > option from the "stacktrace" that already exists. This is because
> > > > when enabled with no filtering, the lag between typing and seeing
> > > > what is typed can be up to 10 seconds or more.
> > > > 
> > > 
> > > I mostly asked for this because I often try to find the most common
> > > reason a given function is called, and oprofile isn't always a great way
> > > to catch it.  systemtap can do it too if you can get systemtap to work
> > > against your current devel kernel, but there are limits on how much
> > > memory it'll use.
> > > 
> > > I've attached some simple Python code that parses the output of the
> > > function stack tracer; it makes some dumb assumptions about the format
> > > but isn't a bad proof of concept.  The first such assumption is that
> > > you're only filtering on a single function.
> > > 
> > > Here is some example output, trying to find the most common causes of
> > > native_smp_send_reschedule() during a btrfs dbench run.
> > > 
> > > It relates to the Oracle OLTP thread because oracle heavily uses IPC
> > > semaphores to trigger wakeups of processes as various events finish.
> > > I'd bet that try_to_wake_up() is the most common cause of the reschedule
> > > calls there as well.
> > > 
> > > For btrfs, the btree lock mutexes come back into the profile yet again.
> > > It would be interesting to change the spinning mutex code to look for
> > > spinners and skip the wakeup on unlock, but that's a different thread
> > > entirely.
> > > 
> > > The short version is: thanks Steve, this is really cool!
> > > 
> > > 12058 hits: 
> > >  <= check_preempt_wakeup
> > >  <= try_to_wake_up
> > >  <= wake_up_process
> > >  <= __mutex_unlock_slowpath
> > >  <= mutex_unlock
> > >  <= btrfs_tree_unlock
> > >  <= unlock_up
> > >  ===========
> > 
> > Cool! We've got scripts/tracing/ [with one Python script in it already] - 
> > so if this is tidied up to be generally useful we could put it there.
> > 
> > The other thing is that there's the statistics framework of ftrace, being 
> > worked on by Frederic and Steve. That tries to handle and provide 
> > higher-order summaries/"views" of plain traces, like histograms and counts 
> > - provided by the kernel.
> 
> That I plan to use to provide something similar (equal?) in blkftrace to what
> blktrace produces when it stops a tracing session:
> 
>   8,0    1     3767     2.180288230 31765  C   W 126227253 + 8 [0]
> ^CCPU0 (8,0):
>  Reads Queued:           0,        0KiB	 Writes Queued:         141, 564KiB
>  Read Dispatches:        0,        0KiB	 Write Dispatches:        0, 0KiB
>  Reads Requeued:         0		 Writes Requeued:         0
>  Reads Completed:        0,        0KiB	 Writes Completed:        0, 0KiB
>  Read Merges:            0,        0KiB	 Write Merges:           45, 180KiB
>  Read depth:             1        	 Write depth:             2
>  IO unplugs:             2        	 Timer unplugs:           0
> CPU1 (8,0):
>  Reads Queued:           2,        8KiB	 Writes Queued:         679, 2,716KiB
>  Read Dispatches:        2,        8KiB	 Write Dispatches:      598, 3,280KiB
>  Reads Requeued:         0		 Writes Requeued:         0
>  Reads Completed:        2,        8KiB	 Writes Completed:      598, 3,280KiB
>  Read Merges:            0,        0KiB	 Write Merges:          177, 708KiB
>  Read depth:             1        	 Write depth:             2
>  IO unplugs:             3        	 Timer unplugs:           0
> 
> Total (8,0):
>  Reads Queued:           2,        8KiB	 Writes Queued:         820, 3,280KiB
>  Read Dispatches:        2,        8KiB	 Write Dispatches:      598, 3,280KiB
>  Reads Requeued:         0		 Writes Requeued:         0
>  Reads Completed:        2,        8KiB	 Writes Completed:      598, 3,280KiB
>  Read Merges:            0,        0KiB	 Write Merges:          222, 888KiB
>  IO unplugs:             5        	 Timer unplugs:           0
> 
> Throughput (R/W): 3KiB/s / 1,504KiB/s
> Events (8,0): 4,289 entries
> Skips: 0 forward (0 -   0.0%)
> [root@doppio ~]# 
> 
> :-)

Cool :)

	Ingo

^ permalink raw reply	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2009-01-19 13:33 UTC | newest]

Thread overview: 29+ messages
2009-01-16  0:40 [PATCH 0/2] ftrace: updates to tip Steven Rostedt
2009-01-16  0:40 ` [PATCH 1/2] ftrace: fix trace_output Steven Rostedt
2009-01-16  1:40   ` Andrew Morton
2009-01-16  1:46     ` Steven Rostedt
2009-01-16  0:40 ` [PATCH 2/2] ftrace: add stack trace to function tracer Steven Rostedt
2009-01-16  0:48   ` Steven Rostedt
2009-01-16 10:08   ` Frédéric Weisbecker
2009-01-16 14:28     ` Steven Rostedt
2009-01-16 11:19   ` Ingo Molnar
2009-01-16 11:23 ` [PATCH 0/2] ftrace: updates to tip Ingo Molnar
2009-01-16 13:08   ` Frédéric Weisbecker
2009-01-16 13:10     ` Ingo Molnar
2009-01-16 14:32   ` Steven Rostedt
2009-01-16 15:21     ` Ingo Molnar
2009-01-16 15:53       ` Steven Rostedt
2009-01-16 16:02         ` Ingo Molnar
2009-01-16 16:03           ` Ingo Molnar
2009-01-16 16:22           ` Steven Rostedt
2009-01-16 16:30             ` Ingo Molnar
2009-01-16 22:59               ` Ingo Molnar
2009-01-17  1:14                 ` Steven Rostedt
2009-01-17 13:48                   ` Frederic Weisbecker
2009-01-17 22:34                   ` Ingo Molnar
2009-01-18  7:27                     ` Ingo Molnar
2009-01-16 21:47 ` Chris Mason
2009-01-16 22:26   ` Ingo Molnar
2009-01-17  1:30     ` Chris Mason
2009-01-17  2:38     ` Arnaldo Carvalho de Melo
2009-01-19 13:31       ` Ingo Molnar
