All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] tracing - fix function graph trace to properly display failed entries
@ 2009-11-24 12:57 Jiri Olsa
  2009-11-24 17:14 ` Steven Rostedt
  2009-12-10  7:49 ` [tip:tracing/core] tracing: Fix function graph trace_pipe " tip-bot for Jiri Olsa
  0 siblings, 2 replies; 15+ messages in thread
From: Jiri Olsa @ 2009-11-24 12:57 UTC (permalink / raw)
  To: mingo, rostedt, fweisbec; +Cc: linux-kernel, Jiri Olsa

Hi,

there's a case where the graph tracer might get confused and omits
display of a single record.  This applies for both destructive
(trace_pipe) and nondestructive (trace) cases.


<issue description>

The issue description for nondestructive case (trace) follows:

As the function_graph tracer goes through the trace entries (using "trace" file)
it keeps pointer to the current record and the next one:

current ->  func1 ENTRY
   next ->  func2 ENTRY
            func2 RETURN
            func1 RETURN

If it spots adjacent ENTRY and RETURN trace entries of the same function
and pid, it displays the "func2();" trace

            func1 ENTRY
current ->  func2 ENTRY 
   next ->  func2 RETURN
            func1 RETURN

and moves the next trace entry pointer behind the RETURN entry

            func1 ENTRY
current ->  func2 ENTRY 
            func2 RETURN
   next ->  func1 RETURN

so the next record peek will skip the RETURN entry and continue with
whatever is next.

It works ok but for one case. 

If the "func2()" trace does not make it to the seq_file read buffer, it needs
to be processed again in the next read.  And here comes the issue: 
the next read will see following pointers setup for func2 processing:

            func1 ENTRY
current ->  func2 ENTRY 
            func2 RETURN
   next ->  func1 RETURN

which will turn to displaying the func2 entry like: "func2() {", since the
next entry is not RETURN of the same type.  Generaly it is whatever entry 
that follows, but definitelly not the RETURN entry of the same function.

The destructive case (trace_pipe) suffers from the similar issue,
although it keeps only the current pointer.

</issue description>


The following patch propose generic solution for both cases.
It keeps the last read entry/return in the tracer private
iterator storage and keeps track of the failed output.
Whenever the output fails, next read will output previous
'not displayed' entry before processing next entry.

wbr,
jirka


Signed-off-by: Jiri Olsa <jolsa@redhat.com>
---
 include/linux/trace_seq.h            |    5 +-
 kernel/trace/trace.c                 |   10 ++-
 kernel/trace/trace_functions_graph.c |  152 ++++++++++++++++++++++++++++++----
 kernel/trace/trace_output.c          |   26 ++++--
 4 files changed, 162 insertions(+), 31 deletions(-)

diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h
index 09077f6..33ab410 100644
--- a/include/linux/trace_seq.h
+++ b/include/linux/trace_seq.h
@@ -14,6 +14,7 @@ struct trace_seq {
 	unsigned char		buffer[PAGE_SIZE];
 	unsigned int		len;
 	unsigned int		readpos;
+	int			failed;
 };
 
 static inline void
@@ -33,7 +34,7 @@ extern int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
 	__attribute__ ((format (printf, 2, 0)));
 extern int
 trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary);
-extern void trace_print_seq(struct seq_file *m, struct trace_seq *s);
+extern int trace_print_seq(struct seq_file *m, struct trace_seq *s);
 extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
 				 size_t cnt);
 extern int trace_seq_puts(struct trace_seq *s, const char *str);
@@ -55,7 +56,7 @@ trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
 	return 0;
 }
 
-static inline void trace_print_seq(struct seq_file *m, struct trace_seq *s)
+static inline int trace_print_seq(struct seq_file *m, struct trace_seq *s)
 {
 }
 static inline ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 874f289..ca52715 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3049,10 +3049,14 @@ waitagain:
 	if (cnt >= PAGE_SIZE)
 		cnt = PAGE_SIZE - 1;
 
-	/* reset all but tr, trace, and overruns */
-	memset(&iter->seq, 0,
+	/*
+	 * Reset all but tr, trace, and overruns. Preserve 'failed'
+	 * field in the trace_seq structure.
+	 */
+	trace_seq_init(&iter->seq);
+	memset(&iter->ent, 0,
 	       sizeof(struct trace_iterator) -
-	       offsetof(struct trace_iterator, seq));
+	       offsetof(struct trace_iterator, ent));
 	iter->pos = -1;
 
 	trace_event_read_lock();
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 45e6c01..bcdfde4 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -14,11 +14,21 @@
 #include "trace.h"
 #include "trace_output.h"
 
-struct fgraph_data {
+struct fgraph_data_cpu {
 	pid_t		last_pid;
 	int		depth;
 };
 
+struct fgraph_data {
+	struct fgraph_data_cpu *data_cpu;
+
+	/* Place to preserve last processed entry. */
+	struct ftrace_graph_ent_entry ent;
+	struct ftrace_graph_ret_entry ret;
+	int    is_entry;
+	int    is_leaf;
+};
+
 #define TRACE_GRAPH_INDENT	2
 
 /* Flag options */
@@ -384,7 +394,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data)
 	if (!data)
 		return TRACE_TYPE_HANDLED;
 
-	last_pid = &(per_cpu_ptr(data, cpu)->last_pid);
+	last_pid = &(per_cpu_ptr(data->data_cpu, cpu)->last_pid);
 
 	if (*last_pid == pid)
 		return TRACE_TYPE_HANDLED;
@@ -431,6 +441,27 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data)
 	return TRACE_TYPE_HANDLED;
 }
 
+static void set_private_entry(struct trace_iterator *iter,
+				struct ftrace_graph_ent_entry *ent,
+				struct ftrace_graph_ret_entry *ret)
+{
+	struct fgraph_data *data = iter->private;
+	data->ent = *ent;
+	data->ret = *ret;
+}
+
+static void set_private_isleaf(struct trace_iterator *iter, int leaf)
+{
+	struct fgraph_data *data = iter->private;
+	data->is_leaf = leaf;
+}
+
+static void set_private_isentry(struct trace_iterator *iter, int entry)
+{
+	struct fgraph_data *data = iter->private;
+	data->is_entry = entry;
+}
+
 static struct ftrace_graph_ret_entry *
 get_return_for_leaf(struct trace_iterator *iter,
 		struct ftrace_graph_ent_entry *curr)
@@ -456,6 +487,13 @@ get_return_for_leaf(struct trace_iterator *iter,
 
 	next = ring_buffer_event_data(event);
 
+	/*
+	 * Save current and next entries for later reference
+	 * if the output failes.
+	 */
+	set_private_entry(iter, curr, next);
+	set_private_isentry(iter, 1);
+
 	if (next->ent.type != TRACE_GRAPH_RET)
 		return NULL;
 
@@ -467,6 +505,9 @@ get_return_for_leaf(struct trace_iterator *iter,
 	if (ring_iter)
 		ring_buffer_read(ring_iter, NULL);
 
+	/* Now we are completely sure this one is a leaf, mark it. */
+	set_private_isleaf(iter, 1);
+
 	return next;
 }
 
@@ -640,7 +681,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
 
 	if (data) {
 		int cpu = iter->cpu;
-		int *depth = &(per_cpu_ptr(data, cpu)->depth);
+		int *depth = &(per_cpu_ptr(data->data_cpu, cpu)->depth);
 
 		/*
 		 * Comments display at + 1 to depth. Since
@@ -688,7 +729,7 @@ print_graph_entry_nested(struct trace_iterator *iter,
 
 	if (data) {
 		int cpu = iter->cpu;
-		int *depth = &(per_cpu_ptr(data, cpu)->depth);
+		int *depth = &(per_cpu_ptr(data->data_cpu, cpu)->depth);
 
 		*depth = call->depth;
 	}
@@ -779,17 +820,16 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
 }
 
 static enum print_line_t
-print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
-			struct trace_iterator *iter)
+print_graph_entry(struct ftrace_graph_ent_entry *field,
+		  struct ftrace_graph_ret_entry *leaf_ret,
+		  struct trace_seq *s, struct trace_iterator *iter)
 {
 	int cpu = iter->cpu;
 	struct ftrace_graph_ent *call = &field->graph_ent;
-	struct ftrace_graph_ret_entry *leaf_ret;
 
 	if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func))
 		return TRACE_TYPE_PARTIAL_LINE;
 
-	leaf_ret = get_return_for_leaf(iter, field);
 	if (leaf_ret)
 		return print_graph_entry_leaf(iter, field, leaf_ret, s);
 	else
@@ -798,6 +838,17 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
 }
 
 static enum print_line_t
+handle_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
+			struct trace_iterator *iter)
+{
+	struct ftrace_graph_ret_entry *leaf_ret;
+
+	leaf_ret = get_return_for_leaf(iter, field);
+
+	return print_graph_entry(field, leaf_ret, s, iter);
+}
+
+static enum print_line_t
 print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
 		   struct trace_entry *ent, struct trace_iterator *iter)
 {
@@ -810,7 +861,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
 
 	if (data) {
 		int cpu = iter->cpu;
-		int *depth = &(per_cpu_ptr(data, cpu)->depth);
+		int *depth = &(per_cpu_ptr(data->data_cpu, cpu)->depth);
 
 		/*
 		 * Comments display at + 1 to depth. This is the
@@ -873,7 +924,7 @@ print_graph_comment(struct trace_seq *s,  struct trace_entry *ent,
 	int i;
 
 	if (data)
-		depth = per_cpu_ptr(data, iter->cpu)->depth;
+		depth = per_cpu_ptr(data->data_cpu, iter->cpu)->depth;
 
 	if (print_graph_prologue(iter, s, 0, 0))
 		return TRACE_TYPE_PARTIAL_LINE;
@@ -938,12 +989,68 @@ print_graph_comment(struct trace_seq *s,  struct trace_entry *ent,
 }
 
 
+static int print_failed_entry(struct trace_iterator *iter)
+{
+	struct fgraph_data *data = iter->private;
+	struct trace_seq *s = &iter->seq;
+	struct ftrace_graph_ent_entry *ent = &data->ent;
+	struct ftrace_graph_ret_entry *ret = &data->ret;
+
+	/*
+	 * Only entries suffer from the ahead jump,
+	 * returns are ok.
+	 */
+	if (!data->is_entry)
+		return 0;
+
+	/*
+	 * Last data was a leaf. Trace it out and let's read
+	 * another one, regardless of the consuming or iter
+	 * processing way.
+	 */
+	if (data->is_leaf) {
+		struct ftrace_graph_ent *call = &ent->graph_ent;
+
+		print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func);
+		print_graph_entry_leaf(iter, ent, ret, s);
+		return 1;
+	}
+
+	/*
+	 * Last entry was not a leaf.
+	 *
+	 * If this is iter processing, let it
+	 * handle the usual way.
+	 */
+	if (iter->buffer_iter[iter->cpu])
+		return 0;
+
+	/*
+	 * Last data was not a leaf and we have consuming
+	 * processing. Trace it out and let the usual way
+	 * to pick up on the current entry.
+	 */
+	print_graph_entry(ent, NULL, s, iter);
+	return 0;
+}
+
 enum print_line_t
 print_graph_function(struct trace_iterator *iter)
 {
 	struct trace_entry *entry = iter->ent;
 	struct trace_seq *s = &iter->seq;
 
+	/*
+	 * If the last output failed, there's a possibility we need
+	 * to print out the missing entry which would never go out.
+	 */
+	if (iter->seq.failed &&
+	    print_failed_entry(iter))
+		return TRACE_TYPE_HANDLED;
+
+	set_private_isleaf(iter, 0);
+	set_private_isentry(iter, 0);
+
 	switch (entry->type) {
 	case TRACE_GRAPH_ENT: {
 		/*
@@ -955,7 +1062,7 @@ print_graph_function(struct trace_iterator *iter)
 		struct ftrace_graph_ent_entry *field, saved;
 		trace_assign_type(field, entry);
 		saved = *field;
-		return print_graph_entry(&saved, s, iter);
+		return handle_graph_entry(&saved, s, iter);
 	}
 	case TRACE_GRAPH_RET: {
 		struct ftrace_graph_ret_entry *field;
@@ -1030,30 +1137,41 @@ static void print_graph_headers(struct seq_file *s)
 static void graph_trace_open(struct trace_iterator *iter)
 {
 	/* pid and depth on the last trace processed */
-	struct fgraph_data *data = alloc_percpu(struct fgraph_data);
+	struct fgraph_data *data;
+	struct fgraph_data_cpu *data_cpu;
 	int cpu;
 
-	if (!data)
+	data = kzalloc(sizeof(struct fgraph_data), GFP_KERNEL);
+	data_cpu = alloc_percpu(struct fgraph_data_cpu);
+
+	if (!data || !data_cpu)
 		pr_warning("function graph tracer: not enough memory\n");
-	else
+	else {
 		for_each_possible_cpu(cpu) {
-			pid_t *pid = &(per_cpu_ptr(data, cpu)->last_pid);
-			int *depth = &(per_cpu_ptr(data, cpu)->depth);
+			pid_t *pid = &(per_cpu_ptr(data_cpu, cpu)->last_pid);
+			int *depth = &(per_cpu_ptr(data_cpu, cpu)->depth);
 			*pid = -1;
 			*depth = 0;
 		}
 
+		data->data_cpu = data_cpu;
+	}
+
 	iter->private = data;
 }
 
 static void graph_trace_close(struct trace_iterator *iter)
 {
-	free_percpu(iter->private);
+	struct fgraph_data *data = iter->private;
+
+	free_percpu(data->data_cpu);
+	kfree(iter->private);
 }
 
 static struct tracer graph_trace __read_mostly = {
 	.name		= "function_graph",
 	.open		= graph_trace_open,
+	.pipe_open	= graph_trace_open,
 	.close		= graph_trace_close,
 	.wait_pipe	= poll_wait_pipe,
 	.init		= graph_trace_init,
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index b6c12c6..63e9d4d 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -23,13 +23,15 @@ static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
 
 static int next_event_type = __TRACE_LAST_TYPE + 1;
 
-void trace_print_seq(struct seq_file *m, struct trace_seq *s)
+int trace_print_seq(struct seq_file *m, struct trace_seq *s)
 {
+	int ret;
 	int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
 
-	seq_write(m, s->buffer, len);
-
+	ret = s->failed = seq_write(m, s->buffer, len);
 	trace_seq_init(s);
+
+	return ret;
 }
 
 enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter)
@@ -93,7 +95,8 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
 	va_end(ap);
 
 	/* If we can't write it all, don't bother writing anything */
-	if (ret >= len)
+	s->failed = ret >= len;
+	if (s->failed)
 		return 0;
 
 	s->len += ret;
@@ -125,7 +128,8 @@ trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
 	ret = vsnprintf(s->buffer + s->len, len, fmt, args);
 
 	/* If we can't write it all, don't bother writing anything */
-	if (ret >= len)
+	s->failed = ret >= len;
+	if (s->failed)
 		return 0;
 
 	s->len += ret;
@@ -145,7 +149,8 @@ int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
 	ret = bstr_printf(s->buffer + s->len, len, fmt, binary);
 
 	/* If we can't write it all, don't bother writing anything */
-	if (ret >= len)
+	s->failed = ret >= len;
+	if (s->failed)
 		return 0;
 
 	s->len += ret;
@@ -167,7 +172,8 @@ int trace_seq_puts(struct trace_seq *s, const char *str)
 {
 	int len = strlen(str);
 
-	if (len > ((PAGE_SIZE - 1) - s->len))
+	s->failed = len > ((PAGE_SIZE - 1) - s->len);
+	if (s->failed)
 		return 0;
 
 	memcpy(s->buffer + s->len, str, len);
@@ -178,7 +184,8 @@ int trace_seq_puts(struct trace_seq *s, const char *str)
 
 int trace_seq_putc(struct trace_seq *s, unsigned char c)
 {
-	if (s->len >= (PAGE_SIZE - 1))
+	s->failed = s->len >= (PAGE_SIZE - 1);
+	if (s->failed)
 		return 0;
 
 	s->buffer[s->len++] = c;
@@ -188,7 +195,8 @@ int trace_seq_putc(struct trace_seq *s, unsigned char c)
 
 int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len)
 {
-	if (len > ((PAGE_SIZE - 1) - s->len))
+	s->failed = len > ((PAGE_SIZE - 1) - s->len);
+	if (s->failed)
 		return 0;
 
 	memcpy(s->buffer + s->len, mem, len);

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH] tracing - fix function graph trace to properly display failed entries
  2009-11-24 12:57 [PATCH] tracing - fix function graph trace to properly display failed entries Jiri Olsa
@ 2009-11-24 17:14 ` Steven Rostedt
  2009-11-24 21:14   ` Frederic Weisbecker
  2009-12-10  7:49 ` [tip:tracing/core] tracing: Fix function graph trace_pipe " tip-bot for Jiri Olsa
  1 sibling, 1 reply; 15+ messages in thread
From: Steven Rostedt @ 2009-11-24 17:14 UTC (permalink / raw)
  To: Jiri Olsa; +Cc: mingo, fweisbec, linux-kernel

On Tue, 2009-11-24 at 13:57 +0100, Jiri Olsa wrote:
> Hi,
> 
> there's a case where the graph tracer might get confused and omits
> display of a single record.  This applies for both destructive
> (trace_pipe) and nondestructive (trace) cases.
> 
> 
> <issue description>
> 
> The issue description for nondestructive case (trace) follows:
> 
> As the function_graph tracer goes through the trace entries (using "trace" file)
> it keeps pointer to the current record and the next one:
> 
> current ->  func1 ENTRY
>    next ->  func2 ENTRY
>             func2 RETURN
>             func1 RETURN
> 
> If it spots adjacent ENTRY and RETURN trace entries of the same function
> and pid, it displays the "func2();" trace
> 
>             func1 ENTRY
> current ->  func2 ENTRY 
>    next ->  func2 RETURN
>             func1 RETURN
> 
> and moves the next trace entry pointer behind the RETURN entry
> 
>             func1 ENTRY
> current ->  func2 ENTRY 
>             func2 RETURN
>    next ->  func1 RETURN
> 
> so the next record peek will skip the RETURN entry and continue with
> whatever is next.
> 
> It works ok but for one case. 
> 
> If the "func2()" trace does not make it to the seq_file read buffer, it needs

In the trace_pipe it may be possible to fill the trace_seq buffer if the
buffer passed into userspace is larger than the trace_seq buffer. But
the trace_seq buffer should never overflow on the "trace" case. If it
is, then there's probably another bug.


> to be processed again in the next read.  And here comes the issue: 
> the next read will see following pointers setup for func2 processing:
> 
>             func1 ENTRY
> current ->  func2 ENTRY 
>             func2 RETURN
>    next ->  func1 RETURN
> 
> which will turn to displaying the func2 entry like: "func2() {", since the
> next entry is not RETURN of the same type.  Generaly it is whatever entry 
> that follows, but definitelly not the RETURN entry of the same function.
> 
> The destructive case (trace_pipe) suffers from the similar issue,
> although it keeps only the current pointer.
> 
> </issue description>
> 
> 
> The following patch propose generic solution for both cases.
> It keeps the last read entry/return in the tracer private
> iterator storage and keeps track of the failed output.
> Whenever the output fails, next read will output previous
> 'not displayed' entry before processing next entry.

Have you added tests to make sure that the trace_seq buffer is indeed
filling up?  In the trace_pipe, this could happen when the user buffer
gets full.

What I think you are seeing, is a buffer overflow during a run. If the
trace buffer overflows, it will leave gaps in the trace. Those abandoned
leaf functions are probably a result of a trace buffer wrap.

I don't thing this is the proper solution to the problem.

> 


>  static inline ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index 874f289..ca52715 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -3049,10 +3049,14 @@ waitagain:

(side note)

This is why I like to have spaces in front of labels (not a comment on
this patch). I hate trying to figure out what function this is for :-p
Luckily, in this case, there's only one function with "waitagain" label.

>  	if (cnt >= PAGE_SIZE)
>  		cnt = PAGE_SIZE - 1;
>  
> -	/* reset all but tr, trace, and overruns */
> -	memset(&iter->seq, 0,
> +	/*
> +	 * Reset all but tr, trace, and overruns. Preserve 'failed'
> +	 * field in the trace_seq structure.
> +	 */
> +	trace_seq_init(&iter->seq);
> +	memset(&iter->ent, 0,
>  	       sizeof(struct trace_iterator) -
> -	       offsetof(struct trace_iterator, seq));
> +	       offsetof(struct trace_iterator, ent));
>  	iter->pos = -1;
>  
>  	trace_event_read_lock();


-- Steve



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] tracing - fix function graph trace to properly display failed entries
  2009-11-24 17:14 ` Steven Rostedt
@ 2009-11-24 21:14   ` Frederic Weisbecker
  2009-11-24 22:17     ` Frederic Weisbecker
  2009-11-25  8:45     ` Jiri Olsa
  0 siblings, 2 replies; 15+ messages in thread
From: Frederic Weisbecker @ 2009-11-24 21:14 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: Jiri Olsa, mingo, linux-kernel

On Tue, Nov 24, 2009 at 12:14:05PM -0500, Steven Rostedt wrote:
> On Tue, 2009-11-24 at 13:57 +0100, Jiri Olsa wrote:
> > Hi,
> > 
> > there's a case where the graph tracer might get confused and omits
> > display of a single record.  This applies for both destructive
> > (trace_pipe) and nondestructive (trace) cases.
> > 
> > 
> > <issue description>
> > 
> > The issue description for nondestructive case (trace) follows:
> > 
> > As the function_graph tracer goes through the trace entries (using "trace" file)
> > it keeps pointer to the current record and the next one:
> > 
> > current ->  func1 ENTRY
> >    next ->  func2 ENTRY
> >             func2 RETURN
> >             func1 RETURN
> > 
> > If it spots adjacent ENTRY and RETURN trace entries of the same function
> > and pid, it displays the "func2();" trace
> > 
> >             func1 ENTRY
> > current ->  func2 ENTRY 
> >    next ->  func2 RETURN
> >             func1 RETURN
> > 
> > and moves the next trace entry pointer behind the RETURN entry
> > 
> >             func1 ENTRY
> > current ->  func2 ENTRY 
> >             func2 RETURN
> >    next ->  func1 RETURN
> > 
> > so the next record peek will skip the RETURN entry and continue with
> > whatever is next.
> > 
> > It works ok but for one case. 
> > 
> > If the "func2()" trace does not make it to the seq_file read buffer, it needs
> 
> In the trace_pipe it may be possible to fill the trace_seq buffer if the
> buffer passed into userspace is larger than the trace_seq buffer. But
> the trace_seq buffer should never overflow on the "trace" case. If it
> is, then there's probably another bug.



Hmm, yeah in trace_pipe case we repeat until we have no space
left in trace_seq (TRACE_TYPE_PARTIAL_LINE), or until we
the user buffer is filled.

But indeed in case of trace file, we are using a seq file
so the buffer gets flushed after each entries.

Assuming the trace_seq is 4096 bytes long this is probably
enough for every function graph entries (even with headers and
interrupts), this is not something we are supposed to see in trace_seq.

I did not realized that when Jiri sent the first version of this patch.

 
> 
> > to be processed again in the next read.  And here comes the issue: 
> > the next read will see following pointers setup for func2 processing:
> > 
> >             func1 ENTRY
> > current ->  func2 ENTRY 
> >             func2 RETURN
> >    next ->  func1 RETURN
> > 
> > which will turn to displaying the func2 entry like: "func2() {", since the
> > next entry is not RETURN of the same type.  Generaly it is whatever entry 
> > that follows, but definitelly not the RETURN entry of the same function.
> > 
> > The destructive case (trace_pipe) suffers from the similar issue,
> > although it keeps only the current pointer.
> > 
> > </issue description>
> > 
> > 
> > The following patch propose generic solution for both cases.
> > It keeps the last read entry/return in the tracer private
> > iterator storage and keeps track of the failed output.
> > Whenever the output fails, next read will output previous
> > 'not displayed' entry before processing next entry.
> 
> Have you added tests to make sure that the trace_seq buffer is indeed
> filling up?  In the trace_pipe, this could happen when the user buffer
> gets full.
> 
> What I think you are seeing, is a buffer overflow during a run. If the
> trace buffer overflows, it will leave gaps in the trace. Those abandoned
> leaf functions are probably a result of a trace buffer wrap.
> 
> I don't thing this is the proper solution to the problem.


But I guess it doesn't happen because the function graph tracer
has too large entries, or it would do an endless loop while
reading the trace file.

It's more likely a bug somewhere in the trace_seq_* functions.

I'm going to have a look.

Thanks.


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] tracing - fix function graph trace to properly display failed entries
  2009-11-24 21:14   ` Frederic Weisbecker
@ 2009-11-24 22:17     ` Frederic Weisbecker
  2009-11-24 22:26       ` Steven Rostedt
  2009-11-25  8:47       ` Jiri Olsa
  2009-11-25  8:45     ` Jiri Olsa
  1 sibling, 2 replies; 15+ messages in thread
From: Frederic Weisbecker @ 2009-11-24 22:17 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: Jiri Olsa, mingo, linux-kernel

On Tue, Nov 24, 2009 at 10:14:36PM +0100, Frederic Weisbecker wrote:
> But I guess it doesn't happen because the function graph tracer
> has too large entries, or it would do an endless loop while
> reading the trace file.


Ah actually it won't as we don't check the return value
of trace_print_line() from the seq show handler.
There is no retry.

And also there is no need to retry. The buffer gets never
entirely filled in the trace file reading path, as it
is flushed after each entry. And no single trace fills the
whole buffer. I just made some tests and we never reach that
point (no TRACE_TYPE_PARTIAL_LINE) returned. So no overflow
as far as I tested (there might be rare corner cases).

But it happens with trace_pipe reading.

There is another bug. Here is a sample from
trace reading:

 0)               |  sys_newfstat() {
 0)               |    vfs_fstat() {
 0)   1.187 us    |      fget();
 0)               |      vfs_getattr() {
 0)               |        security_inode_getattr() {
 0)   0.608 us    |              }
 0)   2.951 us    |            }
 0) + 65.349 us   |          }
 0)               |          path_put() {
 0)   0.608 us    |            dput();
 0)   0.548 us    |            mntput_no_expire();
 0)   2.748 us    |          }
 0) + 74.472 us   |        }


I think we are loosing some traces here, between security_inode_getattr()
and path_put().

I'm not sure why. The problem related by Olsa would explain such
symptoms, and what olsa reported is a real bug, but only
in trace_pipe. There is also something else, probably more
related to the fast path.


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] tracing - fix function graph trace to properly display failed entries
  2009-11-24 22:17     ` Frederic Weisbecker
@ 2009-11-24 22:26       ` Steven Rostedt
  2009-11-24 22:53         ` Frederic Weisbecker
  2009-11-25  8:47       ` Jiri Olsa
  1 sibling, 1 reply; 15+ messages in thread
From: Steven Rostedt @ 2009-11-24 22:26 UTC (permalink / raw)
  To: Frederic Weisbecker; +Cc: Jiri Olsa, mingo, linux-kernel

On Tue, 2009-11-24 at 23:17 +0100, Frederic Weisbecker wrote:
> On Tue, Nov 24, 2009 at 10:14:36PM +0100, Frederic Weisbecker wrote:
> > But I guess it doesn't happen because the function graph tracer
> > has too large entries, or it would do an endless loop while
> > reading the trace file.
> 
> 
> Ah actually it won't as we don't check the return value
> of trace_print_line() from the seq show handler.
> There is no retry.
> 
> And also there is no need to retry. The buffer gets never
> entirely filled in the trace file reading path, as it
> is flushed after each entry. And no single trace fills the
> whole buffer. I just made some tests and we never reach that
> point (no TRACE_TYPE_PARTIAL_LINE) returned. So no overflow
> as far as I tested (there might be rare corner cases).

As I expected ;-)

> 
> But it happens with trace_pipe reading.

Yep, but I think we should be able to handle it without modifying
trace_seq.

> 
> There is another bug. Here is a sample from
> trace reading:
> 
>  0)               |  sys_newfstat() {
>  0)               |    vfs_fstat() {
>  0)   1.187 us    |      fget();
>  0)               |      vfs_getattr() {
>  0)               |        security_inode_getattr() {
>  0)   0.608 us    |              }
>  0)   2.951 us    |            }
>  0) + 65.349 us   |          }
>  0)               |          path_put() {
>  0)   0.608 us    |            dput();
>  0)   0.548 us    |            mntput_no_expire();
>  0)   2.748 us    |          }
>  0) + 74.472 us   |        }
> 
> 
> I think we are loosing some traces here, between security_inode_getattr()
> and path_put().

Sure the buffer didn't just wrap there? Is this a trace_pipe output?

Also note, with the reader page of the ring buffer there could also be
another symptom. Once the writer is off the reader page, it will never
come back on it (unless a reader swaps where the writer is, but that
only happens on a lightly filled buffer).

Thus, even if we freeze recording, when the reader finishes its reader
page and goes off into the main buffer, there may be a gap between the
last read and the next (main buffer) due to the writer wrapping it.

If you are using the trace_pipe while the function tracer is on, you
will probably see this a lot.

> 
> I'm not sure why. The problem related by Olsa would explain such
> symptoms, and what olsa reported is a real bug, but only
> in trace_pipe. There is also something else, probably more
> related to the fast path.

Are you looking at this while the function_graph tracer is running? Or
do you disable tracing (tracing_on = 0)

-- Steve



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] tracing - fix function graph trace to properly display failed entries
  2009-11-24 22:26       ` Steven Rostedt
@ 2009-11-24 22:53         ` Frederic Weisbecker
  2009-11-24 23:21           ` Steven Rostedt
  0 siblings, 1 reply; 15+ messages in thread
From: Frederic Weisbecker @ 2009-11-24 22:53 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: Jiri Olsa, mingo, linux-kernel

On Tue, Nov 24, 2009 at 05:26:42PM -0500, Steven Rostedt wrote:
> > But it happens with trace_pipe reading.
> 
> Yep, but I think we should be able to handle it without modifying
> trace_seq.



Yeah. I guess now that the bug window is isolated to the
trace_pipe case, the fix will probably (hopefully) be easier.


 
> > 
> > There is another bug. Here is a sample from
> > trace reading:
> > 
> >  0)               |  sys_newfstat() {
> >  0)               |    vfs_fstat() {
> >  0)   1.187 us    |      fget();
> >  0)               |      vfs_getattr() {
> >  0)               |        security_inode_getattr() {
> >  0)   0.608 us    |              }
> >  0)   2.951 us    |            }
> >  0) + 65.349 us   |          }
> >  0)               |          path_put() {
> >  0)   0.608 us    |            dput();
> >  0)   0.548 us    |            mntput_no_expire();
> >  0)   2.748 us    |          }
> >  0) + 74.472 us   |        }
> > 
> > 
> > I think we are loosing some traces here, between security_inode_getattr()
> > and path_put().
> 
> Sure the buffer didn't just wrap there? Is this a trace_pipe output?


No it didn't wrapped, and it's trace output, not trace_pipe.


> Also note, with the reader page of the ring buffer there could also be
> another symptom. Once the writer is off the reader page, it will never
> come back on it (unless a reader swaps where the writer is, but that
> only happens on a lightly filled buffer).
> 
> Thus, even if we freeze recording, when the reader finishes its reader
> page and goes off into the main buffer, there may be a gap between the
> last read and the next (main buffer) due to the writer wrapping it.



You mean the writer could have eaten what we are supposed to read next?
Yeah that could explain the problem.



> If you are using the trace_pipe while the function tracer is on, you
> will probably see this a lot.


That's perhaps the problem. I was tracing while reading (too lazy
to echo 0 > tracing_on)


> > 
> > I'm not sure why. The problem related by Olsa would explain such
> > symptoms, and what olsa reported is a real bug, but only
> > in trace_pipe. There is also something else, probably more
> > related to the fast path.
> 
> Are you looking at this while the function_graph tracer is running? Or
> do you disable tracing (tracing_on = 0)


Yeah it is running.
But I just tried after setting tracing_on to 0 and I found that:


 0)               |          raw_notifier_call_chain() {
 0)               |            tick_notify() {
 0)               |              tick_broadcast_oneshot_control() {
 0)   2.072 us    |                _spin_lock_irqsave();
 0)               |                clockevents_set_mode() {
 0)               |                  lapic_timer_setup() {
 0)   0.976 us    |                    native_apic_mem_read();
 0)   2.223 us    |                    native_apic_mem_write();
 0)   0.946 us    |                    native_apic_mem_write();
 0)   7.959 us    |                  }
 0)   9.844 us    |                }
 0) + 46.074 us   |      }
 0) ! 172.995 us  |    }
 0)   1.178 us    |    menu_reflect();
 0) ! 190.040 us  |  }

The whole trace looks globally reliable but there is this little
corner case. Looks like we are loosing some return entries.

Or that:

 0)               |                clockevents_set_mode() {
 0)               |                  lapic_timer_setup() {
 0)               |                    __setup_APIC_LVTT() {
 0)   0.946 us    |                      native_apic_mem_write();
 0)   0.969 us    |                      native_apic_mem_read();
 0)               |                      native_apic_mem_write() {
 0)   6.487 us    |                    }
 0)   1.096 us    |                    }
 0)   0.946 us    |                    native_apic_mem_write();
 0)   6.781 us    |                  }
 0)   8.657 

This one looks more strange. As if the return trace of
native_apic_mem_write() had a wrong depth.

Sometimes also the depth seems to go too far:

 0)               |    acpi_idle_enter_simple() {
 0)   1.900 us    |                _spin_lock_irqsave();
 0)               |                clockevents_set_mode() {
 0)               |                  lapic_timer_setup() {
 0)               |                    __setup_APIC_LVTT() {
 0)   0.939 us    |                      native_apic_mem_write();
 0)   0.939 us    |                      native_apic_mem_read();
 0)   1.164 us    |                      native_apic_mem_write();
 0)   6.706 us    |                    }
 0)   8.725 us    |                  }
 0) + 10.737 us   |                }
 0)               |        


And to finish, a special gift :-)

 0)               |            tick_notify() {
 0)               |              tick_broadcast_oneshot_control() {
 0)   1.855 us    |                _spin_lock_irqsave();
 0)               |                clockevents_set_mode() {
 0)               |                  lapic_timer_setup() {
 0)               |                    __setup_APIC_LVTT() {
 0)   0.894 us    |                      native_apic_mem_write();
 0)   0.939 us    |                      native_apic_mem_read();
 0)   0.901 us    |                      native_apic_mem_write();
 0)   6.503 us    |                    }
 0)   8.463 us    |                  }
 0)   0.938 us    |                    }
 0)   6.652 us    |                  }
 0)   8.515 us    |                }
 0)   1.637 us    |                _spin_unlock_irqrestore();
 0) + 15.904 us   |              }
 0) + 17.78


I'll try to fix these issues...


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] tracing - fix function graph trace to properly display failed entries
  2009-11-24 22:53         ` Frederic Weisbecker
@ 2009-11-24 23:21           ` Steven Rostedt
  0 siblings, 0 replies; 15+ messages in thread
From: Steven Rostedt @ 2009-11-24 23:21 UTC (permalink / raw)
  To: Frederic Weisbecker; +Cc: Jiri Olsa, mingo, linux-kernel

On Tue, 2009-11-24 at 23:53 +0100, Frederic Weisbecker wrote:
> On Tue, Nov 24, 2009 at 05:26:42PM -0500, Steven Rostedt wrote:
> > > But it happens with trace_pipe reading.
> > 
> > Yep, but I think we should be able to handle it without modifying
> > trace_seq.
> 
> 
> 
> Yeah. I guess now that the bug window is isolated to the
> trace_pipe case, the fix will probably (hopefully) be easier.
> 

Hmm, there is something funky going on. I did the following:

# echo 1 > /debug/tracing/tracing_on
# echo function_graph > /debug/tracing/current_tracer
# sleep 10
# echo 0 > /debug/tracing/tracing_on
# cat /debug/tracing/per_cpu/cpu0/trace > /tmp/t1
# cat /debug/tracing/per_cpu/cpu0/trace_pipe > /tmp/t2
(wait)
Ctrl^C (seems to block for more input)


# diff -u /tmp/t1 /tmp/t2

This is some interesting results:

--- /tmp/t1     2009-11-24 18:11:03.000000000 -0500
+++ /tmp/t2     2009-11-24 18:12:20.000000000 -0500
@@ -1,7 +1,3 @@
-# tracer: function_graph
-#
-# CPU  DURATION                  FUNCTION CALLS
-# |     |   |                     |   |   |   |
  0)   0.543 us    |                        }
  0)   0.670 us    |                        _spin_unlock_irqrestore();
  0)   8.741 us    |                      }

( Just to get the idea, "-" is trace and "+" is trace_pipe )

@@ -84,7 +80,7 @@
  0)   <========== |
  0) ! 99918.45 us |  }
  0)               |  tick_nohz_restart_sched_tick() {
- 0)   0.570 us    |    tick_nohz_stop_idle();
+ 0)   0.570 us    |    }
  0)   0.733 us    |    rcu_exit_nohz();
  0)   0.612 us    |    select_nohz_load_balancer();
  0)   0.573 us    |    ktime_get();

trace_pipe lost the tick_nohz_stop_idle with '}'

@@ -272,7 +264,7 @@
  0)   0.663 us    |                  account_system_time();
  0)   1.939 us    |                }
  0)               |                run_local_timers() {
- 0)               |                  hrtimer_run_queues() {
+ 0)   0.615 us    |                  hrtimer_run_queues();
  0)   0.660 us    |                  raise_softirq();
  0)               |                  softlockup_tick() {
  0)   0.873 us    |                    __touch_softlockup_watchdog();

Now trace adds a '{' where it looks like trace_pipe was correct.

@@ -351,12 +343,12 @@
  0)   0.760 us    |                        _spin_unlock_irqrestore();
  0)   0.905 us    |                        _spin_lock_irqsave();
  0)   0.523 us    |                        __set_se_shares();
- 0)               |                        _spin_unlock_irqrestore() {
+ 0)   0.705 us    |                        _spin_unlock_irqrestore();
  0)   9.272 us    |                      }
  0)   0.573 us    |                      tg_nop();
  0)               |                      tg_shares_up() {
  0)   0.891 us    |                        _spin_lock_irqsave();
- 0)   0.530 us    |                        __set_se_shares();
+ 0)   0.530 us    |                        }
  0)   0.693 us    |                        _spin_unlock_irqrestore();
  0)   0.891 us    |                        _spin_lock_irqsave();
  0)   0.527 us    |                        __set_se_shares();

Again, trace adds the '{' and later trace_pipe loses
__set_se_shares(); ??

And this goes on and on.


So both seem to be a bit buggy. The above really should be almost
identical, besides the header.

-- Steve



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] tracing - fix function graph trace to properly display failed entries
  2009-11-24 21:14   ` Frederic Weisbecker
  2009-11-24 22:17     ` Frederic Weisbecker
@ 2009-11-25  8:45     ` Jiri Olsa
  2009-11-25 16:21       ` Steven Rostedt
  2009-12-04 10:46       ` Jiri Olsa
  1 sibling, 2 replies; 15+ messages in thread
From: Jiri Olsa @ 2009-11-25  8:45 UTC (permalink / raw)
  To: Frederic Weisbecker; +Cc: Steven Rostedt, mingo, linux-kernel

On Tue, Nov 24, 2009 at 10:14:36PM +0100, Frederic Weisbecker wrote:
> On Tue, Nov 24, 2009 at 12:14:05PM -0500, Steven Rostedt wrote:
> > On Tue, 2009-11-24 at 13:57 +0100, Jiri Olsa wrote:
> > > Hi,
> > > 
> > > there's a case where the graph tracer might get confused and omits
> > > display of a single record.  This applies for both destructive
> > > (trace_pipe) and nondestructive (trace) cases.
> > > 
> > > 
> > > <issue description>
> > > 
> > > The issue description for nondestructive case (trace) follows:
> > > 
> > > As the function_graph tracer goes through the trace entries (using "trace" file)
> > > it keeps pointer to the current record and the next one:
> > > 
> > > current ->  func1 ENTRY
> > >    next ->  func2 ENTRY
> > >             func2 RETURN
> > >             func1 RETURN
> > > 
> > > If it spots adjacent ENTRY and RETURN trace entries of the same function
> > > and pid, it displays the "func2();" trace
> > > 
> > >             func1 ENTRY
> > > current ->  func2 ENTRY 
> > >    next ->  func2 RETURN
> > >             func1 RETURN
> > > 
> > > and moves the next trace entry pointer behind the RETURN entry
> > > 
> > >             func1 ENTRY
> > > current ->  func2 ENTRY 
> > >             func2 RETURN
> > >    next ->  func1 RETURN
> > > 
> > > so the next record peek will skip the RETURN entry and continue with
> > > whatever is next.
> > > 
> > > It works ok but for one case. 
> > > 
> > > If the "func2()" trace does not make it to the seq_file read buffer, it needs
> > 
> > In the trace_pipe it may be possible to fill the trace_seq buffer if the
> > buffer passed into userspace is larger than the trace_seq buffer. But
> > the trace_seq buffer should never overflow on the "trace" case. If it
> > is, then there's probably another bug.
> 
> 
> 
> Hmm, yeah in trace_pipe case we repeat until we have no space
> left in trace_seq (TRACE_TYPE_PARTIAL_LINE), or until we
> the user buffer is filled.
> 
> But indeed in case of trace file, we are using a seq file
> so the buffer gets flushed after each entries.
> 
> Assuming the trace_seq is 4096 bytes long this is probably
> enough for every function graph entries (even with headers and
> interrupts), this is not something we are supposed to see in trace_seq.
> 
> I did not realized that when Jiri sent the first version of this patch.

sure, but it is not the trace_seq structure that overflows, it is the bare 
seq_file underneath. The s_show calls "trace_print_seq":

void trace_print_seq(struct seq_file *m, struct trace_seq *s)
{               
        int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
        
        seq_write(m, s->buffer, len);

        trace_seq_init(s);
}               

it flushes the trace_seq to the seq_file and reinits the trace_seq.
But the seq_file will get filled in the end, ending up with
not displayed output in that case I described in the first email.

> 
>  
> > 
> > > to be processed again in the next read.  And here comes the issue: 
> > > the next read will see following pointers setup for func2 processing:
> > > 
> > >             func1 ENTRY
> > > current ->  func2 ENTRY 
> > >             func2 RETURN
> > >    next ->  func1 RETURN
> > > 
> > > which will turn to displaying the func2 entry like: "func2() {", since the
> > > next entry is not RETURN of the same type.  Generaly it is whatever entry 
> > > that follows, but definitelly not the RETURN entry of the same function.
> > > 
> > > The destructive case (trace_pipe) suffers from the similar issue,
> > > although it keeps only the current pointer.
> > > 
> > > </issue description>
> > > 
> > > 
> > > The following patch propose generic solution for both cases.
> > > It keeps the last read entry/return in the tracer private
> > > iterator storage and keeps track of the failed output.
> > > Whenever the output fails, next read will output previous
> > > 'not displayed' entry before processing next entry.
> > 
> > Have you added tests to make sure that the trace_seq buffer is indeed
> > filling up?  In the trace_pipe, this could happen when the user buffer
> > gets full.

that should be in the trace_output.c part of the patch,
each display function set the 'failed' flag appropriatelly

> > 
> > What I think you are seeing, is a buffer overflow during a run. If the
> > trace buffer overflows, it will leave gaps in the trace. Those abandoned
> > leaf functions are probably a result of a trace buffer wrap.
> > 
> > I don't thing this is the proper solution to the problem.

I can see other solution, which would need to move the look ahead
skip of the record in the 'get_return_for_leaf' function.

jirka
> 
> 
> But I guess it doesn't happen because the function graph tracer
> has too large entries, or it would do an endless loop while
> reading the trace file.
> 
> It's more likely a bug somewhere in the trace_seq_* functions.
> 
> I'm going to have a look.
> 
> Thanks.
> 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] tracing - fix function graph trace to properly display failed entries
  2009-11-24 22:17     ` Frederic Weisbecker
  2009-11-24 22:26       ` Steven Rostedt
@ 2009-11-25  8:47       ` Jiri Olsa
  1 sibling, 0 replies; 15+ messages in thread
From: Jiri Olsa @ 2009-11-25  8:47 UTC (permalink / raw)
  To: Frederic Weisbecker; +Cc: Steven Rostedt, mingo, linux-kernel

On Tue, Nov 24, 2009 at 11:17:51PM +0100, Frederic Weisbecker wrote:
> On Tue, Nov 24, 2009 at 10:14:36PM +0100, Frederic Weisbecker wrote:
> > But I guess it doesn't happen because the function graph tracer
> > has too large entries, or it would do an endless loop while
> > reading the trace file.
> 
> 
> Ah actually it won't as we don't check the return value
> of trace_print_line() from the seq show handler.
> There is no retry.
> 
> And also there is no need to retry. The buffer gets never
> entirely filled in the trace file reading path, as it
> is flushed after each entry. And no single trace fills the
> whole buffer. I just made some tests and we never reach that
> point (no TRACE_TYPE_PARTIAL_LINE) returned. So no overflow
> as far as I tested (there might be rare corner cases).

well, as I said in previous email, it's the seq_file
that overflows and causes another read syscall...

> 
> But it happens with trace_pipe reading.
> 
> There is another bug. Here is a sample from
> trace reading:
> 
>  0)               |  sys_newfstat() {
>  0)               |    vfs_fstat() {
>  0)   1.187 us    |      fget();
>  0)               |      vfs_getattr() {
>  0)               |        security_inode_getattr() {
>  0)   0.608 us    |              }
>  0)   2.951 us    |            }
>  0) + 65.349 us   |          }
>  0)               |          path_put() {
>  0)   0.608 us    |            dput();
>  0)   0.548 us    |            mntput_no_expire();
>  0)   2.748 us    |          }
>  0) + 74.472 us   |        }
> 
> 
> I think we are loosing some traces here, between security_inode_getattr()
> and path_put().
> 
> I'm not sure why. The problem related by Olsa would explain such
> symptoms, and what olsa reported is a real bug, but only
> in trace_pipe. There is also something else, probably more
> related to the fast path.
> 
please call me jirka :) can you see the issue with the patch applied?
if not I believe it's the issue I described

jirka

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] tracing - fix function graph trace to properly display failed entries
  2009-11-25  8:45     ` Jiri Olsa
@ 2009-11-25 16:21       ` Steven Rostedt
  2009-12-04 10:46       ` Jiri Olsa
  1 sibling, 0 replies; 15+ messages in thread
From: Steven Rostedt @ 2009-11-25 16:21 UTC (permalink / raw)
  To: Jiri Olsa; +Cc: Frederic Weisbecker, mingo, linux-kernel

On Wed, 2009-11-25 at 09:45 +0100, Jiri Olsa wrote:
> On Tue, Nov 24, 2009 at 10:14:36PM +0100, Frederic Weisbecker wrote:

> > 
> > Hmm, yeah in trace_pipe case we repeat until we have no space
> > left in trace_seq (TRACE_TYPE_PARTIAL_LINE), or until we
> > the user buffer is filled.
> > 
> > But indeed in case of trace file, we are using a seq file
> > so the buffer gets flushed after each entries.
> > 
> > Assuming the trace_seq is 4096 bytes long this is probably
> > enough for every function graph entries (even with headers and
> > interrupts), this is not something we are supposed to see in trace_seq.
> > 
> > I did not realized that when Jiri sent the first version of this patch.
> 
> sure, but it is not the trace_seq structure that overflows, it is the bare 
> seq_file underneath. The s_show calls "trace_print_seq":
> 
> void trace_print_seq(struct seq_file *m, struct trace_seq *s)
> {               
>         int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
>         
>         seq_write(m, s->buffer, len);
> 
>         trace_seq_init(s);
> }               
> 
> it flushes the trace_seq to the seq_file and reinits the trace_seq.
> But the seq_file will get filled in the end, ending up with
> not displayed output in that case I described in the first email.
> 

First note, the filling of the trace_seq is only an issue with
trace_pipe. The trace file never has this issue because it uses the
trace_seq once per event, and there exists no event that is bigger than
a page.


The trace_pipe fills as much as it can because it is doing it live and
wants to be as fast as possible, and it is passing information between
user and kernel. Thus it will try to pass as much to userspace as
possible.


Now any issue with the trace file is not caused by trace_seq. But lets
look at what is happening with trace_pipe.

In tracing_read_pipe, we do the following:

	len = iter->seq.len;

	ret = print_trace_line(iter);
	if (ret == TRACE_TYPE_PARTIAL_LINE) {
		iter->seq.len = len;  /* this removes any partial write */
		break;
	}

	if (ret != TRACE_TYPE_NO_CONSUME)
		trace_consume(iter);

	if (iter->seq.len >= cnt)
		break;


The above is contained in a loop. It stops when either there are no more
events, the trace_seq buffer is full or we fill the users buffer.

The print_trace_line is what eventually goes into the function graph
tracer calling print_graph_function.

The call get_return_for_leaf figures out if this is a leaf function or
not.

	ring_iter = iter->buffer_iter[iter->cpu];

	if (ring_iter)
		event = ring_buffer_iter_peak(ring_iter, NULL);
	else {
		ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
		event = ring_buffer_peak(iter->tr->buffer, iter->cpu,
					NULL);
	}

ring_iter is NULL on trace_pipe, so we consume the event. If the next
item is not a graph item, it will be consumed in the loop described
above and lost.

In the case of print_graph_entry_nested, where we don't want to discard
the consumed event, on success it returns: TRACE_TYPE_NO_CONSUME, so
that the event does not get consumed.

But if the buffer fills up, and we exit with TRACE_TYPE_PARTIAL_LINE, we
lost the current event :-(  That is, it was consumed and not read. The
next read by userspace that calls the find_next_entry_inc() will not
have the correct entry.

Looking at this deeper, I'm not sure the find_next_entry_inc is correct
here. If we break out of the loop by filling up the seq buffer, we lose
that event all together. I'll look deeper into this.

Thanks,

-- Steve



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] tracing - fix function graph trace to properly display failed entries
  2009-11-25  8:45     ` Jiri Olsa
  2009-11-25 16:21       ` Steven Rostedt
@ 2009-12-04 10:46       ` Jiri Olsa
  2009-12-07 20:21         ` Frederic Weisbecker
  1 sibling, 1 reply; 15+ messages in thread
From: Jiri Olsa @ 2009-12-04 10:46 UTC (permalink / raw)
  To: Frederic Weisbecker; +Cc: Steven Rostedt, mingo, linux-kernel

On Wed, Nov 25, 2009 at 09:45:25AM +0100, Jiri Olsa wrote:
> On Tue, Nov 24, 2009 at 10:14:36PM +0100, Frederic Weisbecker wrote:
> > On Tue, Nov 24, 2009 at 12:14:05PM -0500, Steven Rostedt wrote:
> > > On Tue, 2009-11-24 at 13:57 +0100, Jiri Olsa wrote:
> > > > Hi,
> > > > 
> > > > there's a case where the graph tracer might get confused and omits
> > > > display of a single record.  This applies for both destructive
> > > > (trace_pipe) and nondestructive (trace) cases.
> > > > 
> > > > 
> > > > <issue description>
> > > > 
> > > > The issue description for nondestructive case (trace) follows:
> > > > 
> > > > As the function_graph tracer goes through the trace entries (using "trace" file)
> > > > it keeps pointer to the current record and the next one:
> > > > 
> > > > current ->  func1 ENTRY
> > > >    next ->  func2 ENTRY
> > > >             func2 RETURN
> > > >             func1 RETURN
> > > > 
> > > > If it spots adjacent ENTRY and RETURN trace entries of the same function
> > > > and pid, it displays the "func2();" trace
> > > > 
> > > >             func1 ENTRY
> > > > current ->  func2 ENTRY 
> > > >    next ->  func2 RETURN
> > > >             func1 RETURN
> > > > 
> > > > and moves the next trace entry pointer behind the RETURN entry
> > > > 
> > > >             func1 ENTRY
> > > > current ->  func2 ENTRY 
> > > >             func2 RETURN
> > > >    next ->  func1 RETURN
> > > > 
> > > > so the next record peek will skip the RETURN entry and continue with
> > > > whatever is next.
> > > > 
> > > > It works ok but for one case. 
> > > > 
> > > > If the "func2()" trace does not make it to the seq_file read buffer, it needs
> > > 
> > > In the trace_pipe it may be possible to fill the trace_seq buffer if the
> > > buffer passed into userspace is larger than the trace_seq buffer. But
> > > the trace_seq buffer should never overflow on the "trace" case. If it
> > > is, then there's probably another bug.
> > 
> > 
> > 
> > Hmm, yeah in trace_pipe case we repeat until we have no space
> > left in trace_seq (TRACE_TYPE_PARTIAL_LINE), or until we
> > the user buffer is filled.
> > 
> > But indeed in case of trace file, we are using a seq file
> > so the buffer gets flushed after each entries.
> > 
> > Assuming the trace_seq is 4096 bytes long this is probably
> > enough for every function graph entries (even with headers and
> > interrupts), this is not something we are supposed to see in trace_seq.
> > 
> > I did not realized that when Jiri sent the first version of this patch.
> 
> sure, but it is not the trace_seq structure that overflows, it is the bare 
> seq_file underneath. The s_show calls "trace_print_seq":
> 
> void trace_print_seq(struct seq_file *m, struct trace_seq *s)
> {               
>         int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
>         
>         seq_write(m, s->buffer, len);
> 
>         trace_seq_init(s);
> }               
> 
> it flushes the trace_seq to the seq_file and reinits the trace_seq.
> But the seq_file will get filled in the end, ending up with
> not displayed output in that case I described in the first email.
> 

hi, any feedback on this?

thanks,
jirka


> > 
> >  
> > > 
> > > > to be processed again in the next read.  And here comes the issue: 
> > > > the next read will see following pointers setup for func2 processing:
> > > > 
> > > >             func1 ENTRY
> > > > current ->  func2 ENTRY 
> > > >             func2 RETURN
> > > >    next ->  func1 RETURN
> > > > 
> > > > which will turn to displaying the func2 entry like: "func2() {", since the
> > > > next entry is not RETURN of the same type.  Generaly it is whatever entry 
> > > > that follows, but definitelly not the RETURN entry of the same function.
> > > > 
> > > > The destructive case (trace_pipe) suffers from the similar issue,
> > > > although it keeps only the current pointer.
> > > > 
> > > > </issue description>
> > > > 
> > > > 
> > > > The following patch propose generic solution for both cases.
> > > > It keeps the last read entry/return in the tracer private
> > > > iterator storage and keeps track of the failed output.
> > > > Whenever the output fails, next read will output previous
> > > > 'not displayed' entry before processing next entry.
> > > 
> > > Have you added tests to make sure that the trace_seq buffer is indeed
> > > filling up?  In the trace_pipe, this could happen when the user buffer
> > > gets full.
> 
> that should be in the trace_output.c part of the patch,
> each display function set the 'failed' flag appropriatelly
> 
> > > 
> > > What I think you are seeing, is a buffer overflow during a run. If the
> > > trace buffer overflows, it will leave gaps in the trace. Those abandoned
> > > leaf functions are probably a result of a trace buffer wrap.
> > > 
> > > I don't thing this is the proper solution to the problem.
> 
> I can see other solution, which would need to move the look ahead
> skip of the record in the 'get_return_for_leaf' function.
> 
> jirka
> > 
> > 
> > But I guess it doesn't happen because the function graph tracer
> > has too large entries, or it would do an endless loop while
> > reading the trace file.
> > 
> > It's more likely a bug somewhere in the trace_seq_* functions.
> > 
> > I'm going to have a look.
> > 
> > Thanks.
> > 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] tracing - fix function graph trace to properly display failed entries
  2009-12-04 10:46       ` Jiri Olsa
@ 2009-12-07 20:21         ` Frederic Weisbecker
  2009-12-07 20:26           ` Steven Rostedt
  0 siblings, 1 reply; 15+ messages in thread
From: Frederic Weisbecker @ 2009-12-07 20:21 UTC (permalink / raw)
  To: Jiri Olsa; +Cc: Steven Rostedt, mingo, linux-kernel

On Fri, Dec 04, 2009 at 11:46:16AM +0100, Jiri Olsa wrote:
> > sure, but it is not the trace_seq structure that overflows, it is the bare 
> > seq_file underneath. The s_show calls "trace_print_seq":
> > 
> > void trace_print_seq(struct seq_file *m, struct trace_seq *s)
> > {               
> >         int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
> >         
> >         seq_write(m, s->buffer, len);
> > 
> >         trace_seq_init(s);
> > }               
> > 
> > it flushes the trace_seq to the seq_file and reinits the trace_seq.
> > But the seq_file will get filled in the end, ending up with
> > not displayed output in that case I described in the first email.
> > 
> 
> hi, any feedback on this?
> 
> thanks,
> jirka


Steven has done some tests and it looks like you were right, the
problem comes from the seq_file, not the trace_seq.

Steve, any news about it?

Thanks.


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] tracing - fix function graph trace to properly display failed entries
  2009-12-07 20:21         ` Frederic Weisbecker
@ 2009-12-07 20:26           ` Steven Rostedt
  2009-12-07 20:33             ` Jiri Olsa
  0 siblings, 1 reply; 15+ messages in thread
From: Steven Rostedt @ 2009-12-07 20:26 UTC (permalink / raw)
  To: Frederic Weisbecker; +Cc: Jiri Olsa, mingo, linux-kernel

On Mon, 2009-12-07 at 21:21 +0100, Frederic Weisbecker wrote:

> Steven has done some tests and it looks like you were right, the
> problem comes from the seq_file, not the trace_seq.
> 
> Steve, any news about it?

Hehe, I got this message a second after I posted the series to fix the
issue.

I had it done last week, but I needed to finish testing today before
sending it.

-- Steve



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] tracing - fix function graph trace to properly display failed entries
  2009-12-07 20:26           ` Steven Rostedt
@ 2009-12-07 20:33             ` Jiri Olsa
  0 siblings, 0 replies; 15+ messages in thread
From: Jiri Olsa @ 2009-12-07 20:33 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: Frederic Weisbecker, mingo, linux-kernel

On Mon, Dec 07, 2009 at 03:26:25PM -0500, Steven Rostedt wrote:
> On Mon, 2009-12-07 at 21:21 +0100, Frederic Weisbecker wrote:
> 
> > Steven has done some tests and it looks like you were right, the
> > problem comes from the seq_file, not the trace_seq.
> > 
> > Steve, any news about it?
> 
> Hehe, I got this message a second after I posted the series to fix the
> issue.
> 
> I had it done last week, but I needed to finish testing today before
> sending it.
> 
> -- Steve
> 
> 
cool :) I'll check it

thanks,
jirka

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [tip:tracing/core] tracing: Fix function graph trace_pipe to properly display failed entries
  2009-11-24 12:57 [PATCH] tracing - fix function graph trace to properly display failed entries Jiri Olsa
  2009-11-24 17:14 ` Steven Rostedt
@ 2009-12-10  7:49 ` tip-bot for Jiri Olsa
  1 sibling, 0 replies; 15+ messages in thread
From: tip-bot for Jiri Olsa @ 2009-12-10  7:49 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, hpa, mingo, fweisbec, rostedt, tglx, jolsa

Commit-ID:  be1eca39319689aed7d3aedb9c3bece9469fe10f
Gitweb:     http://git.kernel.org/tip/be1eca39319689aed7d3aedb9c3bece9469fe10f
Author:     Jiri Olsa <jolsa@redhat.com>
AuthorDate: Tue, 24 Nov 2009 13:57:38 +0100
Committer:  Steven Rostedt <rostedt@goodmis.org>
CommitDate: Wed, 9 Dec 2009 14:09:06 -0500

tracing: Fix function graph trace_pipe to properly display failed entries

There is a case where the graph tracer might get confused and omits
displaying of a single record.  This applies mostly with the trace_pipe
since it is unlikely that the trace_seq buffer will overflow with the
trace file.

As the function_graph tracer goes through the trace entries keeping a
pointer to the current record:

current ->  func1 ENTRY
            func2 ENTRY
            func2 RETURN
            func1 RETURN

When an function ENTRY is encountered, it moves the pointer to the
next entry to check if the function is a nested or leaf function.

            func1 ENTRY
current ->  func2 ENTRY
            func2 RETURN
            func1 RETURN

If the rest of the writing of the function fills the trace_seq buffer,
then the trace_pipe read will ignore this entry. The next read will
Now start at the current location, but the first entry (func1) will
be discarded.

This patch keeps a copy of the current entry in the iterator private
storage and will keep track of when the trace_seq buffer fills. When
the trace_seq buffer fills, it will reuse the copy of the entry in the
next iteration.

[
  This patch has been largely modified by Steven Rostedt in order to
  clean it up and simplify it. The original idea and concept was from
  Jirka and for that, this patch will go under his name to give him
  the credit he deserves. But because this was modify by Steven Rostedt
  anything wrong with the patch should be blamed on Steven.
]

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <1259067458-27143-1-git-send-email-jolsa@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace_functions_graph.c |  165 +++++++++++++++++++++++++++-------
 1 files changed, 131 insertions(+), 34 deletions(-)

diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 45e6c01..a43d009 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -14,9 +14,20 @@
 #include "trace.h"
 #include "trace_output.h"
 
-struct fgraph_data {
+struct fgraph_cpu_data {
 	pid_t		last_pid;
 	int		depth;
+	int		ignore;
+};
+
+struct fgraph_data {
+	struct fgraph_cpu_data		*cpu_data;
+
+	/* Place to preserve last processed entry. */
+	struct ftrace_graph_ent_entry	ent;
+	struct ftrace_graph_ret_entry	ret;
+	int				failed;
+	int				cpu;
 };
 
 #define TRACE_GRAPH_INDENT	2
@@ -384,7 +395,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data)
 	if (!data)
 		return TRACE_TYPE_HANDLED;
 
-	last_pid = &(per_cpu_ptr(data, cpu)->last_pid);
+	last_pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
 
 	if (*last_pid == pid)
 		return TRACE_TYPE_HANDLED;
@@ -435,26 +446,49 @@ static struct ftrace_graph_ret_entry *
 get_return_for_leaf(struct trace_iterator *iter,
 		struct ftrace_graph_ent_entry *curr)
 {
-	struct ring_buffer_iter *ring_iter;
+	struct fgraph_data *data = iter->private;
+	struct ring_buffer_iter *ring_iter = NULL;
 	struct ring_buffer_event *event;
 	struct ftrace_graph_ret_entry *next;
 
-	ring_iter = iter->buffer_iter[iter->cpu];
+	/*
+	 * If the previous output failed to write to the seq buffer,
+	 * then we just reuse the data from before.
+	 */
+	if (data && data->failed) {
+		curr = &data->ent;
+		next = &data->ret;
+	} else {
 
-	/* First peek to compare current entry and the next one */
-	if (ring_iter)
-		event = ring_buffer_iter_peek(ring_iter, NULL);
-	else {
-	/* We need to consume the current entry to see the next one */
-		ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
-		event = ring_buffer_peek(iter->tr->buffer, iter->cpu,
-					NULL);
-	}
+		ring_iter = iter->buffer_iter[iter->cpu];
+
+		/* First peek to compare current entry and the next one */
+		if (ring_iter)
+			event = ring_buffer_iter_peek(ring_iter, NULL);
+		else {
+			/*
+			 * We need to consume the current entry to see
+			 * the next one.
+			 */
+			ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
+			event = ring_buffer_peek(iter->tr->buffer, iter->cpu,
+						 NULL);
+		}
 
-	if (!event)
-		return NULL;
+		if (!event)
+			return NULL;
+
+		next = ring_buffer_event_data(event);
 
-	next = ring_buffer_event_data(event);
+		if (data) {
+			/*
+			 * Save current and next entries for later reference
+			 * if the output fails.
+			 */
+			data->ent = *curr;
+			data->ret = *next;
+		}
+	}
 
 	if (next->ent.type != TRACE_GRAPH_RET)
 		return NULL;
@@ -640,7 +674,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
 
 	if (data) {
 		int cpu = iter->cpu;
-		int *depth = &(per_cpu_ptr(data, cpu)->depth);
+		int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
 
 		/*
 		 * Comments display at + 1 to depth. Since
@@ -688,7 +722,7 @@ print_graph_entry_nested(struct trace_iterator *iter,
 
 	if (data) {
 		int cpu = iter->cpu;
-		int *depth = &(per_cpu_ptr(data, cpu)->depth);
+		int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
 
 		*depth = call->depth;
 	}
@@ -782,19 +816,34 @@ static enum print_line_t
 print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
 			struct trace_iterator *iter)
 {
-	int cpu = iter->cpu;
+	struct fgraph_data *data = iter->private;
 	struct ftrace_graph_ent *call = &field->graph_ent;
 	struct ftrace_graph_ret_entry *leaf_ret;
+	static enum print_line_t ret;
+	int cpu = iter->cpu;
 
 	if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func))
 		return TRACE_TYPE_PARTIAL_LINE;
 
 	leaf_ret = get_return_for_leaf(iter, field);
 	if (leaf_ret)
-		return print_graph_entry_leaf(iter, field, leaf_ret, s);
+		ret = print_graph_entry_leaf(iter, field, leaf_ret, s);
 	else
-		return print_graph_entry_nested(iter, field, s, cpu);
+		ret = print_graph_entry_nested(iter, field, s, cpu);
 
+	if (data) {
+		/*
+		 * If we failed to write our output, then we need to make
+		 * note of it. Because we already consumed our entry.
+		 */
+		if (s->full) {
+			data->failed = 1;
+			data->cpu = cpu;
+		} else
+			data->failed = 0;
+	}
+
+	return ret;
 }
 
 static enum print_line_t
@@ -810,7 +859,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
 
 	if (data) {
 		int cpu = iter->cpu;
-		int *depth = &(per_cpu_ptr(data, cpu)->depth);
+		int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
 
 		/*
 		 * Comments display at + 1 to depth. This is the
@@ -873,7 +922,7 @@ print_graph_comment(struct trace_seq *s,  struct trace_entry *ent,
 	int i;
 
 	if (data)
-		depth = per_cpu_ptr(data, iter->cpu)->depth;
+		depth = per_cpu_ptr(data->cpu_data, iter->cpu)->depth;
 
 	if (print_graph_prologue(iter, s, 0, 0))
 		return TRACE_TYPE_PARTIAL_LINE;
@@ -941,8 +990,33 @@ print_graph_comment(struct trace_seq *s,  struct trace_entry *ent,
 enum print_line_t
 print_graph_function(struct trace_iterator *iter)
 {
+	struct ftrace_graph_ent_entry *field;
+	struct fgraph_data *data = iter->private;
 	struct trace_entry *entry = iter->ent;
 	struct trace_seq *s = &iter->seq;
+	int cpu = iter->cpu;
+	int ret;
+
+	if (data && per_cpu_ptr(data->cpu_data, cpu)->ignore) {
+		per_cpu_ptr(data->cpu_data, cpu)->ignore = 0;
+		return TRACE_TYPE_HANDLED;
+	}
+
+	/*
+	 * If the last output failed, there's a possibility we need
+	 * to print out the missing entry which would never go out.
+	 */
+	if (data && data->failed) {
+		field = &data->ent;
+		iter->cpu = data->cpu;
+		ret = print_graph_entry(field, s, iter);
+		if (ret == TRACE_TYPE_HANDLED && iter->cpu != cpu) {
+			per_cpu_ptr(data->cpu_data, iter->cpu)->ignore = 1;
+			ret = TRACE_TYPE_NO_CONSUME;
+		}
+		iter->cpu = cpu;
+		return ret;
+	}
 
 	switch (entry->type) {
 	case TRACE_GRAPH_ENT: {
@@ -952,7 +1026,7 @@ print_graph_function(struct trace_iterator *iter)
 		 * sizeof(struct ftrace_graph_ent_entry) is very small,
 		 * it can be safely saved at the stack.
 		 */
-		struct ftrace_graph_ent_entry *field, saved;
+		struct ftrace_graph_ent_entry saved;
 		trace_assign_type(field, entry);
 		saved = *field;
 		return print_graph_entry(&saved, s, iter);
@@ -1030,31 +1104,54 @@ static void print_graph_headers(struct seq_file *s)
 static void graph_trace_open(struct trace_iterator *iter)
 {
 	/* pid and depth on the last trace processed */
-	struct fgraph_data *data = alloc_percpu(struct fgraph_data);
+	struct fgraph_data *data;
 	int cpu;
 
+	iter->private = NULL;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
 	if (!data)
-		pr_warning("function graph tracer: not enough memory\n");
-	else
-		for_each_possible_cpu(cpu) {
-			pid_t *pid = &(per_cpu_ptr(data, cpu)->last_pid);
-			int *depth = &(per_cpu_ptr(data, cpu)->depth);
-			*pid = -1;
-			*depth = 0;
-		}
+		goto out_err;
+
+	data->cpu_data = alloc_percpu(struct fgraph_cpu_data);
+	if (!data->cpu_data)
+		goto out_err_free;
+
+	for_each_possible_cpu(cpu) {
+		pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
+		int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
+		int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore);
+		*pid = -1;
+		*depth = 0;
+		*ignore = 0;
+	}
 
 	iter->private = data;
+
+	return;
+
+ out_err_free:
+	kfree(data);
+ out_err:
+	pr_warning("function graph tracer: not enough memory\n");
 }
 
 static void graph_trace_close(struct trace_iterator *iter)
 {
-	free_percpu(iter->private);
+	struct fgraph_data *data = iter->private;
+
+	if (data) {
+		free_percpu(data->cpu_data);
+		kfree(data);
+	}
 }
 
 static struct tracer graph_trace __read_mostly = {
 	.name		= "function_graph",
 	.open		= graph_trace_open,
+	.pipe_open	= graph_trace_open,
 	.close		= graph_trace_close,
+	.pipe_close	= graph_trace_close,
 	.wait_pipe	= poll_wait_pipe,
 	.init		= graph_trace_init,
 	.reset		= graph_trace_reset,

^ permalink raw reply related	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2009-12-10  7:49 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-11-24 12:57 [PATCH] tracing - fix function graph trace to properly display failed entries Jiri Olsa
2009-11-24 17:14 ` Steven Rostedt
2009-11-24 21:14   ` Frederic Weisbecker
2009-11-24 22:17     ` Frederic Weisbecker
2009-11-24 22:26       ` Steven Rostedt
2009-11-24 22:53         ` Frederic Weisbecker
2009-11-24 23:21           ` Steven Rostedt
2009-11-25  8:47       ` Jiri Olsa
2009-11-25  8:45     ` Jiri Olsa
2009-11-25 16:21       ` Steven Rostedt
2009-12-04 10:46       ` Jiri Olsa
2009-12-07 20:21         ` Frederic Weisbecker
2009-12-07 20:26           ` Steven Rostedt
2009-12-07 20:33             ` Jiri Olsa
2009-12-10  7:49 ` [tip:tracing/core] tracing: Fix function graph trace_pipe " tip-bot for Jiri Olsa

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.