All of lore.kernel.org
 help / color / mirror / Atom feed
* [GIT PULL] perf callchain improvement and fixes
@ 2010-08-22 21:15 Frederic Weisbecker
  2010-08-22 21:15 ` [PATCH 1/3] perf: Keep track of the max depth of a callchain Frederic Weisbecker
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Frederic Weisbecker @ 2010-08-22 21:15 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, Frederic Weisbecker, Peter Zijlstra,
	Arnaldo Carvalho de Melo, Paul Mackerras, Christoph Hellwig

Ingo,

Please pull the perf/core branch that can be found at:

git://git.kernel.org/pub/scm/linux/kernel/git/frederic/random-tracing.git
	perf/core

Unfortunately we were missing a lot of callchains in multithread
cases. And this was not a regression, it was rather the fact I
was missing a part of the histogram processing: the collapsing.

Anyway, this is eventually fixed. Thanks a lot to Chistoph Hellwig
who reported this.

Thanks,
	Frederic
---

Frederic Weisbecker (3):
      perf: Keep track of the max depth of a callchain
      perf: Rename append_callchain into callchain_append
      perf: Support for callchains merge


 tools/perf/builtin-report.c |    3 +-
 tools/perf/util/callchain.c |   98 ++++++++++++++++++++++++++++++++++---------
 tools/perf/util/callchain.h |   25 +++++++----
 tools/perf/util/hist.c      |    4 +-
 tools/perf/util/sort.h      |    2 +-
 5 files changed, 100 insertions(+), 32 deletions(-)

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1/3] perf: Keep track of the max depth of a callchain
  2010-08-22 21:15 [GIT PULL] perf callchain improvement and fixes Frederic Weisbecker
@ 2010-08-22 21:15 ` Frederic Weisbecker
  2010-08-22 21:15 ` [PATCH 2/3] perf: Rename append_callchain into callchain_append Frederic Weisbecker
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Frederic Weisbecker @ 2010-08-22 21:15 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, Frederic Weisbecker, Ingo Molnar, Peter Zijlstra,
	Arnaldo Carvalho de Melo, Paul Mackerras, Christoph Hellwig

In order to implement callchains collapsing, we need to keep
track of the maximum depth in a histogram tree of callchains.
This way we'll avoid allocating an arbitrary temporary buffer
size on callchain merge time.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Christoph Hellwig <hch@infradead.org>
---
 tools/perf/util/callchain.c |   23 +++++++++++++----------
 tools/perf/util/callchain.h |   22 ++++++++++++++--------
 tools/perf/util/hist.c      |    2 +-
 tools/perf/util/sort.h      |    2 +-
 4 files changed, 29 insertions(+), 20 deletions(-)

diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index f231f43..f0b23f3 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -86,10 +86,10 @@ __sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node,
  * sort them by hit
  */
 static void
-sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node,
+sort_chain_flat(struct rb_root *rb_root, struct callchain_root *root,
 		u64 min_hit, struct callchain_param *param __used)
 {
-	__sort_chain_flat(rb_root, node, min_hit);
+	__sort_chain_flat(rb_root, &root->node, min_hit);
 }
 
 static void __sort_chain_graph_abs(struct callchain_node *node,
@@ -108,11 +108,11 @@ static void __sort_chain_graph_abs(struct callchain_node *node,
 }
 
 static void
-sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_node *chain_root,
+sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_root *chain_root,
 		     u64 min_hit, struct callchain_param *param __used)
 {
-	__sort_chain_graph_abs(chain_root, min_hit);
-	rb_root->rb_node = chain_root->rb_root.rb_node;
+	__sort_chain_graph_abs(&chain_root->node, min_hit);
+	rb_root->rb_node = chain_root->node.rb_root.rb_node;
 }
 
 static void __sort_chain_graph_rel(struct callchain_node *node,
@@ -133,11 +133,11 @@ static void __sort_chain_graph_rel(struct callchain_node *node,
 }
 
 static void
-sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_node *chain_root,
+sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_root *chain_root,
 		     u64 min_hit __used, struct callchain_param *param)
 {
-	__sort_chain_graph_rel(chain_root, param->min_percent / 100.0);
-	rb_root->rb_node = chain_root->rb_root.rb_node;
+	__sort_chain_graph_rel(&chain_root->node, param->min_percent / 100.0);
+	rb_root->rb_node = chain_root->node.rb_root.rb_node;
 }
 
 int register_callchain_param(struct callchain_param *param)
@@ -380,7 +380,7 @@ static void filter_context(struct ip_callchain *old, struct resolved_chain *new,
 }
 
 
-int append_chain(struct callchain_node *root, struct ip_callchain *chain,
+int append_chain(struct callchain_root *root, struct ip_callchain *chain,
 		 struct map_symbol *syms, u64 period)
 {
 	struct resolved_chain *filtered;
@@ -398,7 +398,10 @@ int append_chain(struct callchain_node *root, struct ip_callchain *chain,
 	if (!filtered->nr)
 		goto end;
 
-	__append_chain_children(root, filtered, 0, period);
+	__append_chain_children(&root->node, filtered, 0, period);
+
+	if (filtered->nr > root->max_depth)
+		root->max_depth = filtered->nr;
 end:
 	free(filtered);
 
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 624a96c..9b93a38 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -26,9 +26,14 @@ struct callchain_node {
 	u64			children_hit;
 };
 
+struct callchain_root {
+	u64			max_depth;
+	struct callchain_node	node;
+};
+
 struct callchain_param;
 
-typedef void (*sort_chain_func_t)(struct rb_root *, struct callchain_node *,
+typedef void (*sort_chain_func_t)(struct rb_root *, struct callchain_root *,
 				 u64, struct callchain_param *);
 
 struct callchain_param {
@@ -44,14 +49,15 @@ struct callchain_list {
 	struct list_head	list;
 };
 
-static inline void callchain_init(struct callchain_node *node)
+static inline void callchain_init(struct callchain_root *root)
 {
-	INIT_LIST_HEAD(&node->brothers);
-	INIT_LIST_HEAD(&node->children);
-	INIT_LIST_HEAD(&node->val);
+	INIT_LIST_HEAD(&root->node.brothers);
+	INIT_LIST_HEAD(&root->node.children);
+	INIT_LIST_HEAD(&root->node.val);
 
-	node->parent = NULL;
-	node->hit = 0;
+	root->node.parent = NULL;
+	root->node.hit = 0;
+	root->max_depth = 0;
 }
 
 static inline u64 cumul_hits(struct callchain_node *node)
@@ -60,7 +66,7 @@ static inline u64 cumul_hits(struct callchain_node *node)
 }
 
 int register_callchain_param(struct callchain_param *param);
-int append_chain(struct callchain_node *root, struct ip_callchain *chain,
+int append_chain(struct callchain_root *root, struct ip_callchain *chain,
 		 struct map_symbol *syms, u64 period);
 
 bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event);
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index be22ae6..e77ff77 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -87,7 +87,7 @@ static void hist_entry__add_cpumode_period(struct hist_entry *self,
 
 static struct hist_entry *hist_entry__new(struct hist_entry *template)
 {
-	size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_node) : 0;
+	size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_root) : 0;
 	struct hist_entry *self = malloc(sizeof(*self) + callchain_size);
 
 	if (self != NULL) {
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 46e531d..0b91053 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -70,7 +70,7 @@ struct hist_entry {
 		struct hist_entry *pair;
 		struct rb_root	  sorted_chain;
 	};
-	struct callchain_node	callchain[0];
+	struct callchain_root	callchain[0];
 };
 
 enum sort_type {
-- 
1.6.2.3


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/3] perf: Rename append_callchain into callchain_append
  2010-08-22 21:15 [GIT PULL] perf callchain improvement and fixes Frederic Weisbecker
  2010-08-22 21:15 ` [PATCH 1/3] perf: Keep track of the max depth of a callchain Frederic Weisbecker
@ 2010-08-22 21:15 ` Frederic Weisbecker
  2010-08-22 21:15 ` [PATCH 3/3] perf: Support for callchains merge Frederic Weisbecker
  2010-08-23  9:20 ` [GIT PULL] perf callchain improvement and fixes Ingo Molnar
  3 siblings, 0 replies; 5+ messages in thread
From: Frederic Weisbecker @ 2010-08-22 21:15 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, Frederic Weisbecker, Ingo Molnar, Peter Zijlstra,
	Arnaldo Carvalho de Melo, Paul Mackerras, Christoph Hellwig

Do that to start a consistant callchain API namespace.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Christoph Hellwig <hch@infradead.org>
---
 tools/perf/builtin-report.c |    3 ++-
 tools/perf/util/callchain.c |   23 +++++++++++------------
 tools/perf/util/callchain.h |    4 ++--
 3 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 7d35a71..5de405d 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -107,7 +107,8 @@ static int perf_session__add_hist_entry(struct perf_session *self,
 		goto out_free_syms;
 	err = 0;
 	if (symbol_conf.use_callchain) {
-		err = append_chain(he->callchain, data->callchain, syms, data->period);
+		err = callchain_append(he->callchain, data->callchain, syms,
+				       data->period);
 		if (err)
 			goto out_free_syms;
 	}
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index f0b23f3..59cc0e1 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -284,19 +284,18 @@ split_add_child(struct callchain_node *parent, struct resolved_chain *chain,
 }
 
 static int
-__append_chain(struct callchain_node *root, struct resolved_chain *chain,
-	       unsigned int start, u64 period);
+append_chain(struct callchain_node *root, struct resolved_chain *chain,
+	     unsigned int start, u64 period);
 
 static void
-__append_chain_children(struct callchain_node *root,
-			struct resolved_chain *chain,
-			unsigned int start, u64 period)
+append_chain_children(struct callchain_node *root, struct resolved_chain *chain,
+		      unsigned int start, u64 period)
 {
 	struct callchain_node *rnode;
 
 	/* lookup in childrens */
 	chain_for_each_child(rnode, root) {
-		unsigned int ret = __append_chain(rnode, chain, start, period);
+		unsigned int ret = append_chain(rnode, chain, start, period);
 
 		if (!ret)
 			goto inc_children_hit;
@@ -309,8 +308,8 @@ inc_children_hit:
 }
 
 static int
-__append_chain(struct callchain_node *root, struct resolved_chain *chain,
-	       unsigned int start, u64 period)
+append_chain(struct callchain_node *root, struct resolved_chain *chain,
+	     unsigned int start, u64 period)
 {
 	struct callchain_list *cnode;
 	unsigned int i = start;
@@ -357,7 +356,7 @@ __append_chain(struct callchain_node *root, struct resolved_chain *chain,
 	}
 
 	/* We match the node and still have a part remaining */
-	__append_chain_children(root, chain, i, period);
+	append_chain_children(root, chain, i, period);
 
 	return 0;
 }
@@ -380,8 +379,8 @@ static void filter_context(struct ip_callchain *old, struct resolved_chain *new,
 }
 
 
-int append_chain(struct callchain_root *root, struct ip_callchain *chain,
-		 struct map_symbol *syms, u64 period)
+int callchain_append(struct callchain_root *root, struct ip_callchain *chain,
+		     struct map_symbol *syms, u64 period)
 {
 	struct resolved_chain *filtered;
 
@@ -398,7 +397,7 @@ int append_chain(struct callchain_root *root, struct ip_callchain *chain,
 	if (!filtered->nr)
 		goto end;
 
-	__append_chain_children(&root->node, filtered, 0, period);
+	append_chain_children(&root->node, filtered, 0, period);
 
 	if (filtered->nr > root->max_depth)
 		root->max_depth = filtered->nr;
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 9b93a38..85b50fb 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -66,8 +66,8 @@ static inline u64 cumul_hits(struct callchain_node *node)
 }
 
 int register_callchain_param(struct callchain_param *param);
-int append_chain(struct callchain_root *root, struct ip_callchain *chain,
-		 struct map_symbol *syms, u64 period);
+int callchain_append(struct callchain_root *root, struct ip_callchain *chain,
+		     struct map_symbol *syms, u64 period);
 
 bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event);
 #endif	/* __PERF_CALLCHAIN_H */
-- 
1.6.2.3


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 3/3] perf: Support for callchains merge
  2010-08-22 21:15 [GIT PULL] perf callchain improvement and fixes Frederic Weisbecker
  2010-08-22 21:15 ` [PATCH 1/3] perf: Keep track of the max depth of a callchain Frederic Weisbecker
  2010-08-22 21:15 ` [PATCH 2/3] perf: Rename append_callchain into callchain_append Frederic Weisbecker
@ 2010-08-22 21:15 ` Frederic Weisbecker
  2010-08-23  9:20 ` [GIT PULL] perf callchain improvement and fixes Ingo Molnar
  3 siblings, 0 replies; 5+ messages in thread
From: Frederic Weisbecker @ 2010-08-22 21:15 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: LKML, Frederic Weisbecker, Christoph Hellwig, Ingo Molnar,
	Peter Zijlstra, Arnaldo Carvalho de Melo, Paul Mackerras

If we sort the histograms by comm, which is the default,
we need to merge some of them, typically different thread
histograms of a same process, or just same comm. But during
this merge, we forgot to merge callchains.

So imagine we have three threads (tids: 1000, 1001, 1002) that
belong to comm "foo".

tid 1000 got 100 events
tid 1001 got 10 events
tid 1002 got 3 events

Once we merge these histograms to get a per comm result, we'll
finally get:

"foo" got 113 events

The problem is if we merge 1000 and 1001 histograms into 1002, then
the end merge result, wrt callchains, will be only callchains that
belong to 1002.
This is because we haven't handled callchains in the merge. Only those
from one of the threads inside a common comm survive.

It means during this merge, we can lose a lot of callchains.

Fix this by implementing callchains merge and apply it on histograms
that collapse.

Reported-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
---
 tools/perf/util/callchain.c |   56 +++++++++++++++++++++++++++++++++++++++++++
 tools/perf/util/callchain.h |    1 +
 tools/perf/util/hist.c      |    2 +
 3 files changed, 59 insertions(+), 0 deletions(-)

diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 59cc0e1..e12d539 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -28,6 +28,9 @@ bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event)
 #define chain_for_each_child(child, parent)	\
 	list_for_each_entry(child, &parent->children, brothers)
 
+#define chain_for_each_child_safe(child, next, parent)	\
+	list_for_each_entry_safe(child, next, &parent->children, brothers)
+
 static void
 rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
 		    enum chain_mode mode)
@@ -406,3 +409,56 @@ end:
 
 	return 0;
 }
+
+static int
+merge_chain_branch(struct callchain_node *dst, struct callchain_node *src,
+		   struct resolved_chain *chain)
+{
+	struct callchain_node *child, *next_child;
+	struct callchain_list *list, *next_list;
+	int old_pos = chain->nr;
+	int err = 0;
+
+	list_for_each_entry_safe(list, next_list, &src->val, list) {
+		chain->ips[chain->nr].ip = list->ip;
+		chain->ips[chain->nr].ms = list->ms;
+		chain->nr++;
+		list_del(&list->list);
+		free(list);
+	}
+
+	if (src->hit)
+		append_chain_children(dst, chain, 0, src->hit);
+
+	chain_for_each_child_safe(child, next_child, src) {
+		err = merge_chain_branch(dst, child, chain);
+		if (err)
+			break;
+
+		list_del(&child->brothers);
+		free(child);
+	}
+
+	chain->nr = old_pos;
+
+	return err;
+}
+
+int callchain_merge(struct callchain_root *dst, struct callchain_root *src)
+{
+	struct resolved_chain *chain;
+	int err;
+
+	chain = malloc(sizeof(*chain) +
+		       src->max_depth * sizeof(struct resolved_ip));
+	if (!chain)
+		return -ENOMEM;
+
+	chain->nr = 0;
+
+	err = merge_chain_branch(&dst->node, &src->node, chain);
+
+	free(chain);
+
+	return err;
+}
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 85b50fb..51a8f2b 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -68,6 +68,7 @@ static inline u64 cumul_hits(struct callchain_node *node)
 int register_callchain_param(struct callchain_param *param);
 int callchain_append(struct callchain_root *root, struct ip_callchain *chain,
 		     struct map_symbol *syms, u64 period);
+int callchain_merge(struct callchain_root *dst, struct callchain_root *src);
 
 bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event);
 #endif	/* __PERF_CALLCHAIN_H */
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index e77ff77..2022e87 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -226,6 +226,8 @@ static bool collapse__insert_entry(struct rb_root *root, struct hist_entry *he)
 
 		if (!cmp) {
 			iter->period += he->period;
+			if (symbol_conf.use_callchain)
+				callchain_merge(iter->callchain, he->callchain);
 			hist_entry__free(he);
 			return false;
 		}
-- 
1.6.2.3


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [GIT PULL] perf callchain improvement and fixes
  2010-08-22 21:15 [GIT PULL] perf callchain improvement and fixes Frederic Weisbecker
                   ` (2 preceding siblings ...)
  2010-08-22 21:15 ` [PATCH 3/3] perf: Support for callchains merge Frederic Weisbecker
@ 2010-08-23  9:20 ` Ingo Molnar
  3 siblings, 0 replies; 5+ messages in thread
From: Ingo Molnar @ 2010-08-23  9:20 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: LKML, Peter Zijlstra, Arnaldo Carvalho de Melo, Paul Mackerras,
	Christoph Hellwig


* Frederic Weisbecker <fweisbec@gmail.com> wrote:

> Ingo,
> 
> Please pull the perf/core branch that can be found at:
> 
> git://git.kernel.org/pub/scm/linux/kernel/git/frederic/random-tracing.git
> 	perf/core
> 
> Unfortunately we were missing a lot of callchains in multithread
> cases. And this was not a regression, it was rather the fact I
> was missing a part of the histogram processing: the collapsing.
> 
> Anyway, this is eventually fixed. Thanks a lot to Chistoph Hellwig
> who reported this.
> 
> Thanks,
> 	Frederic
> ---
> 
> Frederic Weisbecker (3):
>       perf: Keep track of the max depth of a callchain
>       perf: Rename append_callchain into callchain_append
>       perf: Support for callchains merge
> 
> 
>  tools/perf/builtin-report.c |    3 +-
>  tools/perf/util/callchain.c |   98 ++++++++++++++++++++++++++++++++++---------
>  tools/perf/util/callchain.h |   25 +++++++----
>  tools/perf/util/hist.c      |    4 +-
>  tools/perf/util/sort.h      |    2 +-
>  5 files changed, 100 insertions(+), 32 deletions(-)

Pulled, thanks a lot!

	Ingo

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2010-08-23  9:20 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-08-22 21:15 [GIT PULL] perf callchain improvement and fixes Frederic Weisbecker
2010-08-22 21:15 ` [PATCH 1/3] perf: Keep track of the max depth of a callchain Frederic Weisbecker
2010-08-22 21:15 ` [PATCH 2/3] perf: Rename append_callchain into callchain_append Frederic Weisbecker
2010-08-22 21:15 ` [PATCH 3/3] perf: Support for callchains merge Frederic Weisbecker
2010-08-23  9:20 ` [GIT PULL] perf callchain improvement and fixes Ingo Molnar

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.