All of lore.kernel.org
 help / color / mirror / Atom feed
From: tip-bot for Namhyung Kim <tipbot@zytor.com>
To: linux-tip-commits@vger.kernel.org
Cc: acme@redhat.com, linux-kernel@vger.kernel.org, paulus@samba.org,
	hpa@zytor.com, mingo@kernel.org, a.p.zijlstra@chello.nl,
	torvalds@linux-foundation.org, namhyung.kim@lge.com,
	namhyung@kernel.org, jolsa@redhat.com, fweisbec@gmail.com,
	tglx@linutronix.de
Subject: [tip:perf/core] perf callchain: Convert children list to rbtree
Date: Wed, 23 Oct 2013 00:54:48 -0700	[thread overview]
Message-ID: <tip-e369517ce5f796945c6af047b4e8b1d650e03458@git.kernel.org> (raw)
In-Reply-To: <1381468543-25334-2-git-send-email-namhyung@kernel.org>

Commit-ID:  e369517ce5f796945c6af047b4e8b1d650e03458
Gitweb:     http://git.kernel.org/tip/e369517ce5f796945c6af047b4e8b1d650e03458
Author:     Namhyung Kim <namhyung.kim@lge.com>
AuthorDate: Fri, 11 Oct 2013 14:15:36 +0900
Committer:  Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Mon, 21 Oct 2013 17:33:23 -0300

perf callchain: Convert children list to rbtree

Current collapse stage has a scalability problem which can be reproduced
easily with a parallel kernel build.

This is because it needs to traverse every children of callchains
linearly during the collapse/merge stage.

Converting it to a rbtree reduced the overhead significantly.

On my 400MB perf.data file which recorded with make -j32 kernel build:

  $ time perf --no-pager report --stdio > /dev/null

before:
  real	6m22.073s
  user	6m18.683s
  sys	0m0.706s

after:
  real	0m20.780s
  user	0m19.962s
  sys	0m0.689s

During the perf report the overhead on append_chain_children went down
from 96.69% to 18.16%:

  -  18.16%  perf  perf                [.] append_chain_children
     - append_chain_children
        - 77.48% append_chain_children
           + 69.79% merge_chain_branch
           - 22.96% append_chain_children
              + 67.44% merge_chain_branch
              + 30.15% append_chain_children
              + 2.41% callchain_append
           + 7.25% callchain_append
        + 12.26% callchain_append
        + 10.22% merge_chain_branch
  +  11.58%  perf  perf                [.] dso__find_symbol
  +   8.02%  perf  perf                [.] sort__comm_cmp
  +   5.48%  perf  libc-2.17.so        [.] malloc_consolidate

Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1381468543-25334-2-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/callchain.c | 147 +++++++++++++++++++++++++++++++++-----------
 tools/perf/util/callchain.h |  11 ++--
 2 files changed, 116 insertions(+), 42 deletions(-)

diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 482f680..e3970e3 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -21,12 +21,6 @@
 
 __thread struct callchain_cursor callchain_cursor;
 
-#define chain_for_each_child(child, parent)	\
-	list_for_each_entry(child, &parent->children, siblings)
-
-#define chain_for_each_child_safe(child, next, parent)	\
-	list_for_each_entry_safe(child, next, &parent->children, siblings)
-
 static void
 rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
 		    enum chain_mode mode)
@@ -71,10 +65,16 @@ static void
 __sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node,
 		  u64 min_hit)
 {
+	struct rb_node *n;
 	struct callchain_node *child;
 
-	chain_for_each_child(child, node)
+	n = rb_first(&node->rb_root_in);
+	while (n) {
+		child = rb_entry(n, struct callchain_node, rb_node_in);
+		n = rb_next(n);
+
 		__sort_chain_flat(rb_root, child, min_hit);
+	}
 
 	if (node->hit && node->hit >= min_hit)
 		rb_insert_callchain(rb_root, node, CHAIN_FLAT);
@@ -94,11 +94,16 @@ sort_chain_flat(struct rb_root *rb_root, struct callchain_root *root,
 static void __sort_chain_graph_abs(struct callchain_node *node,
 				   u64 min_hit)
 {
+	struct rb_node *n;
 	struct callchain_node *child;
 
 	node->rb_root = RB_ROOT;
+	n = rb_first(&node->rb_root_in);
+
+	while (n) {
+		child = rb_entry(n, struct callchain_node, rb_node_in);
+		n = rb_next(n);
 
-	chain_for_each_child(child, node) {
 		__sort_chain_graph_abs(child, min_hit);
 		if (callchain_cumul_hits(child) >= min_hit)
 			rb_insert_callchain(&node->rb_root, child,
@@ -117,13 +122,18 @@ sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_root *chain_root,
 static void __sort_chain_graph_rel(struct callchain_node *node,
 				   double min_percent)
 {
+	struct rb_node *n;
 	struct callchain_node *child;
 	u64 min_hit;
 
 	node->rb_root = RB_ROOT;
 	min_hit = ceil(node->children_hit * min_percent);
 
-	chain_for_each_child(child, node) {
+	n = rb_first(&node->rb_root_in);
+	while (n) {
+		child = rb_entry(n, struct callchain_node, rb_node_in);
+		n = rb_next(n);
+
 		__sort_chain_graph_rel(child, min_percent);
 		if (callchain_cumul_hits(child) >= min_hit)
 			rb_insert_callchain(&node->rb_root, child,
@@ -173,19 +183,26 @@ create_child(struct callchain_node *parent, bool inherit_children)
 		return NULL;
 	}
 	new->parent = parent;
-	INIT_LIST_HEAD(&new->children);
 	INIT_LIST_HEAD(&new->val);
 
 	if (inherit_children) {
-		struct callchain_node *next;
+		struct rb_node *n;
+		struct callchain_node *child;
+
+		new->rb_root_in = parent->rb_root_in;
+		parent->rb_root_in = RB_ROOT;
 
-		list_splice(&parent->children, &new->children);
-		INIT_LIST_HEAD(&parent->children);
+		n = rb_first(&new->rb_root_in);
+		while (n) {
+			child = rb_entry(n, struct callchain_node, rb_node_in);
+			child->parent = new;
+			n = rb_next(n);
+		}
 
-		chain_for_each_child(next, new)
-			next->parent = new;
+		/* make it the first child */
+		rb_link_node(&new->rb_node_in, NULL, &parent->rb_root_in.rb_node);
+		rb_insert_color(&new->rb_node_in, &parent->rb_root_in);
 	}
-	list_add_tail(&new->siblings, &parent->children);
 
 	return new;
 }
@@ -223,7 +240,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
 	}
 }
 
-static void
+static struct callchain_node *
 add_child(struct callchain_node *parent,
 	  struct callchain_cursor *cursor,
 	  u64 period)
@@ -235,6 +252,19 @@ add_child(struct callchain_node *parent,
 
 	new->children_hit = 0;
 	new->hit = period;
+	return new;
+}
+
+static s64 match_chain(struct callchain_cursor_node *node,
+		      struct callchain_list *cnode)
+{
+	struct symbol *sym = node->sym;
+
+	if (cnode->ms.sym && sym &&
+	    callchain_param.key == CCKEY_FUNCTION)
+		return cnode->ms.sym->start - sym->start;
+	else
+		return cnode->ip - node->ip;
 }
 
 /*
@@ -272,9 +302,33 @@ split_add_child(struct callchain_node *parent,
 
 	/* create a new child for the new branch if any */
 	if (idx_total < cursor->nr) {
+		struct callchain_node *first;
+		struct callchain_list *cnode;
+		struct callchain_cursor_node *node;
+		struct rb_node *p, **pp;
+
 		parent->hit = 0;
-		add_child(parent, cursor, period);
 		parent->children_hit += period;
+
+		node = callchain_cursor_current(cursor);
+		new = add_child(parent, cursor, period);
+
+		/*
+		 * This is second child since we moved parent's children
+		 * to new (first) child above.
+		 */
+		p = parent->rb_root_in.rb_node;
+		first = rb_entry(p, struct callchain_node, rb_node_in);
+		cnode = list_first_entry(&first->val, struct callchain_list,
+					 list);
+
+		if (match_chain(node, cnode) < 0)
+			pp = &p->rb_left;
+		else
+			pp = &p->rb_right;
+
+		rb_link_node(&new->rb_node_in, p, pp);
+		rb_insert_color(&new->rb_node_in, &parent->rb_root_in);
 	} else {
 		parent->hit = period;
 	}
@@ -291,16 +345,40 @@ append_chain_children(struct callchain_node *root,
 		      u64 period)
 {
 	struct callchain_node *rnode;
+	struct callchain_cursor_node *node;
+	struct rb_node **p = &root->rb_root_in.rb_node;
+	struct rb_node *parent = NULL;
+
+	node = callchain_cursor_current(cursor);
+	if (!node)
+		return;
 
 	/* lookup in childrens */
-	chain_for_each_child(rnode, root) {
-		unsigned int ret = append_chain(rnode, cursor, period);
+	while (*p) {
+		s64 ret;
+		struct callchain_list *cnode;
 
-		if (!ret)
+		parent = *p;
+		rnode = rb_entry(parent, struct callchain_node, rb_node_in);
+		cnode = list_first_entry(&rnode->val, struct callchain_list,
+					 list);
+
+		/* just check first entry */
+		ret = match_chain(node, cnode);
+		if (ret == 0) {
+			append_chain(rnode, cursor, period);
 			goto inc_children_hit;
+		}
+
+		if (ret < 0)
+			p = &parent->rb_left;
+		else
+			p = &parent->rb_right;
 	}
 	/* nothing in children, add to the current node */
-	add_child(root, cursor, period);
+	rnode = add_child(root, cursor, period);
+	rb_link_node(&rnode->rb_node_in, parent, p);
+	rb_insert_color(&rnode->rb_node_in, &root->rb_root_in);
 
 inc_children_hit:
 	root->children_hit += period;
@@ -325,28 +403,20 @@ append_chain(struct callchain_node *root,
 	 */
 	list_for_each_entry(cnode, &root->val, list) {
 		struct callchain_cursor_node *node;
-		struct symbol *sym;
 
 		node = callchain_cursor_current(cursor);
 		if (!node)
 			break;
 
-		sym = node->sym;
-
-		if (cnode->ms.sym && sym &&
-		    callchain_param.key == CCKEY_FUNCTION) {
-			if (cnode->ms.sym->start != sym->start)
-				break;
-		} else if (cnode->ip != node->ip)
+		if (match_chain(node, cnode) != 0)
 			break;
 
-		if (!found)
-			found = true;
+		found = true;
 
 		callchain_cursor_advance(cursor);
 	}
 
-	/* matches not, relay on the parent */
+	/* matches not, relay no the parent */
 	if (!found) {
 		cursor->curr = curr_snap;
 		cursor->pos = start;
@@ -395,8 +465,9 @@ merge_chain_branch(struct callchain_cursor *cursor,
 		   struct callchain_node *dst, struct callchain_node *src)
 {
 	struct callchain_cursor_node **old_last = cursor->last;
-	struct callchain_node *child, *next_child;
+	struct callchain_node *child;
 	struct callchain_list *list, *next_list;
+	struct rb_node *n;
 	int old_pos = cursor->nr;
 	int err = 0;
 
@@ -412,12 +483,16 @@ merge_chain_branch(struct callchain_cursor *cursor,
 		append_chain_children(dst, cursor, src->hit);
 	}
 
-	chain_for_each_child_safe(child, next_child, src) {
+	n = rb_first(&src->rb_root_in);
+	while (n) {
+		child = container_of(n, struct callchain_node, rb_node_in);
+		n = rb_next(n);
+		rb_erase(&child->rb_node_in, &src->rb_root_in);
+
 		err = merge_chain_branch(cursor, dst, child);
 		if (err)
 			break;
 
-		list_del(&child->siblings);
 		free(child);
 	}
 
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 2b585bc..7bb3602 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -21,11 +21,11 @@ enum chain_order {
 
 struct callchain_node {
 	struct callchain_node	*parent;
-	struct list_head	siblings;
-	struct list_head	children;
 	struct list_head	val;
-	struct rb_node		rb_node; /* to sort nodes in an rbtree */
-	struct rb_root		rb_root; /* sorted tree of children */
+	struct rb_node		rb_node_in; /* to insert nodes in an rbtree */
+	struct rb_node		rb_node;    /* to sort nodes in an output tree */
+	struct rb_root		rb_root_in; /* input tree of children */
+	struct rb_root		rb_root;    /* sorted output tree of children */
 	unsigned int		val_nr;
 	u64			hit;
 	u64			children_hit;
@@ -86,13 +86,12 @@ extern __thread struct callchain_cursor callchain_cursor;
 
 static inline void callchain_init(struct callchain_root *root)
 {
-	INIT_LIST_HEAD(&root->node.siblings);
-	INIT_LIST_HEAD(&root->node.children);
 	INIT_LIST_HEAD(&root->node.val);
 
 	root->node.parent = NULL;
 	root->node.hit = 0;
 	root->node.children_hit = 0;
+	root->node.rb_root_in = RB_ROOT;
 	root->max_depth = 0;
 }
 

  reply	other threads:[~2013-10-23  7:55 UTC|newest]

Thread overview: 88+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-10-11  5:15 [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5) Namhyung Kim
2013-10-11  5:15 ` [PATCH 1/8] perf callchain: Convert children list to rbtree Namhyung Kim
2013-10-23  7:54   ` tip-bot for Namhyung Kim [this message]
2013-10-23 11:07     ` [tip:perf/core] " Frederic Weisbecker
2013-10-23 12:45       ` Arnaldo Carvalho de Melo
2013-10-11  5:15 ` [PATCH 2/8] perf ui/progress: Add new helper functions for progress bar Namhyung Kim
2013-10-21 18:13   ` Arnaldo Carvalho de Melo
2013-10-22 18:12     ` Namhyung Kim
2013-10-22 13:06       ` Arnaldo Carvalho de Melo
2013-10-11  5:15 ` [PATCH 3/8] perf tools: Show progress on histogram collapsing Namhyung Kim
2013-10-25 10:33   ` [tip:perf/core] " tip-bot for Namhyung Kim
2013-10-28 15:03   ` [tip:perf/urgent] perf script python: Fix mem leak due to missing Py_DECREFs on dict entries tip-bot for Joseph Schuchart
2013-10-11  5:15 ` [PATCH 4/8] perf tools: Use an accessor to read thread comm Namhyung Kim
2013-10-11  5:15 ` [PATCH 5/8] perf tools: Add time argument on comm setting Namhyung Kim
2013-10-11  5:15 ` [PATCH 6/8] perf tools: Add new comm infrastructure Namhyung Kim
2013-10-25 10:56   ` Frederic Weisbecker
2013-10-25 13:04     ` Arnaldo Carvalho de Melo
2013-10-25 15:33       ` David Ahern
2013-10-25 18:12         ` Frederic Weisbecker
2013-10-25 18:14           ` Arnaldo Carvalho de Melo
2013-10-25 18:19           ` David Ahern
2013-10-28  5:38             ` Namhyung Kim
2013-10-28  9:09               ` Frederic Weisbecker
2013-10-28  9:15                 ` Namhyung Kim
2013-10-28 10:12                   ` Frederic Weisbecker
2013-10-28 12:43                     ` Arnaldo Carvalho de Melo
2013-10-28 14:29                       ` Arnaldo Carvalho de Melo
2013-10-28 16:05                         ` Frederic Weisbecker
2013-10-28 17:01                           ` Arnaldo Carvalho de Melo
2013-10-28 17:48                             ` Arnaldo Carvalho de Melo
2013-10-29  9:20                               ` Frederic Weisbecker
2013-10-29 13:06                                 ` Arnaldo Carvalho de Melo
2013-10-11  5:15 ` [PATCH 7/8] perf tools: Compare hists comm by addresses Namhyung Kim
2013-11-04 20:19   ` [tip:perf/core] " tip-bot for Frederic Weisbecker
2013-10-11  5:15 ` [PATCH 8/8] perf tools: Get current comm instead of last one Namhyung Kim
2013-10-11  5:58 ` [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5) Ingo Molnar
2013-10-11  7:34   ` Jiri Olsa
2013-10-11  8:24     ` Namhyung Kim
2013-10-11 12:59       ` Ingo Molnar
2013-10-11 13:04         ` Peter Zijlstra
2013-10-11 15:11     ` David Ahern
2013-10-11 15:20       ` David Ahern
2013-10-11 21:51         ` Andi Kleen
2013-10-11 22:04           ` David Ahern
2013-10-13 10:25             ` Jiri Olsa
2013-10-13 21:18               ` [RFC] perf record,top: Add callchain option into .perfconfig Jiri Olsa
2013-10-13 21:32                 ` Andi Kleen
2013-10-14  7:56               ` [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5) Ingo Molnar
2013-10-12 16:53         ` Ingo Molnar
2013-10-12 19:42           ` David Ahern
2013-10-13  5:23             ` Ingo Molnar
2013-10-25 19:09               ` RFP: Fixing "-ga -ag -g fp -g dwarf" was " Arnaldo Carvalho de Melo
2013-10-25 19:22                 ` David Ahern
2013-10-25 19:46                   ` Arnaldo Carvalho de Melo
2013-10-26 12:03                     ` Ingo Molnar
2013-10-26 12:35                       ` Jiri Olsa
2013-10-26 14:25                       ` [PATCH 0/4] perf tools: Fix -g option handling Jiri Olsa
2013-10-26 14:25                         ` [PATCH 1/4] perf tools: Split -g and --call-graph for record command Jiri Olsa
2013-10-27 15:30                           ` David Ahern
2013-10-28 17:46                             ` Arnaldo Carvalho de Melo
2013-10-28 18:20                               ` David Ahern
2013-10-29  5:13                                 ` Namhyung Kim
2013-10-28  7:59                           ` Namhyung Kim
2013-10-29 10:18                             ` Jiri Olsa
2013-10-29 12:42                               ` Arnaldo Carvalho de Melo
2013-10-29  8:22                           ` [tip:perf/urgent] perf record: Split -g and --call-graph tip-bot for Jiri Olsa
2013-10-26 14:25                         ` [PATCH 2/4] perf tools: Split -G and --call-graph for top command Jiri Olsa
2013-10-27 15:34                           ` David Ahern
2013-10-28  8:06                             ` Namhyung Kim
2013-10-29  8:22                           ` [tip:perf/urgent] perf top: Split -G and --call-graph tip-bot for Jiri Olsa
2013-10-26 14:25                         ` [PATCH 3/4] perf tools: Add call-graph option support into .perfconfig Jiri Olsa
2013-10-27 15:36                           ` David Ahern
2013-10-28  8:10                           ` Namhyung Kim
2013-10-29 10:18                             ` Jiri Olsa
2013-10-29 12:43                               ` Arnaldo Carvalho de Melo
2013-10-29 12:46                                 ` Ingo Molnar
2013-11-01 15:20                               ` Jiri Olsa
2013-10-26 14:25                         ` [PATCH 4/4] perf tools: Add readable output for callchain debug Jiri Olsa
2013-10-27 15:39                           ` David Ahern
2013-10-26 14:32                         ` [PATCH 0/4] perf tools: Fix -g option handling Jiri Olsa
2013-10-27  6:56                           ` Ingo Molnar
2013-10-29 10:21                             ` Jiri Olsa
2013-10-29 10:25                               ` Ingo Molnar
2013-10-13 12:34 ` [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5) Jiri Olsa
2013-10-14  1:06   ` Namhyung Kim
2013-10-14  4:50   ` Namhyung Kim
2013-10-14  8:01     ` Jiri Olsa
2013-10-14  8:41       ` Namhyung Kim

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=tip-e369517ce5f796945c6af047b4e8b1d650e03458@git.kernel.org \
    --to=tipbot@zytor.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=acme@redhat.com \
    --cc=fweisbec@gmail.com \
    --cc=hpa@zytor.com \
    --cc=jolsa@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-tip-commits@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=namhyung.kim@lge.com \
    --cc=namhyung@kernel.org \
    --cc=paulus@samba.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.