All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v6 0/2] perf tool: improves DSO long names search speed with rbt
@ 2014-09-30 17:36 Waiman Long
  2014-09-30 17:36 ` [PATCH v6 1/2] perf tool: encapsulate dsos list head into struct dsos Waiman Long
  2014-09-30 17:36 ` [PATCH v6 2/2] perf tool: improves DSO long names lookup speed with rbtree Waiman Long
  0 siblings, 2 replies; 4+ messages in thread
From: Waiman Long @ 2014-09-30 17:36 UTC (permalink / raw)
  To: Peter Zijlstra, Paul Mackerras, Ingo Molnar, Arnaldo Carvalho de Melo
  Cc: linux-kernel, Scott J Norton, Douglas Hatch, Don Zickus,
	Jiri Olsa, Adrian Hunter, Namhyung Kim, Waiman Long

v5->v6:
  - Change patch 2 to move type casting to dso__find_by_longname() and
    compare short names if long names match in dso__findlink_by_longname().

v4->v5:
  - Remove the unneeded dsos field from the DSO structure.
  - Add check in dso__delete() to make sure that the DSO isn't in
    a rbtree.
  - Revise the dso__findlink_by_longname() function to get rid of
    pointer arithmetic.

v3->v4:
  - As suggested by Arnaldo, keep the DSO linked list for iteration
    purpose and create a new dsos structure to host the dual list
    head and rbtree root for DSOs inside the machine structure.

v2->v3:
  - Move the rbtree linking operation from dso__set_long_name() to
    dsos__add(), where the list_add() operation was done.
  - Add a second patch to remove the linked list and iterates the
    DSO structures by going through them in the rbtree. This requires
    changes in quite a number of files, but it makes for neater code.
  - Rebased to the 3.17-rc5 kernel.

v1->v2:
 - Rename DSO longname RBtree find function to segregate its two
   different uses of searching and linking DSO into RB tree.

This patch set adds a rbtree for linking the DSO structures of the
perf tool sorted by their long names in additional to the linked list
which is of no specific order. The list enables fast iterations of
the DSOs and the rbtree root allows fast lookup by long name. The
latter use can significantly speed up DSO processing when a large
number of DSOs are beining profiled.

Waiman Long (2):
  perf tool: encapsulate dsos list head into struct dsos
  perf tool: improves DSO long names lookup speed with rbtree

 tools/perf/util/dso.c         |   85 +++++++++++++++++++++++++++++++++++-----
 tools/perf/util/dso.h         |    9 +++-
 tools/perf/util/header.c      |   32 +++++++++-------
 tools/perf/util/machine.c     |   25 ++++++------
 tools/perf/util/machine.h     |   13 +++++-
 tools/perf/util/probe-event.c |    3 +-
 tools/perf/util/symbol-elf.c  |    7 +++-
 7 files changed, 130 insertions(+), 44 deletions(-)


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH v6 1/2] perf tool: encapsulate dsos list head into struct dsos
  2014-09-30 17:36 [PATCH v6 0/2] perf tool: improves DSO long names search speed with rbt Waiman Long
@ 2014-09-30 17:36 ` Waiman Long
  2014-09-30 17:36 ` [PATCH v6 2/2] perf tool: improves DSO long names lookup speed with rbtree Waiman Long
  1 sibling, 0 replies; 4+ messages in thread
From: Waiman Long @ 2014-09-30 17:36 UTC (permalink / raw)
  To: Peter Zijlstra, Paul Mackerras, Ingo Molnar, Arnaldo Carvalho de Melo
  Cc: linux-kernel, Scott J Norton, Douglas Hatch, Don Zickus,
	Jiri Olsa, Adrian Hunter, Namhyung Kim, Waiman Long

This is a precursor patch to enable long name searching of DSOs
using the rbtree. In this patch, a new dsos structure is created
which contains only a list head structure for the moment. The new
dsos structure is used, in turn, in the machine structure for the
user_dsos and kernel_dsos fields. Only the following 3 dsos functions
are modified to accept the new dsos structure parameter instead
of list_head:
 - dsos__add()
 - dsos__find()
 - __dsos__findnew()

Signed-off-by: Waiman Long <Waiman.Long@hp.com>
---
 tools/perf/util/dso.c         |   17 +++++++++--------
 tools/perf/util/dso.h         |    8 +++++---
 tools/perf/util/header.c      |   32 ++++++++++++++++++--------------
 tools/perf/util/machine.c     |   24 ++++++++++++------------
 tools/perf/util/machine.h     |   11 +++++++++--
 tools/perf/util/probe-event.c |    3 ++-
 tools/perf/util/symbol-elf.c  |    7 ++++++-
 7 files changed, 61 insertions(+), 41 deletions(-)

diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 55e39dc..901a58f 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -851,35 +851,36 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits)
 	return have_build_id;
 }
 
-void dsos__add(struct list_head *head, struct dso *dso)
+void dsos__add(struct dsos *dsos, struct dso *dso)
 {
-	list_add_tail(&dso->node, head);
+	list_add_tail(&dso->node, &dsos->head);
 }
 
-struct dso *dsos__find(const struct list_head *head, const char *name, bool cmp_short)
+struct dso *dsos__find(const struct dsos *dsos, const char *name,
+		       bool cmp_short)
 {
 	struct dso *pos;
 
 	if (cmp_short) {
-		list_for_each_entry(pos, head, node)
+		list_for_each_entry(pos, &dsos->head, node)
 			if (strcmp(pos->short_name, name) == 0)
 				return pos;
 		return NULL;
 	}
-	list_for_each_entry(pos, head, node)
+	list_for_each_entry(pos, &dsos->head, node)
 		if (strcmp(pos->long_name, name) == 0)
 			return pos;
 	return NULL;
 }
 
-struct dso *__dsos__findnew(struct list_head *head, const char *name)
+struct dso *__dsos__findnew(struct dsos *dsos, const char *name)
 {
-	struct dso *dso = dsos__find(head, name, false);
+	struct dso *dso = dsos__find(dsos, name, false);
 
 	if (!dso) {
 		dso = dso__new(name);
 		if (dso != NULL) {
-			dsos__add(head, dso);
+			dsos__add(dsos, dso);
 			dso__set_basename(dso);
 		}
 	}
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 5e463c0..91eec17 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -90,6 +90,8 @@ struct dso_cache {
 	char data[0];
 };
 
+struct dsos;
+
 struct dso {
 	struct list_head node;
 	struct rb_root	 symbols[MAP__NR_TYPES];
@@ -224,10 +226,10 @@ struct map *dso__new_map(const char *name);
 struct dso *dso__kernel_findnew(struct machine *machine, const char *name,
 				const char *short_name, int dso_type);
 
-void dsos__add(struct list_head *head, struct dso *dso);
-struct dso *dsos__find(const struct list_head *head, const char *name,
+void dsos__add(struct dsos *dsos, struct dso *dso);
+struct dso *dsos__find(const struct dsos *dsos, const char *name,
 		       bool cmp_short);
-struct dso *__dsos__findnew(struct list_head *head, const char *name);
+struct dso *__dsos__findnew(struct dsos *dsos, const char *name);
 bool __dsos__read_build_ids(struct list_head *head, bool with_hits);
 
 size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 158c787..ce0de00 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -214,11 +214,11 @@ static int machine__hit_all_dsos(struct machine *machine)
 {
 	int err;
 
-	err = __dsos__hit_all(&machine->kernel_dsos);
+	err = __dsos__hit_all(&machine->kernel_dsos.head);
 	if (err)
 		return err;
 
-	return __dsos__hit_all(&machine->user_dsos);
+	return __dsos__hit_all(&machine->user_dsos.head);
 }
 
 int dsos__hit_all(struct perf_session *session)
@@ -288,11 +288,12 @@ static int machine__write_buildid_table(struct machine *machine, int fd)
 		umisc = PERF_RECORD_MISC_GUEST_USER;
 	}
 
-	err = __dsos__write_buildid_table(&machine->kernel_dsos, machine,
+	err = __dsos__write_buildid_table(&machine->kernel_dsos.head, machine,
 					  machine->pid, kmisc, fd);
 	if (err == 0)
-		err = __dsos__write_buildid_table(&machine->user_dsos, machine,
-						  machine->pid, umisc, fd);
+		err = __dsos__write_buildid_table(&machine->user_dsos.head,
+						  machine, machine->pid, umisc,
+						  fd);
 	return err;
 }
 
@@ -455,9 +456,10 @@ static int __dsos__cache_build_ids(struct list_head *head,
 
 static int machine__cache_build_ids(struct machine *machine, const char *debugdir)
 {
-	int ret = __dsos__cache_build_ids(&machine->kernel_dsos, machine,
+	int ret = __dsos__cache_build_ids(&machine->kernel_dsos.head, machine,
 					  debugdir);
-	ret |= __dsos__cache_build_ids(&machine->user_dsos, machine, debugdir);
+	ret |= __dsos__cache_build_ids(&machine->user_dsos.head, machine,
+				       debugdir);
 	return ret;
 }
 
@@ -483,8 +485,10 @@ static int perf_session__cache_build_ids(struct perf_session *session)
 
 static bool machine__read_build_ids(struct machine *machine, bool with_hits)
 {
-	bool ret = __dsos__read_build_ids(&machine->kernel_dsos, with_hits);
-	ret |= __dsos__read_build_ids(&machine->user_dsos, with_hits);
+	bool ret;
+
+	ret  = __dsos__read_build_ids(&machine->kernel_dsos.head, with_hits);
+	ret |= __dsos__read_build_ids(&machine->user_dsos.head, with_hits);
 	return ret;
 }
 
@@ -1548,7 +1552,7 @@ static int __event_process_build_id(struct build_id_event *bev,
 				    struct perf_session *session)
 {
 	int err = -1;
-	struct list_head *head;
+	struct dsos *dsos;
 	struct machine *machine;
 	u16 misc;
 	struct dso *dso;
@@ -1563,22 +1567,22 @@ static int __event_process_build_id(struct build_id_event *bev,
 	switch (misc) {
 	case PERF_RECORD_MISC_KERNEL:
 		dso_type = DSO_TYPE_KERNEL;
-		head = &machine->kernel_dsos;
+		dsos = &machine->kernel_dsos;
 		break;
 	case PERF_RECORD_MISC_GUEST_KERNEL:
 		dso_type = DSO_TYPE_GUEST_KERNEL;
-		head = &machine->kernel_dsos;
+		dsos = &machine->kernel_dsos;
 		break;
 	case PERF_RECORD_MISC_USER:
 	case PERF_RECORD_MISC_GUEST_USER:
 		dso_type = DSO_TYPE_USER;
-		head = &machine->user_dsos;
+		dsos = &machine->user_dsos;
 		break;
 	default:
 		goto out;
 	}
 
-	dso = __dsos__findnew(head, filename);
+	dso = __dsos__findnew(dsos, filename);
 	if (dso != NULL) {
 		char sbuild_id[BUILD_ID_SIZE * 2 + 1];
 
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index b2ec38b..49a75ec 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -17,8 +17,8 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
 {
 	map_groups__init(&machine->kmaps);
 	RB_CLEAR_NODE(&machine->rb_node);
-	INIT_LIST_HEAD(&machine->user_dsos);
-	INIT_LIST_HEAD(&machine->kernel_dsos);
+	INIT_LIST_HEAD(&machine->user_dsos.head);
+	INIT_LIST_HEAD(&machine->kernel_dsos.head);
 
 	machine->threads = RB_ROOT;
 	INIT_LIST_HEAD(&machine->dead_threads);
@@ -72,11 +72,11 @@ out_delete:
 	return NULL;
 }
 
-static void dsos__delete(struct list_head *dsos)
+static void dsos__delete(struct dsos *dsos)
 {
 	struct dso *pos, *n;
 
-	list_for_each_entry_safe(pos, n, dsos, node) {
+	list_for_each_entry_safe(pos, n, &dsos->head, node) {
 		list_del(&pos->node);
 		dso__delete(pos);
 	}
@@ -477,23 +477,23 @@ struct map *machine__new_module(struct machine *machine, u64 start,
 size_t machines__fprintf_dsos(struct machines *machines, FILE *fp)
 {
 	struct rb_node *nd;
-	size_t ret = __dsos__fprintf(&machines->host.kernel_dsos, fp) +
-		     __dsos__fprintf(&machines->host.user_dsos, fp);
+	size_t ret = __dsos__fprintf(&machines->host.kernel_dsos.head, fp) +
+		     __dsos__fprintf(&machines->host.user_dsos.head, fp);
 
 	for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
 		struct machine *pos = rb_entry(nd, struct machine, rb_node);
-		ret += __dsos__fprintf(&pos->kernel_dsos, fp);
-		ret += __dsos__fprintf(&pos->user_dsos, fp);
+		ret += __dsos__fprintf(&pos->kernel_dsos.head, fp);
+		ret += __dsos__fprintf(&pos->user_dsos.head, fp);
 	}
 
 	return ret;
 }
 
-size_t machine__fprintf_dsos_buildid(struct machine *machine, FILE *fp,
+size_t machine__fprintf_dsos_buildid(struct machine *m, FILE *fp,
 				     bool (skip)(struct dso *dso, int parm), int parm)
 {
-	return __dsos__fprintf_buildid(&machine->kernel_dsos, fp, skip, parm) +
-	       __dsos__fprintf_buildid(&machine->user_dsos, fp, skip, parm);
+	return __dsos__fprintf_buildid(&m->kernel_dsos.head, fp, skip, parm) +
+	       __dsos__fprintf_buildid(&m->user_dsos.head, fp, skip, parm);
 }
 
 size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp,
@@ -994,7 +994,7 @@ static bool machine__uses_kcore(struct machine *machine)
 {
 	struct dso *dso;
 
-	list_for_each_entry(dso, &machine->kernel_dsos, node) {
+	list_for_each_entry(dso, &machine->kernel_dsos.head, node) {
 		if (dso__is_kcore(dso))
 			return true;
 	}
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 6a6bcc1..37b3487 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -22,6 +22,13 @@ extern const char *ref_reloc_sym_names[];
 
 struct vdso_info;
 
+/*
+ * DSOs are put into a list for fast iteration.
+ */
+struct dsos {
+	struct list_head head;
+};
+
 struct machine {
 	struct rb_node	  rb_node;
 	pid_t		  pid;
@@ -32,8 +39,8 @@ struct machine {
 	struct list_head  dead_threads;
 	struct thread	  *last_match;
 	struct vdso_info  *vdso_info;
-	struct list_head  user_dsos;
-	struct list_head  kernel_dsos;
+	struct dsos	  user_dsos;
+	struct dsos	  kernel_dsos;
 	struct map_groups kmaps;
 	struct map	  *vmlinux_maps[MAP__NR_TYPES];
 	u64		  kernel_start;
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index be37b5a..c150ca4 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -184,7 +184,8 @@ static struct dso *kernel_get_module_dso(const char *module)
 	const char *vmlinux_name;
 
 	if (module) {
-		list_for_each_entry(dso, &host_machine->kernel_dsos, node) {
+		list_for_each_entry(dso, &host_machine->kernel_dsos.head,
+				    node) {
 			if (strncmp(dso->short_name + 1, module,
 				    dso->short_name_len - 2) == 0)
 				goto found;
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 2a92e10..1e23a5b 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -6,6 +6,7 @@
 #include <inttypes.h>
 
 #include "symbol.h"
+#include "machine.h"
 #include "vdso.h"
 #include <symbol/kallsyms.h>
 #include "debug.h"
@@ -929,7 +930,11 @@ int dso__load_sym(struct dso *dso, struct map *map,
 				}
 				curr_dso->symtab_type = dso->symtab_type;
 				map_groups__insert(kmap->kmaps, curr_map);
-				dsos__add(&dso->node, curr_dso);
+				/*
+				 * The new DSO should go to the kernel DSOS
+				 */
+				dsos__add(&map->groups->machine->kernel_dsos,
+					  curr_dso);
 				dso__set_loaded(curr_dso, map->type);
 			} else
 				curr_dso = curr_map->dso;
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH v6 2/2] perf tool: improves DSO long names lookup speed with rbtree
  2014-09-30 17:36 [PATCH v6 0/2] perf tool: improves DSO long names search speed with rbt Waiman Long
  2014-09-30 17:36 ` [PATCH v6 1/2] perf tool: encapsulate dsos list head into struct dsos Waiman Long
@ 2014-09-30 17:36 ` Waiman Long
  2014-10-03  5:26   ` [tip:perf/core] perf symbols: Improve " tip-bot for Waiman Long
  1 sibling, 1 reply; 4+ messages in thread
From: Waiman Long @ 2014-09-30 17:36 UTC (permalink / raw)
  To: Peter Zijlstra, Paul Mackerras, Ingo Molnar, Arnaldo Carvalho de Melo
  Cc: linux-kernel, Scott J Norton, Douglas Hatch, Don Zickus,
	Jiri Olsa, Adrian Hunter, Namhyung Kim, Waiman Long

With workload that spawns and destroys many threads and processes,
it was found that perf-mem could took a long time to post-process
the perf data after the target workload had completed its operation.
The performance bottleneck was found to be the lookup and insertion
of the new DSO structures (thousands of them in this case).

In a dual-socket Ivy-Bridge E7-4890 v2 machine (30-core, 60-thread),
the perf profile below shows what perf was doing after the profiled
AIM7 shared workload completed:

-     83.94%  perf  libc-2.11.3.so     [.] __strcmp_sse42
   - __strcmp_sse42
      - 99.82% map__new
           machine__process_mmap_event
           perf_session_deliver_event
           perf_session__process_event
           __perf_session__process_events
           cmd_record
           cmd_mem
           run_builtin
           main
           __libc_start_main
-     13.17%  perf  perf               [.] __dsos__findnew
     __dsos__findnew
     map__new
     machine__process_mmap_event
     perf_session_deliver_event
     perf_session__process_event
     __perf_session__process_events
     cmd_record
     cmd_mem
     run_builtin
     main
     __libc_start_main

So about 97% of CPU times were spent in the map__new() function
trying to insert new DSO entry into the DSO linked list. The whole
post-processing step took about 9 minutes.

The DSO structures are currently searched linearly. So the total
processing time will be proportional to n^2.

To overcome this performance problem, the DSO code is modified to
also put the DSO structures in a RB tree sorted by its long name
in additional to being in a simple linked list. With this change,
the processing time will become proportional to n*log(n) which will
be much quicker for large n. However, the short name will still be
searched using the old linear searching method.  With that patch
in place, the same perf-mem post-processing step took less than 30
seconds to complete.

Signed-off-by: Waiman Long <Waiman.Long@hp.com>
---
 tools/perf/util/dso.c     |   70 ++++++++++++++++++++++++++++++++++++++++++--
 tools/perf/util/dso.h     |    1 +
 tools/perf/util/machine.c |    1 +
 tools/perf/util/machine.h |    4 ++-
 4 files changed, 71 insertions(+), 5 deletions(-)

diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 901a58f..0247acf 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -653,6 +653,65 @@ struct dso *dso__kernel_findnew(struct machine *machine, const char *name,
 	return dso;
 }
 
+/*
+ * Find a matching entry and/or link current entry to RB tree.
+ * Either one of the dso or name parameter must be non-NULL or the
+ * function will not work.
+ */
+static struct dso *dso__findlink_by_longname(struct rb_root *root,
+					     struct dso *dso, const char *name)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node  *parent = NULL;
+
+	if (!name)
+		name = dso->long_name;
+	/*
+	 * Find node with the matching name
+	 */
+	while (*p) {
+		struct dso *this = rb_entry(*p, struct dso, rb_node);
+		int rc = strcmp(name, this->long_name);
+
+		parent = *p;
+		if (rc == 0) {
+			/*
+			 * In case the new DSO is a duplicate of an existing
+			 * one, print an one-time warning & put the new entry
+			 * at the end of the list of duplicates.
+			 */
+			if (!dso || (dso == this))
+				return this;	/* Find matching dso */
+			/*
+			 * The core kernel DSOs may have duplicated long name.
+			 * In this case, the short name should be different.
+			 * Comparing the short names to differentiate the DSOs.
+			 */
+			rc = strcmp(dso->short_name, this->short_name);
+			if (rc == 0) {
+				pr_err("Duplicated dso name: %s\n", name);
+				return NULL;
+			}
+		}
+		if (rc < 0)
+			p = &parent->rb_left;
+		else
+			p = &parent->rb_right;
+	}
+	if (dso) {
+		/* Add new node and rebalance tree */
+		rb_link_node(&dso->rb_node, parent, p);
+		rb_insert_color(&dso->rb_node, root);
+	}
+	return NULL;
+}
+
+static inline struct dso *
+dso__find_by_longname(const struct rb_root *root, const char *name)
+{
+	return dso__findlink_by_longname((struct rb_root *)root, NULL, name);
+}
+
 void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated)
 {
 	if (name == NULL)
@@ -755,6 +814,7 @@ struct dso *dso__new(const char *name)
 		dso->a2l_fails = 1;
 		dso->kernel = DSO_TYPE_USER;
 		dso->needs_swap = DSO_SWAP__UNSET;
+		RB_CLEAR_NODE(&dso->rb_node);
 		INIT_LIST_HEAD(&dso->node);
 		INIT_LIST_HEAD(&dso->data.open_entry);
 	}
@@ -765,6 +825,10 @@ struct dso *dso__new(const char *name)
 void dso__delete(struct dso *dso)
 {
 	int i;
+
+	if (!RB_EMPTY_NODE(&dso->rb_node))
+		pr_err("DSO %s is still in rbtree when being deleted!\n",
+		       dso->long_name);
 	for (i = 0; i < MAP__NR_TYPES; ++i)
 		symbols__delete(&dso->symbols[i]);
 
@@ -854,6 +918,7 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits)
 void dsos__add(struct dsos *dsos, struct dso *dso)
 {
 	list_add_tail(&dso->node, &dsos->head);
+	dso__findlink_by_longname(&dsos->root, dso, NULL);
 }
 
 struct dso *dsos__find(const struct dsos *dsos, const char *name,
@@ -867,10 +932,7 @@ struct dso *dsos__find(const struct dsos *dsos, const char *name,
 				return pos;
 		return NULL;
 	}
-	list_for_each_entry(pos, &dsos->head, node)
-		if (strcmp(pos->long_name, name) == 0)
-			return pos;
-	return NULL;
+	return dso__find_by_longname(&dsos->root, name);
 }
 
 struct dso *__dsos__findnew(struct dsos *dsos, const char *name)
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 91eec17..9ecd68b 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -94,6 +94,7 @@ struct dsos;
 
 struct dso {
 	struct list_head node;
+	struct rb_node	 rb_node;	/* rbtree node sorted by long name */
 	struct rb_root	 symbols[MAP__NR_TYPES];
 	struct rb_root	 symbol_names[MAP__NR_TYPES];
 	void		 *a2l;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 49a75ec..b7d477f 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -77,6 +77,7 @@ static void dsos__delete(struct dsos *dsos)
 	struct dso *pos, *n;
 
 	list_for_each_entry_safe(pos, n, &dsos->head, node) {
+		RB_CLEAR_NODE(&pos->rb_node);
 		list_del(&pos->node);
 		dso__delete(pos);
 	}
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 37b3487..5a5f765 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -23,10 +23,12 @@ extern const char *ref_reloc_sym_names[];
 struct vdso_info;
 
 /*
- * DSOs are put into a list for fast iteration.
+ * DSOs are put into both a list for fast iteration and rbtree for fast
+ * long name lookup.
  */
 struct dsos {
 	struct list_head head;
+	struct rb_root   root;	/* rbtree root sorted by long name */
 };
 
 struct machine {
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [tip:perf/core] perf symbols: Improve DSO long names lookup speed with rbtree
  2014-09-30 17:36 ` [PATCH v6 2/2] perf tool: improves DSO long names lookup speed with rbtree Waiman Long
@ 2014-10-03  5:26   ` tip-bot for Waiman Long
  0 siblings, 0 replies; 4+ messages in thread
From: tip-bot for Waiman Long @ 2014-10-03  5:26 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: acme, linux-kernel, paulus, mingo, hpa, mingo, jolsa, peterz,
	namhyung, Waiman.Long, adrian.hunter, doug.hatch, tglx,
	scott.norton, dzickus

Commit-ID:  4598a0a6d22fadfb7b37f2b44ee7fdcb24632fcf
Gitweb:     http://git.kernel.org/tip/4598a0a6d22fadfb7b37f2b44ee7fdcb24632fcf
Author:     Waiman Long <Waiman.Long@hp.com>
AuthorDate: Tue, 30 Sep 2014 13:36:15 -0400
Committer:  Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Wed, 1 Oct 2014 14:39:57 -0300

perf symbols: Improve DSO long names lookup speed with rbtree

With workload that spawns and destroys many threads and processes, it
was found that perf-mem could took a long time to post-process the perf
data after the target workload had completed its operation.

The performance bottleneck was found to be the lookup and insertion of
the new DSO structures (thousands of them in this case).

In a dual-socket Ivy-Bridge E7-4890 v2 machine (30-core, 60-thread), the
perf profile below shows what perf was doing after the profiled AIM7
shared workload completed:

-     83.94%  perf  libc-2.11.3.so     [.] __strcmp_sse42
   - __strcmp_sse42
      - 99.82% map__new
           machine__process_mmap_event
           perf_session_deliver_event
           perf_session__process_event
           __perf_session__process_events
           cmd_record
           cmd_mem
           run_builtin
           main
           __libc_start_main
-     13.17%  perf  perf               [.] __dsos__findnew
     __dsos__findnew
     map__new
     machine__process_mmap_event
     perf_session_deliver_event
     perf_session__process_event
     __perf_session__process_events
     cmd_record
     cmd_mem
     run_builtin
     main
     __libc_start_main

So about 97% of CPU times were spent in the map__new() function trying
to insert new DSO entry into the DSO linked list. The whole
post-processing step took about 9 minutes.

The DSO structures are currently searched linearly. So the total
processing time will be proportional to n^2.

To overcome this performance problem, the DSO code is modified to also
put the DSO structures in a RB tree sorted by its long name in
additional to being in a simple linked list. With this change, the
processing time will become proportional to n*log(n) which will be much
quicker for large n. However, the short name will still be searched
using the old linear searching method.  With that patch in place, the
same perf-mem post-processing step took less than 30 seconds to
complete.

Signed-off-by: Waiman Long <Waiman.Long@hp.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Douglas Hatch <doug.hatch@hp.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Scott J Norton <scott.norton@hp.com>
Link: http://lkml.kernel.org/r/1412098575-27863-3-git-send-email-Waiman.Long@hp.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/dso.c     | 70 ++++++++++++++++++++++++++++++++++++++++++++---
 tools/perf/util/dso.h     |  5 +++-
 tools/perf/util/machine.c |  1 +
 3 files changed, 71 insertions(+), 5 deletions(-)

diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 901a58f..0247acf 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -653,6 +653,65 @@ struct dso *dso__kernel_findnew(struct machine *machine, const char *name,
 	return dso;
 }
 
+/*
+ * Find a matching entry and/or link current entry to RB tree.
+ * Either one of the dso or name parameter must be non-NULL or the
+ * function will not work.
+ */
+static struct dso *dso__findlink_by_longname(struct rb_root *root,
+					     struct dso *dso, const char *name)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node  *parent = NULL;
+
+	if (!name)
+		name = dso->long_name;
+	/*
+	 * Find node with the matching name
+	 */
+	while (*p) {
+		struct dso *this = rb_entry(*p, struct dso, rb_node);
+		int rc = strcmp(name, this->long_name);
+
+		parent = *p;
+		if (rc == 0) {
+			/*
+			 * In case the new DSO is a duplicate of an existing
+			 * one, print an one-time warning & put the new entry
+			 * at the end of the list of duplicates.
+			 */
+			if (!dso || (dso == this))
+				return this;	/* Find matching dso */
+			/*
+			 * The core kernel DSOs may have duplicated long name.
+			 * In this case, the short name should be different.
+			 * Comparing the short names to differentiate the DSOs.
+			 */
+			rc = strcmp(dso->short_name, this->short_name);
+			if (rc == 0) {
+				pr_err("Duplicated dso name: %s\n", name);
+				return NULL;
+			}
+		}
+		if (rc < 0)
+			p = &parent->rb_left;
+		else
+			p = &parent->rb_right;
+	}
+	if (dso) {
+		/* Add new node and rebalance tree */
+		rb_link_node(&dso->rb_node, parent, p);
+		rb_insert_color(&dso->rb_node, root);
+	}
+	return NULL;
+}
+
+static inline struct dso *
+dso__find_by_longname(const struct rb_root *root, const char *name)
+{
+	return dso__findlink_by_longname((struct rb_root *)root, NULL, name);
+}
+
 void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated)
 {
 	if (name == NULL)
@@ -755,6 +814,7 @@ struct dso *dso__new(const char *name)
 		dso->a2l_fails = 1;
 		dso->kernel = DSO_TYPE_USER;
 		dso->needs_swap = DSO_SWAP__UNSET;
+		RB_CLEAR_NODE(&dso->rb_node);
 		INIT_LIST_HEAD(&dso->node);
 		INIT_LIST_HEAD(&dso->data.open_entry);
 	}
@@ -765,6 +825,10 @@ struct dso *dso__new(const char *name)
 void dso__delete(struct dso *dso)
 {
 	int i;
+
+	if (!RB_EMPTY_NODE(&dso->rb_node))
+		pr_err("DSO %s is still in rbtree when being deleted!\n",
+		       dso->long_name);
 	for (i = 0; i < MAP__NR_TYPES; ++i)
 		symbols__delete(&dso->symbols[i]);
 
@@ -854,6 +918,7 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits)
 void dsos__add(struct dsos *dsos, struct dso *dso)
 {
 	list_add_tail(&dso->node, &dsos->head);
+	dso__findlink_by_longname(&dsos->root, dso, NULL);
 }
 
 struct dso *dsos__find(const struct dsos *dsos, const char *name,
@@ -867,10 +932,7 @@ struct dso *dsos__find(const struct dsos *dsos, const char *name,
 				return pos;
 		return NULL;
 	}
-	list_for_each_entry(pos, &dsos->head, node)
-		if (strcmp(pos->long_name, name) == 0)
-			return pos;
-	return NULL;
+	return dso__find_by_longname(&dsos->root, name);
 }
 
 struct dso *__dsos__findnew(struct dsos *dsos, const char *name)
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index b63dc98..acb651a 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -91,14 +91,17 @@ struct dso_cache {
 };
 
 /*
- * DSOs are put into a list for fast iteration.
+ * DSOs are put into both a list for fast iteration and rbtree for fast
+ * long name lookup.
  */
 struct dsos {
 	struct list_head head;
+	struct rb_root	 root;	/* rbtree root sorted by long name */
 };
 
 struct dso {
 	struct list_head node;
+	struct rb_node	 rb_node;	/* rbtree node sorted by long name */
 	struct rb_root	 symbols[MAP__NR_TYPES];
 	struct rb_root	 symbol_names[MAP__NR_TYPES];
 	void		 *a2l;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 49a75ec..b7d477f 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -77,6 +77,7 @@ static void dsos__delete(struct dsos *dsos)
 	struct dso *pos, *n;
 
 	list_for_each_entry_safe(pos, n, &dsos->head, node) {
+		RB_CLEAR_NODE(&pos->rb_node);
 		list_del(&pos->node);
 		dso__delete(pos);
 	}

^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2014-10-03  5:27 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-09-30 17:36 [PATCH v6 0/2] perf tool: improves DSO long names search speed with rbt Waiman Long
2014-09-30 17:36 ` [PATCH v6 1/2] perf tool: encapsulate dsos list head into struct dsos Waiman Long
2014-09-30 17:36 ` [PATCH v6 2/2] perf tool: improves DSO long names lookup speed with rbtree Waiman Long
2014-10-03  5:26   ` [tip:perf/core] perf symbols: Improve " tip-bot for Waiman Long

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.