All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v5 0/2] perf tool: improves DSO long names search speed with rbtree
@ 2014-09-29 20:07 Waiman Long
  2014-09-29 20:07 ` [PATCH v5 1/2] perf tool: encapsulate dsos list head into struct dsos Waiman Long
  2014-09-29 20:07 ` [PATCH v5 2/2] perf tool: improves DSO long names lookup speed with rbtree Waiman Long
  0 siblings, 2 replies; 7+ messages in thread
From: Waiman Long @ 2014-09-29 20:07 UTC (permalink / raw)
  To: Peter Zijlstra, Paul Mackerras, Ingo Molnar, Arnaldo Carvalho de Melo
  Cc: linux-kernel, Scott J Norton, Douglas Hatch, Don Zickus,
	Jiri Olsa, Adrian Hunter, Namhyung Kim, Waiman Long

v4->v5:
  - Remove the unneeded dsos field from the DSO structure.
  - Add check in dso__delete() to make sure that the DSO isn't in
    a rbtree.
  - Revise the dso__findlink_by_longname() function to get rid of
    pointer arithmetic.

v3->v4:
  - As suggested by Arnaldo, keep the DSO linked list for iteration
    purpose and create a new dsos structure to host the dual list
    head and rbtree root for DSOs inside the machine structure.

v2->v3:
  - Move the rbtree linking operation from dso__set_long_name() to
    dsos__add(), where the list_add() operation was done.
  - Add a second patch to remove the linked list and iterate the
    DSO structures by going through them in the rbtree. This requires
    changes in quite a number of files, but it makes for neater code.
  - Rebased to the 3.17-rc5 kernel.

v1->v2:
 - Rename DSO longname RBtree find function to segregate its two
   different uses of searching and linking DSO into RB tree.

This patch set adds a rbtree for linking the DSO structures of the
perf tool sorted by their long names, in addition to the linked list,
which is in no specific order. The list enables fast iteration over
the DSOs and the rbtree root allows fast lookup by long name. The
latter can significantly speed up DSO processing when a large
number of DSOs are being profiled.

Waiman Long (2):
  perf tool: encapsulate dsos list head into struct dsos
  perf tool: improves DSO long names lookup speed with rbtree

 tools/perf/util/dso.c         |   87 +++++++++++++++++++++++++++++++++++-----
 tools/perf/util/dso.h         |    9 +++-
 tools/perf/util/header.c      |   32 ++++++++-------
 tools/perf/util/machine.c     |   25 ++++++------
 tools/perf/util/machine.h     |   13 +++++-
 tools/perf/util/probe-event.c |    3 +-
 tools/perf/util/symbol-elf.c  |    7 +++-
 7 files changed, 132 insertions(+), 44 deletions(-)


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v5 1/2] perf tool: encapsulate dsos list head into struct dsos
  2014-09-29 20:07 [PATCH v5 0/2] perf tool: improves DSO long names search speed with rbtree Waiman Long
@ 2014-09-29 20:07 ` Waiman Long
  2014-10-03  5:26   ` [tip:perf/core] perf symbols: Encapsulate " tip-bot for Waiman Long
  2014-09-29 20:07 ` [PATCH v5 2/2] perf tool: improves DSO long names lookup speed with rbtree Waiman Long
  1 sibling, 1 reply; 7+ messages in thread
From: Waiman Long @ 2014-09-29 20:07 UTC (permalink / raw)
  To: Peter Zijlstra, Paul Mackerras, Ingo Molnar, Arnaldo Carvalho de Melo
  Cc: linux-kernel, Scott J Norton, Douglas Hatch, Don Zickus,
	Jiri Olsa, Adrian Hunter, Namhyung Kim, Waiman Long

This is a precursor patch to enable long name searching of DSOs
using the rbtree. In this patch, a new dsos structure is created
which contains only a list head structure for the moment. The new
dsos structure is used, in turn, in the machine structure for the
user_dsos and kernel_dsos fields. Only the following 3 dsos functions
are modified to accept the new dsos structure parameter instead
of list_head:
 - dsos__add()
 - dsos__find()
 - __dsos__findnew()

Signed-off-by: Waiman Long <Waiman.Long@hp.com>
---
 tools/perf/util/dso.c         |   17 +++++++++--------
 tools/perf/util/dso.h         |    8 +++++---
 tools/perf/util/header.c      |   32 ++++++++++++++++++--------------
 tools/perf/util/machine.c     |   24 ++++++++++++------------
 tools/perf/util/machine.h     |   11 +++++++++--
 tools/perf/util/probe-event.c |    3 ++-
 tools/perf/util/symbol-elf.c  |    7 ++++++-
 7 files changed, 61 insertions(+), 41 deletions(-)

diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 55e39dc..901a58f 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -851,35 +851,36 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits)
 	return have_build_id;
 }
 
-void dsos__add(struct list_head *head, struct dso *dso)
+void dsos__add(struct dsos *dsos, struct dso *dso)
 {
-	list_add_tail(&dso->node, head);
+	list_add_tail(&dso->node, &dsos->head);
 }
 
-struct dso *dsos__find(const struct list_head *head, const char *name, bool cmp_short)
+struct dso *dsos__find(const struct dsos *dsos, const char *name,
+		       bool cmp_short)
 {
 	struct dso *pos;
 
 	if (cmp_short) {
-		list_for_each_entry(pos, head, node)
+		list_for_each_entry(pos, &dsos->head, node)
 			if (strcmp(pos->short_name, name) == 0)
 				return pos;
 		return NULL;
 	}
-	list_for_each_entry(pos, head, node)
+	list_for_each_entry(pos, &dsos->head, node)
 		if (strcmp(pos->long_name, name) == 0)
 			return pos;
 	return NULL;
 }
 
-struct dso *__dsos__findnew(struct list_head *head, const char *name)
+struct dso *__dsos__findnew(struct dsos *dsos, const char *name)
 {
-	struct dso *dso = dsos__find(head, name, false);
+	struct dso *dso = dsos__find(dsos, name, false);
 
 	if (!dso) {
 		dso = dso__new(name);
 		if (dso != NULL) {
-			dsos__add(head, dso);
+			dsos__add(dsos, dso);
 			dso__set_basename(dso);
 		}
 	}
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 5e463c0..91eec17 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -90,6 +90,8 @@ struct dso_cache {
 	char data[0];
 };
 
+struct dsos;
+
 struct dso {
 	struct list_head node;
 	struct rb_root	 symbols[MAP__NR_TYPES];
@@ -224,10 +226,10 @@ struct map *dso__new_map(const char *name);
 struct dso *dso__kernel_findnew(struct machine *machine, const char *name,
 				const char *short_name, int dso_type);
 
-void dsos__add(struct list_head *head, struct dso *dso);
-struct dso *dsos__find(const struct list_head *head, const char *name,
+void dsos__add(struct dsos *dsos, struct dso *dso);
+struct dso *dsos__find(const struct dsos *dsos, const char *name,
 		       bool cmp_short);
-struct dso *__dsos__findnew(struct list_head *head, const char *name);
+struct dso *__dsos__findnew(struct dsos *dsos, const char *name);
 bool __dsos__read_build_ids(struct list_head *head, bool with_hits);
 
 size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 158c787..ce0de00 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -214,11 +214,11 @@ static int machine__hit_all_dsos(struct machine *machine)
 {
 	int err;
 
-	err = __dsos__hit_all(&machine->kernel_dsos);
+	err = __dsos__hit_all(&machine->kernel_dsos.head);
 	if (err)
 		return err;
 
-	return __dsos__hit_all(&machine->user_dsos);
+	return __dsos__hit_all(&machine->user_dsos.head);
 }
 
 int dsos__hit_all(struct perf_session *session)
@@ -288,11 +288,12 @@ static int machine__write_buildid_table(struct machine *machine, int fd)
 		umisc = PERF_RECORD_MISC_GUEST_USER;
 	}
 
-	err = __dsos__write_buildid_table(&machine->kernel_dsos, machine,
+	err = __dsos__write_buildid_table(&machine->kernel_dsos.head, machine,
 					  machine->pid, kmisc, fd);
 	if (err == 0)
-		err = __dsos__write_buildid_table(&machine->user_dsos, machine,
-						  machine->pid, umisc, fd);
+		err = __dsos__write_buildid_table(&machine->user_dsos.head,
+						  machine, machine->pid, umisc,
+						  fd);
 	return err;
 }
 
@@ -455,9 +456,10 @@ static int __dsos__cache_build_ids(struct list_head *head,
 
 static int machine__cache_build_ids(struct machine *machine, const char *debugdir)
 {
-	int ret = __dsos__cache_build_ids(&machine->kernel_dsos, machine,
+	int ret = __dsos__cache_build_ids(&machine->kernel_dsos.head, machine,
 					  debugdir);
-	ret |= __dsos__cache_build_ids(&machine->user_dsos, machine, debugdir);
+	ret |= __dsos__cache_build_ids(&machine->user_dsos.head, machine,
+				       debugdir);
 	return ret;
 }
 
@@ -483,8 +485,10 @@ static int perf_session__cache_build_ids(struct perf_session *session)
 
 static bool machine__read_build_ids(struct machine *machine, bool with_hits)
 {
-	bool ret = __dsos__read_build_ids(&machine->kernel_dsos, with_hits);
-	ret |= __dsos__read_build_ids(&machine->user_dsos, with_hits);
+	bool ret;
+
+	ret  = __dsos__read_build_ids(&machine->kernel_dsos.head, with_hits);
+	ret |= __dsos__read_build_ids(&machine->user_dsos.head, with_hits);
 	return ret;
 }
 
@@ -1548,7 +1552,7 @@ static int __event_process_build_id(struct build_id_event *bev,
 				    struct perf_session *session)
 {
 	int err = -1;
-	struct list_head *head;
+	struct dsos *dsos;
 	struct machine *machine;
 	u16 misc;
 	struct dso *dso;
@@ -1563,22 +1567,22 @@ static int __event_process_build_id(struct build_id_event *bev,
 	switch (misc) {
 	case PERF_RECORD_MISC_KERNEL:
 		dso_type = DSO_TYPE_KERNEL;
-		head = &machine->kernel_dsos;
+		dsos = &machine->kernel_dsos;
 		break;
 	case PERF_RECORD_MISC_GUEST_KERNEL:
 		dso_type = DSO_TYPE_GUEST_KERNEL;
-		head = &machine->kernel_dsos;
+		dsos = &machine->kernel_dsos;
 		break;
 	case PERF_RECORD_MISC_USER:
 	case PERF_RECORD_MISC_GUEST_USER:
 		dso_type = DSO_TYPE_USER;
-		head = &machine->user_dsos;
+		dsos = &machine->user_dsos;
 		break;
 	default:
 		goto out;
 	}
 
-	dso = __dsos__findnew(head, filename);
+	dso = __dsos__findnew(dsos, filename);
 	if (dso != NULL) {
 		char sbuild_id[BUILD_ID_SIZE * 2 + 1];
 
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index b2ec38b..49a75ec 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -17,8 +17,8 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
 {
 	map_groups__init(&machine->kmaps);
 	RB_CLEAR_NODE(&machine->rb_node);
-	INIT_LIST_HEAD(&machine->user_dsos);
-	INIT_LIST_HEAD(&machine->kernel_dsos);
+	INIT_LIST_HEAD(&machine->user_dsos.head);
+	INIT_LIST_HEAD(&machine->kernel_dsos.head);
 
 	machine->threads = RB_ROOT;
 	INIT_LIST_HEAD(&machine->dead_threads);
@@ -72,11 +72,11 @@ out_delete:
 	return NULL;
 }
 
-static void dsos__delete(struct list_head *dsos)
+static void dsos__delete(struct dsos *dsos)
 {
 	struct dso *pos, *n;
 
-	list_for_each_entry_safe(pos, n, dsos, node) {
+	list_for_each_entry_safe(pos, n, &dsos->head, node) {
 		list_del(&pos->node);
 		dso__delete(pos);
 	}
@@ -477,23 +477,23 @@ struct map *machine__new_module(struct machine *machine, u64 start,
 size_t machines__fprintf_dsos(struct machines *machines, FILE *fp)
 {
 	struct rb_node *nd;
-	size_t ret = __dsos__fprintf(&machines->host.kernel_dsos, fp) +
-		     __dsos__fprintf(&machines->host.user_dsos, fp);
+	size_t ret = __dsos__fprintf(&machines->host.kernel_dsos.head, fp) +
+		     __dsos__fprintf(&machines->host.user_dsos.head, fp);
 
 	for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
 		struct machine *pos = rb_entry(nd, struct machine, rb_node);
-		ret += __dsos__fprintf(&pos->kernel_dsos, fp);
-		ret += __dsos__fprintf(&pos->user_dsos, fp);
+		ret += __dsos__fprintf(&pos->kernel_dsos.head, fp);
+		ret += __dsos__fprintf(&pos->user_dsos.head, fp);
 	}
 
 	return ret;
 }
 
-size_t machine__fprintf_dsos_buildid(struct machine *machine, FILE *fp,
+size_t machine__fprintf_dsos_buildid(struct machine *m, FILE *fp,
 				     bool (skip)(struct dso *dso, int parm), int parm)
 {
-	return __dsos__fprintf_buildid(&machine->kernel_dsos, fp, skip, parm) +
-	       __dsos__fprintf_buildid(&machine->user_dsos, fp, skip, parm);
+	return __dsos__fprintf_buildid(&m->kernel_dsos.head, fp, skip, parm) +
+	       __dsos__fprintf_buildid(&m->user_dsos.head, fp, skip, parm);
 }
 
 size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp,
@@ -994,7 +994,7 @@ static bool machine__uses_kcore(struct machine *machine)
 {
 	struct dso *dso;
 
-	list_for_each_entry(dso, &machine->kernel_dsos, node) {
+	list_for_each_entry(dso, &machine->kernel_dsos.head, node) {
 		if (dso__is_kcore(dso))
 			return true;
 	}
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 6a6bcc1..37b3487 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -22,6 +22,13 @@ extern const char *ref_reloc_sym_names[];
 
 struct vdso_info;
 
+/*
+ * DSOs are put into a list for fast iteration.
+ */
+struct dsos {
+	struct list_head head;
+};
+
 struct machine {
 	struct rb_node	  rb_node;
 	pid_t		  pid;
@@ -32,8 +39,8 @@ struct machine {
 	struct list_head  dead_threads;
 	struct thread	  *last_match;
 	struct vdso_info  *vdso_info;
-	struct list_head  user_dsos;
-	struct list_head  kernel_dsos;
+	struct dsos	  user_dsos;
+	struct dsos	  kernel_dsos;
 	struct map_groups kmaps;
 	struct map	  *vmlinux_maps[MAP__NR_TYPES];
 	u64		  kernel_start;
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index be37b5a..c150ca4 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -184,7 +184,8 @@ static struct dso *kernel_get_module_dso(const char *module)
 	const char *vmlinux_name;
 
 	if (module) {
-		list_for_each_entry(dso, &host_machine->kernel_dsos, node) {
+		list_for_each_entry(dso, &host_machine->kernel_dsos.head,
+				    node) {
 			if (strncmp(dso->short_name + 1, module,
 				    dso->short_name_len - 2) == 0)
 				goto found;
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 2a92e10..1e23a5b 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -6,6 +6,7 @@
 #include <inttypes.h>
 
 #include "symbol.h"
+#include "machine.h"
 #include "vdso.h"
 #include <symbol/kallsyms.h>
 #include "debug.h"
@@ -929,7 +930,11 @@ int dso__load_sym(struct dso *dso, struct map *map,
 				}
 				curr_dso->symtab_type = dso->symtab_type;
 				map_groups__insert(kmap->kmaps, curr_map);
-				dsos__add(&dso->node, curr_dso);
+				/*
+				 * The new DSO should go to the kernel DSOS
+				 */
+				dsos__add(&map->groups->machine->kernel_dsos,
+					  curr_dso);
 				dso__set_loaded(curr_dso, map->type);
 			} else
 				curr_dso = curr_map->dso;
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v5 2/2] perf tool: improves DSO long names lookup speed with rbtree
  2014-09-29 20:07 [PATCH v5 0/2] perf tool: improves DSO long names search speed with rbtree Waiman Long
  2014-09-29 20:07 ` [PATCH v5 1/2] perf tool: encapsulate dsos list head into struct dsos Waiman Long
@ 2014-09-29 20:07 ` Waiman Long
  2014-09-30 15:21   ` Arnaldo Carvalho de Melo
  1 sibling, 1 reply; 7+ messages in thread
From: Waiman Long @ 2014-09-29 20:07 UTC (permalink / raw)
  To: Peter Zijlstra, Paul Mackerras, Ingo Molnar, Arnaldo Carvalho de Melo
  Cc: linux-kernel, Scott J Norton, Douglas Hatch, Don Zickus,
	Jiri Olsa, Adrian Hunter, Namhyung Kim, Waiman Long

With a workload that spawns and destroys many threads and processes,
it was found that perf-mem could take a long time to post-process
the perf data after the target workload had completed its operation.
The performance bottleneck was found to be the lookup and insertion
of the new DSO structures (thousands of them in this case).

In a dual-socket Ivy-Bridge E7-4890 v2 machine (30-core, 60-thread),
the perf profile below shows what perf was doing after the profiled
AIM7 shared workload completed:

-     83.94%  perf  libc-2.11.3.so     [.] __strcmp_sse42
   - __strcmp_sse42
      - 99.82% map__new
           machine__process_mmap_event
           perf_session_deliver_event
           perf_session__process_event
           __perf_session__process_events
           cmd_record
           cmd_mem
           run_builtin
           main
           __libc_start_main
-     13.17%  perf  perf               [.] __dsos__findnew
     __dsos__findnew
     map__new
     machine__process_mmap_event
     perf_session_deliver_event
     perf_session__process_event
     __perf_session__process_events
     cmd_record
     cmd_mem
     run_builtin
     main
     __libc_start_main

So about 97% of the CPU time was spent in the map__new() function
trying to insert a new DSO entry into the DSO linked list. The whole
post-processing step took about 9 minutes.

The DSO structures are currently searched linearly. So the total
processing time will be proportional to n^2.

To overcome this performance problem, the DSO code is modified to
also put the DSO structures in a RB tree sorted by their long names,
in addition to being in a simple linked list. With this change,
the processing time will become proportional to n*log(n), which will
be much quicker for large n. However, the short name will still be
searched using the old linear searching method.  With this patch
in place, the same perf-mem post-processing step took less than 30
seconds to complete.

Signed-off-by: Waiman Long <Waiman.Long@hp.com>
---
 tools/perf/util/dso.c     |   72 ++++++++++++++++++++++++++++++++++++++++++--
 tools/perf/util/dso.h     |    1 +
 tools/perf/util/machine.c |    1 +
 tools/perf/util/machine.h |    4 ++-
 4 files changed, 73 insertions(+), 5 deletions(-)

diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 901a58f..9a81c03 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -653,6 +653,67 @@ struct dso *dso__kernel_findnew(struct machine *machine, const char *name,
 	return dso;
 }
 
+/*
+ * Find a matching entry and/or link current entry to RB tree.
+ * Either one of the dso or name parameter must be non-NULL or the
+ * function will not work.
+ */
+static struct dso *dso__findlink_by_longname(struct rb_root *root,
+					     struct dso *dso, const char *name)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node  *parent = NULL;
+	int warned = false;
+
+	if (!name)
+		name = dso->long_name;
+	/*
+	 * Find node with the matching name
+	 */
+	while (*p) {
+		struct dso *this = rb_entry(*p, struct dso, rb_node);
+		int rc = strcmp(name, this->long_name);
+
+		parent = *p;
+		if (rc == 0) {
+			/*
+			 * In case the new DSO is a duplicate of an existing
+			 * one, print an one-time warning & put the new entry
+			 * at the end of the list of duplicates.
+			 */
+			if (!dso || (dso == this))
+				return this;	/* Find matching dso */
+			/*
+			 * The core kernel DSOs may have duplicated long name.
+			 * (See dso__load_sym()). Don't print warning for them.
+			 */
+			if (!warned && !strstr(name, "kernel.kallsyms")
+				    && !strstr(name, "/vmlinux")) {
+				pr_warning("Duplicated dso long name: %s\n",
+					   name);
+				warned = true;
+			}
+			rc = 1;
+		}
+		if (rc < 0)
+			p = &parent->rb_left;
+		else
+			p = &parent->rb_right;
+	}
+	if (dso) {
+		/* Add new node and rebalance tree */
+		rb_link_node(&dso->rb_node, parent, p);
+		rb_insert_color(&dso->rb_node, root);
+	}
+	return NULL;
+}
+
+static inline struct dso *
+dso__find_by_longname(struct rb_root *root, const char *name)
+{
+	return dso__findlink_by_longname(root, NULL, name);
+}
+
 void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated)
 {
 	if (name == NULL)
@@ -755,6 +816,7 @@ struct dso *dso__new(const char *name)
 		dso->a2l_fails = 1;
 		dso->kernel = DSO_TYPE_USER;
 		dso->needs_swap = DSO_SWAP__UNSET;
+		RB_CLEAR_NODE(&dso->rb_node);
 		INIT_LIST_HEAD(&dso->node);
 		INIT_LIST_HEAD(&dso->data.open_entry);
 	}
@@ -765,6 +827,10 @@ struct dso *dso__new(const char *name)
 void dso__delete(struct dso *dso)
 {
 	int i;
+
+	if (!RB_EMPTY_NODE(&dso->rb_node))
+		pr_err("DSO %s is still in rbtree when being deleted!\n",
+		       dso->long_name);
 	for (i = 0; i < MAP__NR_TYPES; ++i)
 		symbols__delete(&dso->symbols[i]);
 
@@ -854,6 +920,7 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits)
 void dsos__add(struct dsos *dsos, struct dso *dso)
 {
 	list_add_tail(&dso->node, &dsos->head);
+	dso__findlink_by_longname(&dsos->root, dso, NULL);
 }
 
 struct dso *dsos__find(const struct dsos *dsos, const char *name,
@@ -867,10 +934,7 @@ struct dso *dsos__find(const struct dsos *dsos, const char *name,
 				return pos;
 		return NULL;
 	}
-	list_for_each_entry(pos, &dsos->head, node)
-		if (strcmp(pos->long_name, name) == 0)
-			return pos;
-	return NULL;
+	return dso__find_by_longname((struct rb_root *)&dsos->root, name);
 }
 
 struct dso *__dsos__findnew(struct dsos *dsos, const char *name)
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 91eec17..9ecd68b 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -94,6 +94,7 @@ struct dsos;
 
 struct dso {
 	struct list_head node;
+	struct rb_node	 rb_node;	/* rbtree node sorted by long name */
 	struct rb_root	 symbols[MAP__NR_TYPES];
 	struct rb_root	 symbol_names[MAP__NR_TYPES];
 	void		 *a2l;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 49a75ec..b7d477f 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -77,6 +77,7 @@ static void dsos__delete(struct dsos *dsos)
 	struct dso *pos, *n;
 
 	list_for_each_entry_safe(pos, n, &dsos->head, node) {
+		RB_CLEAR_NODE(&pos->rb_node);
 		list_del(&pos->node);
 		dso__delete(pos);
 	}
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 37b3487..5a5f765 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -23,10 +23,12 @@ extern const char *ref_reloc_sym_names[];
 struct vdso_info;
 
 /*
- * DSOs are put into a list for fast iteration.
+ * DSOs are put into both a list for fast iteration and rbtree for fast
+ * long name lookup.
  */
 struct dsos {
 	struct list_head head;
+	struct rb_root   root;	/* rbtree root sorted by long name */
 };
 
 struct machine {
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v5 2/2] perf tool: improves DSO long names lookup speed with rbtree
  2014-09-29 20:07 ` [PATCH v5 2/2] perf tool: improves DSO long names lookup speed with rbtree Waiman Long
@ 2014-09-30 15:21   ` Arnaldo Carvalho de Melo
  2014-09-30 16:49     ` Waiman Long
  0 siblings, 1 reply; 7+ messages in thread
From: Arnaldo Carvalho de Melo @ 2014-09-30 15:21 UTC (permalink / raw)
  To: Waiman Long
  Cc: Peter Zijlstra, Paul Mackerras, Ingo Molnar, linux-kernel,
	Scott J Norton, Douglas Hatch, Don Zickus, Jiri Olsa,
	Adrian Hunter, Namhyung Kim

Em Mon, Sep 29, 2014 at 04:07:29PM -0400, Waiman Long escreveu:
> With workload that spawns and destroys many threads and processes,
> it was found that perf-mem could took a long time to post-process
> the perf data after the target workload had completed its operation.
> The performance bottleneck was found to be the lookup and insertion
> of the new DSO structures (thousands of them in this case).
> 
> In a dual-socket Ivy-Bridge E7-4890 v2 machine (30-core, 60-thread),
> the perf profile below shows what perf was doing after the profiled
> AIM7 shared workload completed:
> 
> -     83.94%  perf  libc-2.11.3.so     [.] __strcmp_sse42
>    - __strcmp_sse42
>       - 99.82% map__new
>            machine__process_mmap_event
>            perf_session_deliver_event
>            perf_session__process_event
>            __perf_session__process_events
>            cmd_record
>            cmd_mem
>            run_builtin
>            main
>            __libc_start_main
> -     13.17%  perf  perf               [.] __dsos__findnew
>      __dsos__findnew
>      map__new
>      machine__process_mmap_event
>      perf_session_deliver_event
>      perf_session__process_event
>      __perf_session__process_events
>      cmd_record
>      cmd_mem
>      run_builtin
>      main
>      __libc_start_main
> 
> So about 97% of CPU times were spent in the map__new() function
> trying to insert new DSO entry into the DSO linked list. The whole
> post-processing step took about 9 minutes.
> 
> The DSO structures are currently searched linearly. So the total
> processing time will be proportional to n^2.
> 
> To overcome this performance problem, the DSO code is modified to
> also put the DSO structures in a RB tree sorted by its long name
> in additional to being in a simple linked list. With this change,
> the processing time will become proportional to n*log(n) which will
> be much quicker for large n. However, the short name will still be
> searched using the old linear searching method.  With that patch
> in place, the same perf-mem post-processing step took less than 30
> seconds to complete.
> 
> Signed-off-by: Waiman Long <Waiman.Long@hp.com>
> ---
>  tools/perf/util/dso.c     |   72 ++++++++++++++++++++++++++++++++++++++++++--
>  tools/perf/util/dso.h     |    1 +
>  tools/perf/util/machine.c |    1 +
>  tools/perf/util/machine.h |    4 ++-
>  4 files changed, 73 insertions(+), 5 deletions(-)
> 
> diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
> index 901a58f..9a81c03 100644
> --- a/tools/perf/util/dso.c
> +++ b/tools/perf/util/dso.c
> @@ -653,6 +653,67 @@ struct dso *dso__kernel_findnew(struct machine *machine, const char *name,
>  	return dso;
>  }
>  
> +/*
> + * Find a matching entry and/or link current entry to RB tree.
> + * Either one of the dso or name parameter must be non-NULL or the
> + * function will not work.
> + */
> +static struct dso *dso__findlink_by_longname(struct rb_root *root,
> +					     struct dso *dso, const char *name)
> +{
> +	struct rb_node **p = &root->rb_node;
> +	struct rb_node  *parent = NULL;
> +	int warned = false;
> +
> +	if (!name)
> +		name = dso->long_name;
> +	/*
> +	 * Find node with the matching name
> +	 */
> +	while (*p) {
> +		struct dso *this = rb_entry(*p, struct dso, rb_node);
> +		int rc = strcmp(name, this->long_name);
> +
> +		parent = *p;
> +		if (rc == 0) {
> +			/*
> +			 * In case the new DSO is a duplicate of an existing
> +			 * one, print an one-time warning & put the new entry
> +			 * at the end of the list of duplicates.
> +			 */
> +			if (!dso || (dso == this))
> +				return this;	/* Find matching dso */
> +			/*
> +			 * The core kernel DSOs may have duplicated long name.
> +			 * (See dso__load_sym()). Don't print warning for them.
> +			 */
> +			if (!warned && !strstr(name, "kernel.kallsyms")
> +				    && !strstr(name, "/vmlinux")) {
> +				pr_warning("Duplicated dso long name: %s\n",
> +					   name);
> +				warned = true;

I still wonder if in this case we should just return, i.e. why would we
want to have multiple entries with the same name here? Anyway, I guess
it doesn't hurt, right?

Something to be further investigated to find a better solution, but I
guess that the patch as-is now should provide that speedup without
introducing any new oddities. Will apply.

> +			}
> +			rc = 1;
> +		}
> +		if (rc < 0)
> +			p = &parent->rb_left;
> +		else
> +			p = &parent->rb_right;
> +	}
> +	if (dso) {
> +		/* Add new node and rebalance tree */
> +		rb_link_node(&dso->rb_node, parent, p);
> +		rb_insert_color(&dso->rb_node, root);
> +	}
> +	return NULL;
> +}
> +
> +static inline struct dso *
> +dso__find_by_longname(struct rb_root *root, const char *name)
> +{
> +	return dso__findlink_by_longname(root, NULL, name);
> +}
> +
>  void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated)
>  {
>  	if (name == NULL)
> @@ -755,6 +816,7 @@ struct dso *dso__new(const char *name)
>  		dso->a2l_fails = 1;
>  		dso->kernel = DSO_TYPE_USER;
>  		dso->needs_swap = DSO_SWAP__UNSET;
> +		RB_CLEAR_NODE(&dso->rb_node);
>  		INIT_LIST_HEAD(&dso->node);
>  		INIT_LIST_HEAD(&dso->data.open_entry);
>  	}
> @@ -765,6 +827,10 @@ struct dso *dso__new(const char *name)
>  void dso__delete(struct dso *dso)
>  {
>  	int i;
> +
> +	if (!RB_EMPTY_NODE(&dso->rb_node))
> +		pr_err("DSO %s is still in rbtree when being deleted!\n",
> +		       dso->long_name);
>  	for (i = 0; i < MAP__NR_TYPES; ++i)
>  		symbols__delete(&dso->symbols[i]);
>  
> @@ -854,6 +920,7 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits)
>  void dsos__add(struct dsos *dsos, struct dso *dso)
>  {
>  	list_add_tail(&dso->node, &dsos->head);
> +	dso__findlink_by_longname(&dsos->root, dso, NULL);
>  }
>  
>  struct dso *dsos__find(const struct dsos *dsos, const char *name,
> @@ -867,10 +934,7 @@ struct dso *dsos__find(const struct dsos *dsos, const char *name,
>  				return pos;
>  		return NULL;
>  	}
> -	list_for_each_entry(pos, &dsos->head, node)
> -		if (strcmp(pos->long_name, name) == 0)
> -			return pos;
> -	return NULL;
> +	return dso__find_by_longname((struct rb_root *)&dsos->root, name);

Why do you need this cast? Humm, because in the end it will get to a
function that either does insertion or does a simple search. Ok, I think
that dso__find_by_longname is the closest to that thing where the cast
should be applied, after making dso__find_by_longname receive a const
rb_root pointer.

I.e. the dso__find_by_longname name implies it will not change any of
its parameters; it's supposed to be a simple search. I will make this
change while applying it.

- Arnaldo

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v5 2/2] perf tool: improves DSO long names lookup speed with rbtree
  2014-09-30 15:21   ` Arnaldo Carvalho de Melo
@ 2014-09-30 16:49     ` Waiman Long
  2014-09-30 17:38       ` Waiman Long
  0 siblings, 1 reply; 7+ messages in thread
From: Waiman Long @ 2014-09-30 16:49 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Peter Zijlstra, Paul Mackerras, Ingo Molnar, linux-kernel,
	Scott J Norton, Douglas Hatch, Don Zickus, Jiri Olsa,
	Adrian Hunter, Namhyung Kim

On 09/30/2014 11:21 AM, Arnaldo Carvalho de Melo wrote:
> Em Mon, Sep 29, 2014 at 04:07:29PM -0400, Waiman Long escreveu:
>> With workload that spawns and destroys many threads and processes,
>> it was found that perf-mem could took a long time to post-process
>> the perf data after the target workload had completed its operation.
>> The performance bottleneck was found to be the lookup and insertion
>> of the new DSO structures (thousands of them in this case).
>>
>> In a dual-socket Ivy-Bridge E7-4890 v2 machine (30-core, 60-thread),
>> the perf profile below shows what perf was doing after the profiled
>> AIM7 shared workload completed:
>>
>> -     83.94%  perf  libc-2.11.3.so     [.] __strcmp_sse42
>>     - __strcmp_sse42
>>        - 99.82% map__new
>>             machine__process_mmap_event
>>             perf_session_deliver_event
>>             perf_session__process_event
>>             __perf_session__process_events
>>             cmd_record
>>             cmd_mem
>>             run_builtin
>>             main
>>             __libc_start_main
>> -     13.17%  perf  perf               [.] __dsos__findnew
>>       __dsos__findnew
>>       map__new
>>       machine__process_mmap_event
>>       perf_session_deliver_event
>>       perf_session__process_event
>>       __perf_session__process_events
>>       cmd_record
>>       cmd_mem
>>       run_builtin
>>       main
>>       __libc_start_main
>>
>> So about 97% of CPU times were spent in the map__new() function
>> trying to insert new DSO entry into the DSO linked list. The whole
>> post-processing step took about 9 minutes.
>>
>> The DSO structures are currently searched linearly. So the total
>> processing time will be proportional to n^2.
>>
>> To overcome this performance problem, the DSO code is modified to
>> also put the DSO structures in a RB tree sorted by its long name
>> in additional to being in a simple linked list. With this change,
>> the processing time will become proportional to n*log(n) which will
>> be much quicker for large n. However, the short name will still be
>> searched using the old linear searching method.  With that patch
>> in place, the same perf-mem post-processing step took less than 30
>> seconds to complete.
>>
>> Signed-off-by: Waiman Long<Waiman.Long@hp.com>
>> ---
>>   tools/perf/util/dso.c     |   72 ++++++++++++++++++++++++++++++++++++++++++--
>>   tools/perf/util/dso.h     |    1 +
>>   tools/perf/util/machine.c |    1 +
>>   tools/perf/util/machine.h |    4 ++-
>>   4 files changed, 73 insertions(+), 5 deletions(-)
>>
>> diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
>> index 901a58f..9a81c03 100644
>> --- a/tools/perf/util/dso.c
>> +++ b/tools/perf/util/dso.c
>> @@ -653,6 +653,67 @@ struct dso *dso__kernel_findnew(struct machine *machine, const char *name,
>>   	return dso;
>>   }
>>
>> +/*
>> + * Find a matching entry and/or link current entry to RB tree.
>> + * Either one of the dso or name parameter must be non-NULL or the
>> + * function will not work.
>> + */
>> +static struct dso *dso__findlink_by_longname(struct rb_root *root,
>> +					     struct dso *dso, const char *name)
>> +{
>> +	struct rb_node **p =&root->rb_node;
>> +	struct rb_node  *parent = NULL;
>> +	int warned = false;
>> +
>> +	if (!name)
>> +		name = dso->long_name;
>> +	/*
>> +	 * Find node with the matching name
>> +	 */
>> +	while (*p) {
>> +		struct dso *this = rb_entry(*p, struct dso, rb_node);
>> +		int rc = strcmp(name, this->long_name);
>> +
>> +		parent = *p;
>> +		if (rc == 0) {
>> +			/*
>> +			 * In case the new DSO is a duplicate of an existing
>> +			 * one, print an one-time warning&  put the new entry
>> +			 * at the end of the list of duplicates.
>> +			 */
>> +			if (!dso || (dso == this))
>> +				return this;	/* Find matching dso */
>> +			/*
>> +			 * The core kernel DSOs may have duplicated long name.
>> +			 * (See dso__load_sym()). Don't print warning for them.
>> +			 */
>> +			if (!warned&&  !strstr(name, "kernel.kallsyms")
>> +				&&  !strstr(name, "/vmlinux")) {
>> +				pr_warning("Duplicated dso long name: %s\n",
>> +					   name);
>> +				warned = true;
> I still wonder if in this case we should just return, i.e. why would we
> want to have multiple entries with the same name here? Anyway, I guess
> it doesn't hurt, right?
>
> Something to be further investigated to find a better solution, but I
> guess that the patch as-is now should provide that speedup without
> introducing any new oddities. Will apply.

If I don't add the kernel name check, I will get a warning every time I 
run mem recording with the workloads that I am using. So the duplication 
is already happening in the current code. I think the short names may be 
different. I will do more tests to find out. If that is the case, an 
alternative is to do a short name comparison if the long names match.

>> +			}
>> +			rc = 1;
>> +		}
>> +		if (rc<  0)
>> +			p =&parent->rb_left;
>> +		else
>> +			p =&parent->rb_right;
>> +	}
>> +	if (dso) {
>> +		/* Add new node and rebalance tree */
>> +		rb_link_node(&dso->rb_node, parent, p);
>> +		rb_insert_color(&dso->rb_node, root);
>> +	}
>> +	return NULL;
>> +}
>> +
>> +static inline struct dso *
>> +dso__find_by_longname(struct rb_root *root, const char *name)
>> +{
>> +	return dso__findlink_by_longname(root, NULL, name);
>> +}
>> +
>>   void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated)
>>   {
>>   	if (name == NULL)
>> @@ -755,6 +816,7 @@ struct dso *dso__new(const char *name)
>>   		dso->a2l_fails = 1;
>>   		dso->kernel = DSO_TYPE_USER;
>>   		dso->needs_swap = DSO_SWAP__UNSET;
>> +		RB_CLEAR_NODE(&dso->rb_node);
>>   		INIT_LIST_HEAD(&dso->node);
>>   		INIT_LIST_HEAD(&dso->data.open_entry);
>>   	}
>> @@ -765,6 +827,10 @@ struct dso *dso__new(const char *name)
>>   void dso__delete(struct dso *dso)
>>   {
>>   	int i;
>> +
>> +	if (!RB_EMPTY_NODE(&dso->rb_node))
>> +		pr_err("DSO %s is still in rbtree when being deleted!\n",
>> +		       dso->long_name);
>>   	for (i = 0; i<  MAP__NR_TYPES; ++i)
>>   		symbols__delete(&dso->symbols[i]);
>>
>> @@ -854,6 +920,7 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits)
>>   void dsos__add(struct dsos *dsos, struct dso *dso)
>>   {
>>   	list_add_tail(&dso->node,&dsos->head);
>> +	dso__findlink_by_longname(&dsos->root, dso, NULL);
>>   }
>>
>>   struct dso *dsos__find(const struct dsos *dsos, const char *name,
>> @@ -867,10 +934,7 @@ struct dso *dsos__find(const struct dsos *dsos, const char *name,
>>   				return pos;
>>   		return NULL;
>>   	}
>> -	list_for_each_entry(pos,&dsos->head, node)
>> -		if (strcmp(pos->long_name, name) == 0)
>> -			return pos;
>> -	return NULL;
>> +	return dso__find_by_longname((struct rb_root *)&dsos->root, name);
> Why do you need this cast? Humm, because in the end it will get to a
> function that either does insertion or does a simple search. Ok, I think
> that dso__find_by_longname is the closest to that thing where the cast
> should be applied, after making dso__find_by_longname receive a const
> rb_root pointer.
>
> I.e. the dso__find_by_longname name implies it will not change any of
> its parameters, its supposed to be a simple search. I will do this
> change while applying it.
>
> - Arnaldo

Yes, you are right. I should do the casting in dso__find_by_longname(). 
Please make the adjustment.

Thanks,
Longman

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v5 2/2] perf tool: improves DSO long names lookup speed with rbtree
  2014-09-30 16:49     ` Waiman Long
@ 2014-09-30 17:38       ` Waiman Long
  0 siblings, 0 replies; 7+ messages in thread
From: Waiman Long @ 2014-09-30 17:38 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Peter Zijlstra, Paul Mackerras, Ingo Molnar, linux-kernel,
	Scott J Norton, Douglas Hatch, Don Zickus, Jiri Olsa,
	Adrian Hunter, Namhyung Kim

On 09/30/2014 12:49 PM, Waiman Long wrote:
> On 09/30/2014 11:21 AM, Arnaldo Carvalho de Melo wrote:
>> Em Mon, Sep 29, 2014 at 04:07:29PM -0400, Waiman Long escreveu:
>>> With workload that spawns and destroys many threads and processes,
>>> it was found that perf-mem could took a long time to post-process
>>> the perf data after the target workload had completed its operation.
>>> The performance bottleneck was found to be the lookup and insertion
>>> of the new DSO structures (thousands of them in this case).
>>>
>>> In a dual-socket Ivy-Bridge E7-4890 v2 machine (30-core, 60-thread),
>>> the perf profile below shows what perf was doing after the profiled
>>> AIM7 shared workload completed:
>>>
>>> -     83.94%  perf  libc-2.11.3.so     [.] __strcmp_sse42
>>>     - __strcmp_sse42
>>>        - 99.82% map__new
>>>             machine__process_mmap_event
>>>             perf_session_deliver_event
>>>             perf_session__process_event
>>>             __perf_session__process_events
>>>             cmd_record
>>>             cmd_mem
>>>             run_builtin
>>>             main
>>>             __libc_start_main
>>> -     13.17%  perf  perf               [.] __dsos__findnew
>>>       __dsos__findnew
>>>       map__new
>>>       machine__process_mmap_event
>>>       perf_session_deliver_event
>>>       perf_session__process_event
>>>       __perf_session__process_events
>>>       cmd_record
>>>       cmd_mem
>>>       run_builtin
>>>       main
>>>       __libc_start_main
>>>
>>> So about 97% of CPU times were spent in the map__new() function
>>> trying to insert new DSO entry into the DSO linked list. The whole
>>> post-processing step took about 9 minutes.
>>>
>>> The DSO structures are currently searched linearly. So the total
>>> processing time will be proportional to n^2.
>>>
>>> To overcome this performance problem, the DSO code is modified to
>>> also put the DSO structures in a RB tree sorted by its long name
>>> in additional to being in a simple linked list. With this change,
>>> the processing time will become proportional to n*log(n) which will
>>> be much quicker for large n. However, the short name will still be
>>> searched using the old linear searching method.  With that patch
>>> in place, the same perf-mem post-processing step took less than 30
>>> seconds to complete.
>>>
>>> Signed-off-by: Waiman Long<Waiman.Long@hp.com>
>>> ---
>>>   tools/perf/util/dso.c     |   72 
>>> ++++++++++++++++++++++++++++++++++++++++++--
>>>   tools/perf/util/dso.h     |    1 +
>>>   tools/perf/util/machine.c |    1 +
>>>   tools/perf/util/machine.h |    4 ++-
>>>   4 files changed, 73 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
>>> index 901a58f..9a81c03 100644
>>> --- a/tools/perf/util/dso.c
>>> +++ b/tools/perf/util/dso.c
>>> @@ -653,6 +653,67 @@ struct dso *dso__kernel_findnew(struct machine 
>>> *machine, const char *name,
>>>       return dso;
>>>   }
>>>
>>> +/*
>>> + * Find a matching entry and/or link current entry to RB tree.
>>> + * Either one of the dso or name parameter must be non-NULL or the
>>> + * function will not work.
>>> + */
>>> +static struct dso *dso__findlink_by_longname(struct rb_root *root,
>>> +                         struct dso *dso, const char *name)
>>> +{
>>> +    struct rb_node **p =&root->rb_node;
>>> +    struct rb_node  *parent = NULL;
>>> +    int warned = false;
>>> +
>>> +    if (!name)
>>> +        name = dso->long_name;
>>> +    /*
>>> +     * Find node with the matching name
>>> +     */
>>> +    while (*p) {
>>> +        struct dso *this = rb_entry(*p, struct dso, rb_node);
>>> +        int rc = strcmp(name, this->long_name);
>>> +
>>> +        parent = *p;
>>> +        if (rc == 0) {
>>> +            /*
>>> +             * In case the new DSO is a duplicate of an existing
>>> +             * one, print an one-time warning&  put the new entry
>>> +             * at the end of the list of duplicates.
>>> +             */
>>> +            if (!dso || (dso == this))
>>> +                return this;    /* Find matching dso */
>>> +            /*
>>> +             * The core kernel DSOs may have duplicated long name.
>>> +             * (See dso__load_sym()). Don't print warning for them.
>>> +             */
>>> +            if (!warned&&  !strstr(name, "kernel.kallsyms")
>>> + &&  !strstr(name, "/vmlinux")) {
>>> +                pr_warning("Duplicated dso long name: %s\n",
>>> +                       name);
>>> +                warned = true;
>> I still wonder if in this case we should just return, i.e. why would we
>> want to have multiple entries with the same name here? Anyway, I guess
>> it doesn't hurt, right?
>>
>> Something to be further investigated to find a better solution, but I
>> guess that the patch as-is now should provide that speedup without
>> introducing any new oddities. Will apply.
>
> If I don't add the kernel name check, I will get a warning every time 
> I run mem recording with the workloads that I am using. So it is 
> happening in the current code. I think the short name may be 
> different. I will do more test to find out. If that is the case, an 
> alternative is to do a short name comparison if the long name match.
>

The short names are indeed different when the long names match. I have 
just sent out the v6 patch with the change. Hopefully that will address 
your remaining concern about this patch.

-Longman

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [tip:perf/core] perf symbols: Encapsulate dsos list head into struct dsos
  2014-09-29 20:07 ` [PATCH v5 1/2] perf tool: encapsulate dsos list head into struct dsos Waiman Long
@ 2014-10-03  5:26   ` tip-bot for Waiman Long
  0 siblings, 0 replies; 7+ messages in thread
From: tip-bot for Waiman Long @ 2014-10-03  5:26 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: acme, linux-kernel, paulus, mingo, hpa, mingo, jolsa, peterz,
	namhyung, Waiman.Long, adrian.hunter, doug.hatch, tglx,
	scott.norton, dzickus

Commit-ID:  8fa7d87f91479f7124142ca4ad93a37b80f8c1c0
Gitweb:     http://git.kernel.org/tip/8fa7d87f91479f7124142ca4ad93a37b80f8c1c0
Author:     Waiman Long <Waiman.Long@hp.com>
AuthorDate: Mon, 29 Sep 2014 16:07:28 -0400
Committer:  Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Tue, 30 Sep 2014 12:11:49 -0300

perf symbols: Encapsulate dsos list head into struct dsos

This is a precursor patch to enable long name searching of DSOs using
a rbtree.

In this patch, a new dsos structure is created which contains only a
list head structure for the moment.

The new dsos structure is used, in turn, in the machine structure for
the user_dsos and kernel_dsos fields.

Only the following 3 dsos functions are modified to accept the new dsos
structure parameter instead of list_head:

 - dsos__add()
 - dsos__find()
 - __dsos__findnew()

Signed-off-by: Waiman Long <Waiman.Long@hp.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Douglas Hatch <doug.hatch@hp.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Scott J Norton <scott.norton@hp.com>
Link: http://lkml.kernel.org/r/1412021249-19201-2-git-send-email-Waiman.Long@hp.com
[ Move struct dsos to dso.h so the dso methods depend less on machine.h ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/dso.c         | 17 +++++++++--------
 tools/perf/util/dso.h         | 13 ++++++++++---
 tools/perf/util/header.c      | 32 ++++++++++++++++++--------------
 tools/perf/util/machine.c     | 24 ++++++++++++------------
 tools/perf/util/machine.h     |  5 +++--
 tools/perf/util/probe-event.c |  3 ++-
 tools/perf/util/symbol-elf.c  |  7 ++++++-
 7 files changed, 60 insertions(+), 41 deletions(-)

diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 55e39dc..901a58f 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -851,35 +851,36 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits)
 	return have_build_id;
 }
 
-void dsos__add(struct list_head *head, struct dso *dso)
+void dsos__add(struct dsos *dsos, struct dso *dso)
 {
-	list_add_tail(&dso->node, head);
+	list_add_tail(&dso->node, &dsos->head);
 }
 
-struct dso *dsos__find(const struct list_head *head, const char *name, bool cmp_short)
+struct dso *dsos__find(const struct dsos *dsos, const char *name,
+		       bool cmp_short)
 {
 	struct dso *pos;
 
 	if (cmp_short) {
-		list_for_each_entry(pos, head, node)
+		list_for_each_entry(pos, &dsos->head, node)
 			if (strcmp(pos->short_name, name) == 0)
 				return pos;
 		return NULL;
 	}
-	list_for_each_entry(pos, head, node)
+	list_for_each_entry(pos, &dsos->head, node)
 		if (strcmp(pos->long_name, name) == 0)
 			return pos;
 	return NULL;
 }
 
-struct dso *__dsos__findnew(struct list_head *head, const char *name)
+struct dso *__dsos__findnew(struct dsos *dsos, const char *name)
 {
-	struct dso *dso = dsos__find(head, name, false);
+	struct dso *dso = dsos__find(dsos, name, false);
 
 	if (!dso) {
 		dso = dso__new(name);
 		if (dso != NULL) {
-			dsos__add(head, dso);
+			dsos__add(dsos, dso);
 			dso__set_basename(dso);
 		}
 	}
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 5e463c0..b63dc98 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -90,6 +90,13 @@ struct dso_cache {
 	char data[0];
 };
 
+/*
+ * DSOs are put into a list for fast iteration.
+ */
+struct dsos {
+	struct list_head head;
+};
+
 struct dso {
 	struct list_head node;
 	struct rb_root	 symbols[MAP__NR_TYPES];
@@ -224,10 +231,10 @@ struct map *dso__new_map(const char *name);
 struct dso *dso__kernel_findnew(struct machine *machine, const char *name,
 				const char *short_name, int dso_type);
 
-void dsos__add(struct list_head *head, struct dso *dso);
-struct dso *dsos__find(const struct list_head *head, const char *name,
+void dsos__add(struct dsos *dsos, struct dso *dso);
+struct dso *dsos__find(const struct dsos *dsos, const char *name,
 		       bool cmp_short);
-struct dso *__dsos__findnew(struct list_head *head, const char *name);
+struct dso *__dsos__findnew(struct dsos *dsos, const char *name);
 bool __dsos__read_build_ids(struct list_head *head, bool with_hits);
 
 size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 158c787..ce0de00 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -214,11 +214,11 @@ static int machine__hit_all_dsos(struct machine *machine)
 {
 	int err;
 
-	err = __dsos__hit_all(&machine->kernel_dsos);
+	err = __dsos__hit_all(&machine->kernel_dsos.head);
 	if (err)
 		return err;
 
-	return __dsos__hit_all(&machine->user_dsos);
+	return __dsos__hit_all(&machine->user_dsos.head);
 }
 
 int dsos__hit_all(struct perf_session *session)
@@ -288,11 +288,12 @@ static int machine__write_buildid_table(struct machine *machine, int fd)
 		umisc = PERF_RECORD_MISC_GUEST_USER;
 	}
 
-	err = __dsos__write_buildid_table(&machine->kernel_dsos, machine,
+	err = __dsos__write_buildid_table(&machine->kernel_dsos.head, machine,
 					  machine->pid, kmisc, fd);
 	if (err == 0)
-		err = __dsos__write_buildid_table(&machine->user_dsos, machine,
-						  machine->pid, umisc, fd);
+		err = __dsos__write_buildid_table(&machine->user_dsos.head,
+						  machine, machine->pid, umisc,
+						  fd);
 	return err;
 }
 
@@ -455,9 +456,10 @@ static int __dsos__cache_build_ids(struct list_head *head,
 
 static int machine__cache_build_ids(struct machine *machine, const char *debugdir)
 {
-	int ret = __dsos__cache_build_ids(&machine->kernel_dsos, machine,
+	int ret = __dsos__cache_build_ids(&machine->kernel_dsos.head, machine,
 					  debugdir);
-	ret |= __dsos__cache_build_ids(&machine->user_dsos, machine, debugdir);
+	ret |= __dsos__cache_build_ids(&machine->user_dsos.head, machine,
+				       debugdir);
 	return ret;
 }
 
@@ -483,8 +485,10 @@ static int perf_session__cache_build_ids(struct perf_session *session)
 
 static bool machine__read_build_ids(struct machine *machine, bool with_hits)
 {
-	bool ret = __dsos__read_build_ids(&machine->kernel_dsos, with_hits);
-	ret |= __dsos__read_build_ids(&machine->user_dsos, with_hits);
+	bool ret;
+
+	ret  = __dsos__read_build_ids(&machine->kernel_dsos.head, with_hits);
+	ret |= __dsos__read_build_ids(&machine->user_dsos.head, with_hits);
 	return ret;
 }
 
@@ -1548,7 +1552,7 @@ static int __event_process_build_id(struct build_id_event *bev,
 				    struct perf_session *session)
 {
 	int err = -1;
-	struct list_head *head;
+	struct dsos *dsos;
 	struct machine *machine;
 	u16 misc;
 	struct dso *dso;
@@ -1563,22 +1567,22 @@ static int __event_process_build_id(struct build_id_event *bev,
 	switch (misc) {
 	case PERF_RECORD_MISC_KERNEL:
 		dso_type = DSO_TYPE_KERNEL;
-		head = &machine->kernel_dsos;
+		dsos = &machine->kernel_dsos;
 		break;
 	case PERF_RECORD_MISC_GUEST_KERNEL:
 		dso_type = DSO_TYPE_GUEST_KERNEL;
-		head = &machine->kernel_dsos;
+		dsos = &machine->kernel_dsos;
 		break;
 	case PERF_RECORD_MISC_USER:
 	case PERF_RECORD_MISC_GUEST_USER:
 		dso_type = DSO_TYPE_USER;
-		head = &machine->user_dsos;
+		dsos = &machine->user_dsos;
 		break;
 	default:
 		goto out;
 	}
 
-	dso = __dsos__findnew(head, filename);
+	dso = __dsos__findnew(dsos, filename);
 	if (dso != NULL) {
 		char sbuild_id[BUILD_ID_SIZE * 2 + 1];
 
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index b2ec38b..49a75ec 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -17,8 +17,8 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
 {
 	map_groups__init(&machine->kmaps);
 	RB_CLEAR_NODE(&machine->rb_node);
-	INIT_LIST_HEAD(&machine->user_dsos);
-	INIT_LIST_HEAD(&machine->kernel_dsos);
+	INIT_LIST_HEAD(&machine->user_dsos.head);
+	INIT_LIST_HEAD(&machine->kernel_dsos.head);
 
 	machine->threads = RB_ROOT;
 	INIT_LIST_HEAD(&machine->dead_threads);
@@ -72,11 +72,11 @@ out_delete:
 	return NULL;
 }
 
-static void dsos__delete(struct list_head *dsos)
+static void dsos__delete(struct dsos *dsos)
 {
 	struct dso *pos, *n;
 
-	list_for_each_entry_safe(pos, n, dsos, node) {
+	list_for_each_entry_safe(pos, n, &dsos->head, node) {
 		list_del(&pos->node);
 		dso__delete(pos);
 	}
@@ -477,23 +477,23 @@ struct map *machine__new_module(struct machine *machine, u64 start,
 size_t machines__fprintf_dsos(struct machines *machines, FILE *fp)
 {
 	struct rb_node *nd;
-	size_t ret = __dsos__fprintf(&machines->host.kernel_dsos, fp) +
-		     __dsos__fprintf(&machines->host.user_dsos, fp);
+	size_t ret = __dsos__fprintf(&machines->host.kernel_dsos.head, fp) +
+		     __dsos__fprintf(&machines->host.user_dsos.head, fp);
 
 	for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
 		struct machine *pos = rb_entry(nd, struct machine, rb_node);
-		ret += __dsos__fprintf(&pos->kernel_dsos, fp);
-		ret += __dsos__fprintf(&pos->user_dsos, fp);
+		ret += __dsos__fprintf(&pos->kernel_dsos.head, fp);
+		ret += __dsos__fprintf(&pos->user_dsos.head, fp);
 	}
 
 	return ret;
 }
 
-size_t machine__fprintf_dsos_buildid(struct machine *machine, FILE *fp,
+size_t machine__fprintf_dsos_buildid(struct machine *m, FILE *fp,
 				     bool (skip)(struct dso *dso, int parm), int parm)
 {
-	return __dsos__fprintf_buildid(&machine->kernel_dsos, fp, skip, parm) +
-	       __dsos__fprintf_buildid(&machine->user_dsos, fp, skip, parm);
+	return __dsos__fprintf_buildid(&m->kernel_dsos.head, fp, skip, parm) +
+	       __dsos__fprintf_buildid(&m->user_dsos.head, fp, skip, parm);
 }
 
 size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp,
@@ -994,7 +994,7 @@ static bool machine__uses_kcore(struct machine *machine)
 {
 	struct dso *dso;
 
-	list_for_each_entry(dso, &machine->kernel_dsos, node) {
+	list_for_each_entry(dso, &machine->kernel_dsos.head, node) {
 		if (dso__is_kcore(dso))
 			return true;
 	}
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 6a6bcc1..2b651a7 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -4,6 +4,7 @@
 #include <sys/types.h>
 #include <linux/rbtree.h>
 #include "map.h"
+#include "dso.h"
 #include "event.h"
 
 struct addr_location;
@@ -32,8 +33,8 @@ struct machine {
 	struct list_head  dead_threads;
 	struct thread	  *last_match;
 	struct vdso_info  *vdso_info;
-	struct list_head  user_dsos;
-	struct list_head  kernel_dsos;
+	struct dsos	  user_dsos;
+	struct dsos	  kernel_dsos;
 	struct map_groups kmaps;
 	struct map	  *vmlinux_maps[MAP__NR_TYPES];
 	u64		  kernel_start;
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index be37b5a..c150ca4 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -184,7 +184,8 @@ static struct dso *kernel_get_module_dso(const char *module)
 	const char *vmlinux_name;
 
 	if (module) {
-		list_for_each_entry(dso, &host_machine->kernel_dsos, node) {
+		list_for_each_entry(dso, &host_machine->kernel_dsos.head,
+				    node) {
 			if (strncmp(dso->short_name + 1, module,
 				    dso->short_name_len - 2) == 0)
 				goto found;
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 2a92e10..1e23a5b 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -6,6 +6,7 @@
 #include <inttypes.h>
 
 #include "symbol.h"
+#include "machine.h"
 #include "vdso.h"
 #include <symbol/kallsyms.h>
 #include "debug.h"
@@ -929,7 +930,11 @@ int dso__load_sym(struct dso *dso, struct map *map,
 				}
 				curr_dso->symtab_type = dso->symtab_type;
 				map_groups__insert(kmap->kmaps, curr_map);
-				dsos__add(&dso->node, curr_dso);
+				/*
+				 * The new DSO should go to the kernel DSOS
+				 */
+				dsos__add(&map->groups->machine->kernel_dsos,
+					  curr_dso);
 				dso__set_loaded(curr_dso, map->type);
 			} else
 				curr_dso = curr_map->dso;

^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2014-10-03  5:27 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-09-29 20:07 [PATCH v5 0/2] perf tool: improves DSO long names search speed with rbtree Waiman Long
2014-09-29 20:07 ` [PATCH v5 1/2] perf tool: encapsulate dsos list head into struct dsos Waiman Long
2014-10-03  5:26   ` [tip:perf/core] perf symbols: Encapsulate " tip-bot for Waiman Long
2014-09-29 20:07 ` [PATCH v5 2/2] perf tool: improves DSO long names lookup speed with rbtree Waiman Long
2014-09-30 15:21   ` Arnaldo Carvalho de Melo
2014-09-30 16:49     ` Waiman Long
2014-09-30 17:38       ` Waiman Long

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.