All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] perf/powerpc: Cache the DWARF debug info
@ 2014-10-21 18:56 Sukadev Bhattiprolu
  2014-10-21 19:08 ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 12+ messages in thread
From: Sukadev Bhattiprolu @ 2014-10-21 18:56 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Jiri Olsa; +Cc: Anton Blanchard, linux-kernel

>From 773a3608a0cd2daf02e244cb9ffbf5bb6a0e724e Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Date: Tue, 21 Oct 2014 13:20:22 -0500
Subject: [PATCH 1/1] perf/powerpc: Cache DWARF debug info

Cache the DWARF debug info for DSO so we don't have to rebuild it for
each address in the DSO (duh!).

	$ time /tmp/perf.orig report -g > /tmp/report.1

	real	0m1.845s
	user	0m0.963s
	sys	0m0.879s

	$ time /tmp/perf report -g > /tmp/report.2

	real	0m0.089s
	user	0m0.082s
	sys	0m0.006s

	$ diff /tmp/report.1 /tmp/report.2
	$

Reported-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
---
 tools/perf/arch/powerpc/util/skip-callchain-idx.c | 91 ++++++++++++++++++++---
 1 file changed, 82 insertions(+), 9 deletions(-)

diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c
index d73ef8b..bfe254d 100644
--- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c
+++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c
@@ -38,6 +38,63 @@ static const Dwfl_Callbacks offline_callbacks = {
 	.section_address = dwfl_offline_section_address,
 };
 
+struct list_head perf_dwfl_files;
+
+struct perf_dwfl_node {
+	struct list_head node;
+	char *exec_file;
+	Dwfl *dwfl;
+};
+
+static void perf_init_dwfl(void)
+{
+	static int dwfl_inited;
+
+	if (!dwfl_inited) {
+		dwfl_inited = 1;
+		INIT_LIST_HEAD(&perf_dwfl_files);
+	}
+}
+
+static Dwfl *perf_dwfl_find(const char *file_name)
+{
+	struct list_head *pos;
+	struct perf_dwfl_node *node;
+
+	perf_init_dwfl();
+
+	list_for_each(pos, &perf_dwfl_files) {
+		node = list_entry(pos, struct perf_dwfl_node, node);
+		if (!strcmp(node->exec_file, file_name))
+			return node->dwfl;
+	}
+
+	return NULL;
+}
+
+
+/*
+ * Return 1 if were able to cache the DWARF debug info. 0 otherwise.
+ */
+static int perf_dwfl_cache(const char *file_name, Dwfl *dwfl)
+{
+	struct perf_dwfl_node *dwfl_node;
+
+	dwfl_node = malloc(sizeof(struct perf_dwfl_node));
+
+	if (!dwfl_node) {
+		pr_debug("%s(): Unable to alloc memory\n", __func__);
+		return 0;
+	}
+	INIT_LIST_HEAD(&dwfl_node->node);
+	dwfl_node->dwfl = dwfl;
+	dwfl_node->exec_file = strdup(file_name);
+
+	list_add(&dwfl_node->node, &perf_dwfl_files);
+
+	return 1;
+}
+
 
 /*
  * Use the DWARF expression for the Call-frame-address and determine
@@ -155,16 +212,26 @@ static int check_return_addr(const char *exec_file, Dwarf_Addr pc)
 	Dwarf_Addr	start = pc;
 	Dwarf_Addr	end = pc;
 	bool		signalp;
+	int		cached;
 
-	dwfl = dwfl_begin(&offline_callbacks);
+	cached = 1;
+	dwfl = perf_dwfl_find(exec_file);
 	if (!dwfl) {
-		pr_debug("dwfl_begin() failed: %s\n", dwarf_errmsg(-1));
-		return -1;
-	}
-
-	if (dwfl_report_offline(dwfl, "",  exec_file, -1) == NULL) {
-		pr_debug("dwfl_report_offline() failed %s\n", dwarf_errmsg(-1));
-		goto out;
+		dwfl = dwfl_begin(&offline_callbacks);
+		if (!dwfl) {
+			pr_debug("dwfl_begin() failed: %s\n", dwarf_errmsg(-1));
+			return -1;
+		}
+
+		if (dwfl_report_offline(dwfl, "",  exec_file, -1) == NULL) {
+			pr_debug("dwfl_report_offline() failed %s\n",
+							dwarf_errmsg(-1));
+			goto out;
+		}
+		/*
+		 * If we fail to alloc memory, we lose the benefit of caching
+		 */
+		cached = perf_dwfl_cache(exec_file, dwfl);
 	}
 
 	mod = dwfl_addrmodule(dwfl, pc);
@@ -194,7 +261,13 @@ static int check_return_addr(const char *exec_file, Dwarf_Addr pc)
 	rc = check_return_reg(ra_regno, frame);
 
 out:
-	dwfl_end(dwfl);
+	/*
+	 * In the unlikely event that we were unable to cache the debug
+	 * info, release it so we don't leak fds.
+	 */
+	if (!cached)
+		dwfl_end(dwfl);
+
 	return rc;
 }
 
-- 
1.8.4.2


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf/powerpc: Cache the DWARF debug info
  2014-10-21 18:56 [PATCH] perf/powerpc: Cache the DWARF debug info Sukadev Bhattiprolu
@ 2014-10-21 19:08 ` Arnaldo Carvalho de Melo
  2014-10-22  0:09   ` Sukadev Bhattiprolu
  0 siblings, 1 reply; 12+ messages in thread
From: Arnaldo Carvalho de Melo @ 2014-10-21 19:08 UTC (permalink / raw)
  To: Sukadev Bhattiprolu; +Cc: Jiri Olsa, Anton Blanchard, linux-kernel

Em Tue, Oct 21, 2014 at 11:56:10AM -0700, Sukadev Bhattiprolu escreveu:
> >From 773a3608a0cd2daf02e244cb9ffbf5bb6a0e724e Mon Sep 17 00:00:00 2001
> From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
> Date: Tue, 21 Oct 2014 13:20:22 -0500
> Subject: [PATCH 1/1] perf/powerpc: Cache DWARF debug info

Jiri, isn't his related to that dsos caching thing you created?

Anyway, there is a per machine struct (struct machine) where things like
this should be put, please do not add globals or static variables in
functions.

- Arnaldo
 
> Cache the DWARF debug info for DSO so we don't have to rebuild it for
> each address in the DSO (duh!).
> 
> 	$ time /tmp/perf.orig report -g > /tmp/report.1
> 
> 	real	0m1.845s
> 	user	0m0.963s
> 	sys	0m0.879s
> 
> 	$ time /tmp/perf report -g > /tmp/report.2
> 
> 	real	0m0.089s
> 	user	0m0.082s
> 	sys	0m0.006s
> 
> 	$ diff /tmp/report.1 /tmp/report.2
> 	$
> 
> Reported-by: Anton Blanchard <anton@samba.org>
> Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
> ---
>  tools/perf/arch/powerpc/util/skip-callchain-idx.c | 91 ++++++++++++++++++++---
>  1 file changed, 82 insertions(+), 9 deletions(-)
> 
> diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c
> index d73ef8b..bfe254d 100644
> --- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c
> +++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c
> @@ -38,6 +38,63 @@ static const Dwfl_Callbacks offline_callbacks = {
>  	.section_address = dwfl_offline_section_address,
>  };
>  
> +struct list_head perf_dwfl_files;
> +
> +struct perf_dwfl_node {
> +	struct list_head node;
> +	char *exec_file;
> +	Dwfl *dwfl;
> +};
> +
> +static void perf_init_dwfl(void)
> +{
> +	static int dwfl_inited;
> +
> +	if (!dwfl_inited) {
> +		dwfl_inited = 1;
> +		INIT_LIST_HEAD(&perf_dwfl_files);
> +	}
> +}
> +
> +static Dwfl *perf_dwfl_find(const char *file_name)
> +{
> +	struct list_head *pos;
> +	struct perf_dwfl_node *node;
> +
> +	perf_init_dwfl();
> +
> +	list_for_each(pos, &perf_dwfl_files) {
> +		node = list_entry(pos, struct perf_dwfl_node, node);
> +		if (!strcmp(node->exec_file, file_name))
> +			return node->dwfl;
> +	}
> +
> +	return NULL;
> +}
> +
> +
> +/*
> + * Return 1 if were able to cache the DWARF debug info. 0 otherwise.
> + */
> +static int perf_dwfl_cache(const char *file_name, Dwfl *dwfl)
> +{
> +	struct perf_dwfl_node *dwfl_node;
> +
> +	dwfl_node = malloc(sizeof(struct perf_dwfl_node));
> +
> +	if (!dwfl_node) {
> +		pr_debug("%s(): Unable to alloc memory\n", __func__);
> +		return 0;
> +	}
> +	INIT_LIST_HEAD(&dwfl_node->node);
> +	dwfl_node->dwfl = dwfl;
> +	dwfl_node->exec_file = strdup(file_name);
> +
> +	list_add(&dwfl_node->node, &perf_dwfl_files);
> +
> +	return 1;
> +}
> +
>  
>  /*
>   * Use the DWARF expression for the Call-frame-address and determine
> @@ -155,16 +212,26 @@ static int check_return_addr(const char *exec_file, Dwarf_Addr pc)
>  	Dwarf_Addr	start = pc;
>  	Dwarf_Addr	end = pc;
>  	bool		signalp;
> +	int		cached;
>  
> -	dwfl = dwfl_begin(&offline_callbacks);
> +	cached = 1;
> +	dwfl = perf_dwfl_find(exec_file);
>  	if (!dwfl) {
> -		pr_debug("dwfl_begin() failed: %s\n", dwarf_errmsg(-1));
> -		return -1;
> -	}
> -
> -	if (dwfl_report_offline(dwfl, "",  exec_file, -1) == NULL) {
> -		pr_debug("dwfl_report_offline() failed %s\n", dwarf_errmsg(-1));
> -		goto out;
> +		dwfl = dwfl_begin(&offline_callbacks);
> +		if (!dwfl) {
> +			pr_debug("dwfl_begin() failed: %s\n", dwarf_errmsg(-1));
> +			return -1;
> +		}
> +
> +		if (dwfl_report_offline(dwfl, "",  exec_file, -1) == NULL) {
> +			pr_debug("dwfl_report_offline() failed %s\n",
> +							dwarf_errmsg(-1));
> +			goto out;
> +		}
> +		/*
> +		 * If we fail to alloc memory, we lose the benefit of caching
> +		 */
> +		cached = perf_dwfl_cache(exec_file, dwfl);
>  	}
>  
>  	mod = dwfl_addrmodule(dwfl, pc);
> @@ -194,7 +261,13 @@ static int check_return_addr(const char *exec_file, Dwarf_Addr pc)
>  	rc = check_return_reg(ra_regno, frame);
>  
>  out:
> -	dwfl_end(dwfl);
> +	/*
> +	 * In the unlikely event that we were unable to cache the debug
> +	 * info, release it so we don't leak fds.
> +	 */
> +	if (!cached)
> +		dwfl_end(dwfl);
> +
>  	return rc;
>  }
>  
> -- 
> 1.8.4.2

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf/powerpc: Cache the DWARF debug info
  2014-10-21 19:08 ` Arnaldo Carvalho de Melo
@ 2014-10-22  0:09   ` Sukadev Bhattiprolu
  2014-10-22  8:17     ` Jiri Olsa
  2014-10-30  6:42     ` [tip:perf/core] perf tools powerpc: " tip-bot for Sukadev Bhattiprolu
  0 siblings, 2 replies; 12+ messages in thread
From: Sukadev Bhattiprolu @ 2014-10-22  0:09 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo; +Cc: Jiri Olsa, Anton Blanchard, linux-kernel

Arnaldo Carvalho de Melo [acme@kernel.org] wrote:
| Em Tue, Oct 21, 2014 at 11:56:10AM -0700, Sukadev Bhattiprolu escreveu:
| > >From 773a3608a0cd2daf02e244cb9ffbf5bb6a0e724e Mon Sep 17 00:00:00 2001
| > From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
| > Date: Tue, 21 Oct 2014 13:20:22 -0500
| > Subject: [PATCH 1/1] perf/powerpc: Cache DWARF debug info
| 
| Jiri, isn't his related to that dsos caching thing you created?
| 
| Anyway, there is a per machine struct (struct machine) where things like
| this should be put, please do not add globals or static variables in
| functions.

Good point. How about something like this:
---

Cache the DWARF debug info for DSO so we don't have to rebuild it for
each address in the DSO.

Note that dso__new() uses calloc() so don't need to set dso->dwfl
to NULL.

	$ /tmp/perf.orig --version
	perf version 3.18.rc1.gc2661b8
	$ /tmp/perf.new --version
	perf version 3.18.rc1.g402d62
	$ perf stat -e cycles,instructions /tmp/perf.orig report -g > orig

	 Performance counter stats for '/tmp/perf.orig report -g':

	     6,428,177,183 cycles                    #    0.000 GHz
	     4,176,288,391 instructions              #    0.65  insns per cycle

	       1.840666132 seconds time elapsed

	$ perf stat -e cycles,instructions /tmp/perf.new report -g > new

	 Performance counter stats for '/tmp/perf.new report -g':

	       305,773,142 cycles                    #    0.000 GHz
	       276,048,272 instructions              #    0.90  insns per cycle

	       0.087693543 seconds time elapsed
	$ diff orig new
	$

Changelog[v2]:
	[Arnaldo Carvalho] Cache in existing global objects rather than create
		new static/globals in functions.

Reported-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
---
 tools/perf/arch/powerpc/util/skip-callchain-idx.c | 33 +++++++++++++++--------
 tools/perf/util/dso.h                             |  1 +
 2 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c
index d73ef8b..9892b0f 100644
--- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c
+++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c
@@ -145,7 +145,7 @@ static Dwarf_Frame *get_dwarf_frame(Dwfl_Module *mod, Dwarf_Addr pc)
  *		yet used)
  *	-1 in case of errors
  */
-static int check_return_addr(const char *exec_file, Dwarf_Addr pc)
+static int check_return_addr(struct dso *dso, Dwarf_Addr pc)
 {
 	int		rc = -1;
 	Dwfl		*dwfl;
@@ -156,15 +156,27 @@ static int check_return_addr(const char *exec_file, Dwarf_Addr pc)
 	Dwarf_Addr	end = pc;
 	bool		signalp;
 
-	dwfl = dwfl_begin(&offline_callbacks);
-	if (!dwfl) {
-		pr_debug("dwfl_begin() failed: %s\n", dwarf_errmsg(-1));
-		return -1;
-	}
+	dwfl = dso->dwfl;
 
-	if (dwfl_report_offline(dwfl, "",  exec_file, -1) == NULL) {
-		pr_debug("dwfl_report_offline() failed %s\n", dwarf_errmsg(-1));
-		goto out;
+	if (!dwfl) {
+		dwfl = dwfl_begin(&offline_callbacks);
+		if (!dwfl) {
+			pr_debug("dwfl_begin() failed: %s\n", dwarf_errmsg(-1));
+			return -1;
+		}
+
+		if (dwfl_report_offline(dwfl, "", dso->long_name, -1) == NULL) {
+			pr_debug("dwfl_report_offline() failed %s\n",
+						dwarf_errmsg(-1));
+			/*
+			 * We normally cache the DWARF debug info and never
+			 * call dwfl_end(). But to prevent fd leak, free in
+			 * case of error.
+			 */
+			dwfl_end(dwfl);
+			goto out;
+		}
+		dso->dwfl = dwfl;
 	}
 
 	mod = dwfl_addrmodule(dwfl, pc);
@@ -194,7 +206,6 @@ static int check_return_addr(const char *exec_file, Dwarf_Addr pc)
 	rc = check_return_reg(ra_regno, frame);
 
 out:
-	dwfl_end(dwfl);
 	return rc;
 }
 
@@ -246,7 +257,7 @@ int arch_skip_callchain_idx(struct machine *machine, struct thread *thread,
 		return skip_slot;
 	}
 
-	rc = check_return_addr(dso->long_name, ip);
+	rc = check_return_addr(dso, ip);
 
 	pr_debug("DSO %s, nr %" PRIx64 ", ip 0x%" PRIx64 "rc %d\n",
 				dso->long_name, chain->nr, ip, rc);
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index acb651a..3c9b391 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -127,6 +127,7 @@ struct dso {
 	const char	 *long_name;
 	u16		 long_name_len;
 	u16		 short_name_len;
+	void		*dwfl;			/* DWARF debug info */
 
 	/* dso data file */
 	struct {


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf/powerpc: Cache the DWARF debug info
  2014-10-22  0:09   ` Sukadev Bhattiprolu
@ 2014-10-22  8:17     ` Jiri Olsa
  2014-10-22 17:46       ` Sukadev Bhattiprolu
  2014-10-30  6:42     ` [tip:perf/core] perf tools powerpc: " tip-bot for Sukadev Bhattiprolu
  1 sibling, 1 reply; 12+ messages in thread
From: Jiri Olsa @ 2014-10-22  8:17 UTC (permalink / raw)
  To: Sukadev Bhattiprolu
  Cc: Arnaldo Carvalho de Melo, Anton Blanchard, linux-kernel

On Tue, Oct 21, 2014 at 05:09:58PM -0700, Sukadev Bhattiprolu wrote:
> Arnaldo Carvalho de Melo [acme@kernel.org] wrote:
> | Em Tue, Oct 21, 2014 at 11:56:10AM -0700, Sukadev Bhattiprolu escreveu:
> | > >From 773a3608a0cd2daf02e244cb9ffbf5bb6a0e724e Mon Sep 17 00:00:00 2001
> | > From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
> | > Date: Tue, 21 Oct 2014 13:20:22 -0500
> | > Subject: [PATCH 1/1] perf/powerpc: Cache DWARF debug info
> | 
> | Jiri, isn't his related to that dsos caching thing you created?

well, yea.. but Sukadev needs dw handle on top of that

> | 
> | Anyway, there is a per machine struct (struct machine) where things like
> | this should be put, please do not add globals or static variables in
> | functions.
> 
> Good point. How about something like this:

yep, and the code got much shorter ;-)

SNIP

> --- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c
> +++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c
> @@ -145,7 +145,7 @@ static Dwarf_Frame *get_dwarf_frame(Dwfl_Module *mod, Dwarf_Addr pc)
>   *		yet used)
>   *	-1 in case of errors
>   */
> -static int check_return_addr(const char *exec_file, Dwarf_Addr pc)
> +static int check_return_addr(struct dso *dso, Dwarf_Addr pc)
>  {
>  	int		rc = -1;
>  	Dwfl		*dwfl;
> @@ -156,15 +156,27 @@ static int check_return_addr(const char *exec_file, Dwarf_Addr pc)
>  	Dwarf_Addr	end = pc;
>  	bool		signalp;
>  
> -	dwfl = dwfl_begin(&offline_callbacks);
> -	if (!dwfl) {
> -		pr_debug("dwfl_begin() failed: %s\n", dwarf_errmsg(-1));
> -		return -1;
> -	}
> +	dwfl = dso->dwfl;
>  
> -	if (dwfl_report_offline(dwfl, "",  exec_file, -1) == NULL) {
> -		pr_debug("dwfl_report_offline() failed %s\n", dwarf_errmsg(-1));
> -		goto out;
> +	if (!dwfl) {
> +		dwfl = dwfl_begin(&offline_callbacks);
> +		if (!dwfl) {
> +			pr_debug("dwfl_begin() failed: %s\n", dwarf_errmsg(-1));
> +			return -1;
> +		}
> +
> +		if (dwfl_report_offline(dwfl, "", dso->long_name, -1) == NULL) {
> +			pr_debug("dwfl_report_offline() failed %s\n",
> +						dwarf_errmsg(-1));
> +			/*
> +			 * We normally cache the DWARF debug info and never
> +			 * call dwfl_end(). But to prevent fd leak, free in
> +			 * case of error.
> +			 */
> +			dwfl_end(dwfl);
> +			goto out;
> +		}
> +		dso->dwfl = dwfl;

so by this we get powerpc arch code sharing dw handle via dso object,
but we have lot of generic code too ;-)

could you make this happen for unwind__get_entries.. probably
both sharing same generic code I guess

thanks,
jirka

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf/powerpc: Cache the DWARF debug info
  2014-10-22  8:17     ` Jiri Olsa
@ 2014-10-22 17:46       ` Sukadev Bhattiprolu
  2014-10-23 13:37         ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 12+ messages in thread
From: Sukadev Bhattiprolu @ 2014-10-22 17:46 UTC (permalink / raw)
  To: Jiri Olsa; +Cc: Arnaldo Carvalho de Melo, Anton Blanchard, linux-kernel

Jiri Olsa [jolsa@redhat.com] wrote:
| > +			goto out;
| > +		}
| > +		dso->dwfl = dwfl;
| 
| so by this we get powerpc arch code sharing dw handle via dso object,
| but we have lot of generic code too ;-)

Well, this applies to powerpc...

| 
| could you make this happen for unwind__get_entries.. probably
| both sharing same generic code I guess

and unwind_get_entries() applies only to x86 and arm right ? ;-)
Or at least thats what the config/Makefile says.

I can take a look at unwind_get_entries(), but can you please merge
this fix for now, since the current performance is bad?

| 
| thanks,
| jirka


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf/powerpc: Cache the DWARF debug info
  2014-10-22 17:46       ` Sukadev Bhattiprolu
@ 2014-10-23 13:37         ` Arnaldo Carvalho de Melo
  2014-10-23 14:12           ` Jiri Olsa
  0 siblings, 1 reply; 12+ messages in thread
From: Arnaldo Carvalho de Melo @ 2014-10-23 13:37 UTC (permalink / raw)
  To: Sukadev Bhattiprolu; +Cc: Jiri Olsa, Anton Blanchard, linux-kernel

Em Wed, Oct 22, 2014 at 10:46:59AM -0700, Sukadev Bhattiprolu escreveu:
> Jiri Olsa [jolsa@redhat.com] wrote:
> | > +			goto out;
> | > +		}
> | > +		dso->dwfl = dwfl;
> | 
> | so by this we get powerpc arch code sharing dw handle via dso object,
> | but we have lot of generic code too ;-)
> 
> Well, this applies to powerpc...
> 
> | 
> | could you make this happen for unwind__get_entries.. probably
> | both sharing same generic code I guess
> 
> and unwind_get_entries() applies only to x86 and arm right ? ;-)
> Or at least thats what the config/Makefile says.
 
> I can take a look at unwind_get_entries(), but can you please merge
> this fix for now, since the current performance is bad?

Right, I think the way it is now is a good compromise, i.e. you seem to
be using the right place to cache this, this is restricted to powerpc,
i.e. if leaks or excessive memory usage happens in workloads with lots
of DSOs having dwfl handlers open at the same times happens, it doesn't
affect users in other arches.

Jiri: do you agree?

I'm applying it on my local tree, as I need to change some of those
functions, places where machine and thread are being passed are being
changed to receive just a thread pointer, since the machine where a
thread is can be obtained, after a patch I added on my local tree, can
be obtained from thread->mg->machine.

- Arnaldo

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf/powerpc: Cache the DWARF debug info
  2014-10-23 13:37         ` Arnaldo Carvalho de Melo
@ 2014-10-23 14:12           ` Jiri Olsa
  2014-10-23 14:26             ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 12+ messages in thread
From: Jiri Olsa @ 2014-10-23 14:12 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Sukadev Bhattiprolu, Anton Blanchard, linux-kernel

On Thu, Oct 23, 2014 at 10:37:24AM -0300, Arnaldo Carvalho de Melo wrote:
> Em Wed, Oct 22, 2014 at 10:46:59AM -0700, Sukadev Bhattiprolu escreveu:
> > Jiri Olsa [jolsa@redhat.com] wrote:
> > | > +			goto out;
> > | > +		}
> > | > +		dso->dwfl = dwfl;
> > | 
> > | so by this we get powerpc arch code sharing dw handle via dso object,
> > | but we have lot of generic code too ;-)
> > 
> > Well, this applies to powerpc...
> > 
> > | 
> > | could you make this happen for unwind__get_entries.. probably
> > | both sharing same generic code I guess
> > 
> > and unwind_get_entries() applies only to x86 and arm right ? ;-)
> > Or at least thats what the config/Makefile says.
>  
> > I can take a look at unwind_get_entries(), but can you please merge
> > this fix for now, since the current performance is bad?
> 
> Right, I think the way it is now is a good compromise, i.e. you seem to
> be using the right place to cache this, this is restricted to powerpc,
> i.e. if leaks or excessive memory usage happens in workloads with lots
> of DSOs having dwfl handlers open at the same times happens, it doesn't
> affect users in other arches.
> 
> Jiri: do you agree?

well it's powerpc specific now.. anyway the code in the patch
to open the dwfl is generic and should be in in generic
place.. like in some extern function that the x86 would call
to get the dwfl handle

also the current patch leaks the dso->dwfl, which is never freed -> dwfl_end-ed,
dwfl_end should be called of in dso__delete I think

jirka

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf/powerpc: Cache the DWARF debug info
  2014-10-23 14:12           ` Jiri Olsa
@ 2014-10-23 14:26             ` Arnaldo Carvalho de Melo
  2014-10-23 15:13               ` Jiri Olsa
  0 siblings, 1 reply; 12+ messages in thread
From: Arnaldo Carvalho de Melo @ 2014-10-23 14:26 UTC (permalink / raw)
  To: Jiri Olsa; +Cc: Sukadev Bhattiprolu, Anton Blanchard, linux-kernel

Em Thu, Oct 23, 2014 at 04:12:13PM +0200, Jiri Olsa escreveu:
> On Thu, Oct 23, 2014 at 10:37:24AM -0300, Arnaldo Carvalho de Melo wrote:
> > Em Wed, Oct 22, 2014 at 10:46:59AM -0700, Sukadev Bhattiprolu escreveu:
> > > Jiri Olsa [jolsa@redhat.com] wrote:
> > > | > +			goto out;
> > > | > +		}
> > > | > +		dso->dwfl = dwfl;

> > > | so by this we get powerpc arch code sharing dw handle via dso object,
> > > | but we have lot of generic code too ;-)

> > > Well, this applies to powerpc...

> > > | could you make this happen for unwind__get_entries.. probably
> > > | both sharing same generic code I guess

> > > and unwind_get_entries() applies only to x86 and arm right ? ;-)
> > > Or at least thats what the config/Makefile says.

> > > I can take a look at unwind_get_entries(), but can you please merge
> > > this fix for now, since the current performance is bad?

> > Right, I think the way it is now is a good compromise, i.e. you seem to
> > be using the right place to cache this, this is restricted to powerpc,
> > i.e. if leaks or excessive memory usage happens in workloads with lots
> > of DSOs having dwfl handlers open at the same times happens, it doesn't
> > affect users in other arches.
> > 
> > Jiri: do you agree?
> 
> well it's powerpc specific now.. anyway the code in the patch
> to open the dwfl is generic and should be in in generic
> place.. like in some extern function that the x86 would call
> to get the dwfl handle
> 
> also the current patch leaks the dso->dwfl, which is never freed -> dwfl_end-ed,
> dwfl_end should be called of in dso__delete I think

Yeah, as my comment implies, I guess those are all valid concerns, i.e.
the patch needs more work, I was willing to accept it as-is because it
would hurt just Sukadev (i.e. powerpc), as he seems to be in a hurry to
get the performance improved :-)

I will remove it from my tree for now, as in the end what I'm doing
doesn't touch those specific functions.

But I think this will go on dragging extra work, i.e.: how to limit the
number of dwfl handlers used? Should we have just a front end cache like
what is done for machine__findnew_thread() (with just the last hit) and
perhaps then have a few slots for keeping N dwfl open and when that
number is up we check the one with less queries and close it?

Jiri, are you doing that on that cache stuff you did? I mean how do
you keep this stuff:

/*
 * Global list of open DSOs and the counter.
 */
static LIST_HEAD(dso__data_open);
static long dso__data_open_cnt;

Also this should not be global at all, this should be on struct machine,
since a DSO that is present on a machine may have the same name as the
dso on another machine (two guests, hosts, etc) and thus should not be
kept on the same list, etc.

So reading a bit more you seem to check rlimit, do LRUing when hitting
the limit, etc, that is why I thought about that stuff when Sukadev
first posted this patch...

Sukadev, all this is in tools/perf/util/dso.c

That is why I thought it would be a compromise to put what he did, it
would not make the existing situation that much worse, work needs to be
done in this area :-\

- Arnaldo

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf/powerpc: Cache the DWARF debug info
  2014-10-23 14:26             ` Arnaldo Carvalho de Melo
@ 2014-10-23 15:13               ` Jiri Olsa
  2014-10-23 15:33                 ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 12+ messages in thread
From: Jiri Olsa @ 2014-10-23 15:13 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Sukadev Bhattiprolu, Anton Blanchard, linux-kernel

On Thu, Oct 23, 2014 at 11:26:34AM -0300, Arnaldo Carvalho de Melo wrote:
> Em Thu, Oct 23, 2014 at 04:12:13PM +0200, Jiri Olsa escreveu:
> > On Thu, Oct 23, 2014 at 10:37:24AM -0300, Arnaldo Carvalho de Melo wrote:
> > > Em Wed, Oct 22, 2014 at 10:46:59AM -0700, Sukadev Bhattiprolu escreveu:
> > > > Jiri Olsa [jolsa@redhat.com] wrote:
> > > > | > +			goto out;
> > > > | > +		}
> > > > | > +		dso->dwfl = dwfl;
> 
> > > > | so by this we get powerpc arch code sharing dw handle via dso object,
> > > > | but we have lot of generic code too ;-)
> 
> > > > Well, this applies to powerpc...
> 
> > > > | could you make this happen for unwind__get_entries.. probably
> > > > | both sharing same generic code I guess
> 
> > > > and unwind_get_entries() applies only to x86 and arm right ? ;-)
> > > > Or at least thats what the config/Makefile says.
> 
> > > > I can take a look at unwind_get_entries(), but can you please merge
> > > > this fix for now, since the current performance is bad?
> 
> > > Right, I think the way it is now is a good compromise, i.e. you seem to
> > > be using the right place to cache this, this is restricted to powerpc,
> > > i.e. if leaks or excessive memory usage happens in workloads with lots
> > > of DSOs having dwfl handlers open at the same times happens, it doesn't
> > > affect users in other arches.
> > > 
> > > Jiri: do you agree?
> > 
> > well it's powerpc specific now.. anyway the code in the patch
> > to open the dwfl is generic and should be in in generic
> > place.. like in some extern function that the x86 would call
> > to get the dwfl handle
> > 
> > also the current patch leaks the dso->dwfl, which is never freed -> dwfl_end-ed,
> > dwfl_end should be called of in dso__delete I think
> 
> Yeah, as my comment implies, I guess those are all valid concerns, i.e.
> the patch needs more work, I was willing to accept it as-is because it
> would hurt just Sukadev (i.e. powerpc), as he seems to be in a hurry to
> get the performance improved :-)
> 
> I will remove it from my tree for now, as in the end what I'm doing
> doesn't touch those specific functions.
> 
> But I think this will go on dragging extra work, i.e.: how to limit the
> number of dwfl handlers used? Should we have just a front end cache like
> what is done for machine__findnew_thread() (with just the last hit) and
> perhaps then have a few slots for keeping N dwfl open and when that
> number is up we check the one with less queries and close it?

I think this can stay in 'struct dso' which is limited
by that code you show below

I think we need just add generic functions that allocates/destroys
the dwfl handle and lazy allocate&store this handle whenever it's
needed and destroy it in dso__delete

> 
> Jiri, are you doing that on that cache stuff you did? I mean how do
> you keep this stuff:
> 
> /*
>  * Global list of open DSOs and the counter.
>  */
> static LIST_HEAD(dso__data_open);
> static long dso__data_open_cnt;
> 
> Also this should not be global at all, this should be on struct machine,
> since a DSO that is present on a machine may have the same name as the
> dso on another machine (two guests, hosts, etc) and thus should not be
> kept on the same list, etc.

the justification for this to be static is that dso__data_open_cnt
is checked against the rlimit for user and the point is to keep
'some amount' of dso object opened, so we dont waste time by reopening

the 'some amount' is currently the half of the allowed limit for user

you can check the logic in open_dso and check_data_close functions

> So reading a bit more you seem to check rlimit, do LRUing when hitting
> the limit, etc, that is why I thought about that stuff when Sukadev
> first posted this patch...
> 
> Sukadev, all this is in tools/perf/util/dso.c
> 
> That is why I thought it would be a compromise to put what he did, it
> would not make the existing situation that much worse, work needs to be
> done in this area :-\

I think we just need to put that libdw handle into dso object
as I suggested above

jirka

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf/powerpc: Cache the DWARF debug info
  2014-10-23 15:13               ` Jiri Olsa
@ 2014-10-23 15:33                 ` Arnaldo Carvalho de Melo
  2014-10-23 15:39                   ` Jiri Olsa
  0 siblings, 1 reply; 12+ messages in thread
From: Arnaldo Carvalho de Melo @ 2014-10-23 15:33 UTC (permalink / raw)
  To: Jiri Olsa; +Cc: Sukadev Bhattiprolu, Anton Blanchard, linux-kernel

Em Thu, Oct 23, 2014 at 05:13:06PM +0200, Jiri Olsa escreveu:
> On Thu, Oct 23, 2014 at 11:26:34AM -0300, Arnaldo Carvalho de Melo wrote:
> > That is why I thought it would be a compromise to put what he did, it
> > would not make the existing situation that much worse, work needs to be
> > done in this area :-\
 
> I think we just need to put that libdw handle into dso object
> as I suggested above

Isn't it there already?

The patch does:

+++ b/tools/perf/util/dso.h
@@ -127,6 +127,7 @@ struct dso {
        const char       *long_name;
        u16              long_name_len;
        u16              short_name_len;
+       void            *dwfl;                  /* DWARF debug info */


----------

What you want, on top of that, at a minimum, we somehow limit the number
of simultaneously dwfl_begin()'ed DSOs, right?

- Arnaldo

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] perf/powerpc: Cache the DWARF debug info
  2014-10-23 15:33                 ` Arnaldo Carvalho de Melo
@ 2014-10-23 15:39                   ` Jiri Olsa
  0 siblings, 0 replies; 12+ messages in thread
From: Jiri Olsa @ 2014-10-23 15:39 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Sukadev Bhattiprolu, Anton Blanchard, linux-kernel

On Thu, Oct 23, 2014 at 12:33:22PM -0300, Arnaldo Carvalho de Melo wrote:
> Em Thu, Oct 23, 2014 at 05:13:06PM +0200, Jiri Olsa escreveu:
> > On Thu, Oct 23, 2014 at 11:26:34AM -0300, Arnaldo Carvalho de Melo wrote:
> > > That is why I thought it would be a compromise to put what he did, it
> > > would not make the existing situation that much worse, work needs to be
> > > done in this area :-\
>  
> > I think we just need to put that libdw handle into dso object
> > as I suggested above
> 
> Isn't it there already?
> 
> The patch does:
> 
> +++ b/tools/perf/util/dso.h
> @@ -127,6 +127,7 @@ struct dso {
>         const char       *long_name;
>         u16              long_name_len;
>         u16              short_name_len;
> +       void            *dwfl;                  /* DWARF debug info */
> 
> 
> ----------
> 
> What you want, on top of that, at a minimum, we somehow limit the number
> of simultaneously dwfl_begin()'ed DSOs, right?

no, the patch is doing that.. as I wrote:

---
I think this can stay in 'struct dso' which is limited
by that code you show below (dso__data_open code)

I think we need just add generic functions that allocates/destroys
the dwfl handle and lazy allocate&store this handle whenever it's
needed and destroy it in dso__delete
---

so just:
  1) generic function to do the lazy allocation of dwlf handle
  2) store the handle within dso object
  3) destroy it in dso__delete

the limitation for opened dso objects number should cover
the dwlf limit

thanks,
jirka

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [tip:perf/core] perf tools powerpc: Cache the DWARF debug info
  2014-10-22  0:09   ` Sukadev Bhattiprolu
  2014-10-22  8:17     ` Jiri Olsa
@ 2014-10-30  6:42     ` tip-bot for Sukadev Bhattiprolu
  1 sibling, 0 replies; 12+ messages in thread
From: tip-bot for Sukadev Bhattiprolu @ 2014-10-30  6:42 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, tglx, jolsa, hpa, mingo, anton, acme, sukadev, anton

Commit-ID:  7d073b335edc8d97af730c2e3b83ed6642bd3c27
Gitweb:     http://git.kernel.org/tip/7d073b335edc8d97af730c2e3b83ed6642bd3c27
Author:     Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
AuthorDate: Tue, 21 Oct 2014 17:09:58 -0700
Committer:  Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Wed, 29 Oct 2014 10:32:46 -0200

perf tools powerpc: Cache the DWARF debug info

Cache the DWARF debug info for DSO so we don't have to rebuild it for each
address in the DSO.

Note that dso__new() uses calloc() so don't need to set dso->dwfl to NULL.

	$ /tmp/perf.orig --version
	perf version 3.18.rc1.gc2661b8
	$ /tmp/perf.new --version
	perf version 3.18.rc1.g402d62
	$ perf stat -e cycles,instructions /tmp/perf.orig report -g > orig

	 Performance counter stats for '/tmp/perf.orig report -g':

	     6,428,177,183 cycles                    #    0.000 GHz
	     4,176,288,391 instructions              #    0.65  insns per cycle

	       1.840666132 seconds time elapsed

	$ perf stat -e cycles,instructions /tmp/perf.new report -g > new

	 Performance counter stats for '/tmp/perf.new report -g':

	       305,773,142 cycles                    #    0.000 GHz
	       276,048,272 instructions              #    0.90  insns per cycle

	       0.087693543 seconds time elapsed
	$ diff orig new
	$

Changelog[v2]:

[Arnaldo Carvalho] Cache in existing global objects rather than create
                   new static/globals in functions.

Reported-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Cc: Anton Blanchard <anton@au1.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/20141022000958.GB2228@us.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/powerpc/util/skip-callchain-idx.c | 33 +++++++++++++++--------
 tools/perf/util/dso.h                             |  1 +
 2 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c
index d73ef8b..9892b0f 100644
--- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c
+++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c
@@ -145,7 +145,7 @@ static Dwarf_Frame *get_dwarf_frame(Dwfl_Module *mod, Dwarf_Addr pc)
  *		yet used)
  *	-1 in case of errors
  */
-static int check_return_addr(const char *exec_file, Dwarf_Addr pc)
+static int check_return_addr(struct dso *dso, Dwarf_Addr pc)
 {
 	int		rc = -1;
 	Dwfl		*dwfl;
@@ -156,15 +156,27 @@ static int check_return_addr(const char *exec_file, Dwarf_Addr pc)
 	Dwarf_Addr	end = pc;
 	bool		signalp;
 
-	dwfl = dwfl_begin(&offline_callbacks);
-	if (!dwfl) {
-		pr_debug("dwfl_begin() failed: %s\n", dwarf_errmsg(-1));
-		return -1;
-	}
+	dwfl = dso->dwfl;
 
-	if (dwfl_report_offline(dwfl, "",  exec_file, -1) == NULL) {
-		pr_debug("dwfl_report_offline() failed %s\n", dwarf_errmsg(-1));
-		goto out;
+	if (!dwfl) {
+		dwfl = dwfl_begin(&offline_callbacks);
+		if (!dwfl) {
+			pr_debug("dwfl_begin() failed: %s\n", dwarf_errmsg(-1));
+			return -1;
+		}
+
+		if (dwfl_report_offline(dwfl, "", dso->long_name, -1) == NULL) {
+			pr_debug("dwfl_report_offline() failed %s\n",
+						dwarf_errmsg(-1));
+			/*
+			 * We normally cache the DWARF debug info and never
+			 * call dwfl_end(). But to prevent fd leak, free in
+			 * case of error.
+			 */
+			dwfl_end(dwfl);
+			goto out;
+		}
+		dso->dwfl = dwfl;
 	}
 
 	mod = dwfl_addrmodule(dwfl, pc);
@@ -194,7 +206,6 @@ static int check_return_addr(const char *exec_file, Dwarf_Addr pc)
 	rc = check_return_reg(ra_regno, frame);
 
 out:
-	dwfl_end(dwfl);
 	return rc;
 }
 
@@ -246,7 +257,7 @@ int arch_skip_callchain_idx(struct machine *machine, struct thread *thread,
 		return skip_slot;
 	}
 
-	rc = check_return_addr(dso->long_name, ip);
+	rc = check_return_addr(dso, ip);
 
 	pr_debug("DSO %s, nr %" PRIx64 ", ip 0x%" PRIx64 "rc %d\n",
 				dso->long_name, chain->nr, ip, rc);
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index acb651a..3c9b391 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -127,6 +127,7 @@ struct dso {
 	const char	 *long_name;
 	u16		 long_name_len;
 	u16		 short_name_len;
+	void		*dwfl;			/* DWARF debug info */
 
 	/* dso data file */
 	struct {

^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2014-10-30  6:42 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-10-21 18:56 [PATCH] perf/powerpc: Cache the DWARF debug info Sukadev Bhattiprolu
2014-10-21 19:08 ` Arnaldo Carvalho de Melo
2014-10-22  0:09   ` Sukadev Bhattiprolu
2014-10-22  8:17     ` Jiri Olsa
2014-10-22 17:46       ` Sukadev Bhattiprolu
2014-10-23 13:37         ` Arnaldo Carvalho de Melo
2014-10-23 14:12           ` Jiri Olsa
2014-10-23 14:26             ` Arnaldo Carvalho de Melo
2014-10-23 15:13               ` Jiri Olsa
2014-10-23 15:33                 ` Arnaldo Carvalho de Melo
2014-10-23 15:39                   ` Jiri Olsa
2014-10-30  6:42     ` [tip:perf/core] perf tools powerpc: " tip-bot for Sukadev Bhattiprolu

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.