All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] perf trace: Add support for printing call chains on sys_exit events.
@ 2016-04-08 11:34 Milian Wolff
  2016-04-08 17:57 ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 5+ messages in thread
From: Milian Wolff @ 2016-04-08 11:34 UTC (permalink / raw)
  To: linux-perf-users; +Cc: Arnaldo Carvalho de Melo, Jiri Olsa, Milian Wolff

Now, one can print the call chain for every encountered sys_exit
event, e.g.:

    $ perf trace -e nanosleep --call-graph dwarf path/to/ex_sleep
    1005.757 (1000.090 ms): ex_sleep/13167 nanosleep(...) = 0
                    20379e syscall_slow_exit_work ([kernel.kallsyms])
                    203bc4 syscall_return_slowpath ([kernel.kallsyms])
                    797248 int_ret_from_sys_call ([kernel.kallsyms])
                7f3991cd2600 __nanosleep (/usr/lib/libc-2.23.so)
                7f39926ab08d [unknown] (/usr/lib/libQt5Core.so.5.6.0)
                7f39925fe1bb QThread::sleep (/usr/lib/libQt5Core.so.5.6.0)
                5614801e4a2d main (path/to/ex_sleep)
                7f3991c3b710 __libc_start_main (/usr/lib/libc-2.23.so)
                5614801e4aa9 _start (path/to/ex_sleep)

Note that it is advised to increase the number of mmap pages to
prevent event losses when using this new feature. Often, adding
`-m 10M` to the `perf trace` invocation is enough.

This feature is also available in strace when built with libunwind
via `strace -k`. Performance wise, this solution is much better:

    $ time find path/to/linux &> /dev/null

    real    0m0.051s
    user    0m0.013s
    sys     0m0.037s

    $ time perf trace -m 800M --call-graph dwarf find path/to/linux &> /dev/null

    real    0m2.624s
    user    0m1.203s
    sys     0m1.333s

    $ time strace -k find path/to/linux  &> /dev/null

    real    0m35.398s
    user    0m10.403s
    sys     0m23.173s

Note that it is currently not possible to configure the print output.
Adding such a feature, similar to what is available in `perf script`
via its `--fields` knob can be added later on.

Signed-off-by: Milian Wolff <milian.wolff@kdab.com>
---
 tools/perf/builtin-script.c |  4 ++--
 tools/perf/builtin-trace.c  | 27 +++++++++++++++++++++++++++
 tools/perf/util/session.c   | 39 +++++++++++++++++++++------------------
 tools/perf/util/session.h   |  3 ++-
 4 files changed, 52 insertions(+), 21 deletions(-)

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 3770c3d..e79d6f1 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -580,7 +580,7 @@ static void print_sample_bts(struct perf_sample *sample,
 			}
 		}
 		perf_evsel__print_ip(evsel, sample, al, print_opts,
-				     scripting_max_stack);
+				     scripting_max_stack, stdout);
 	}
 
 	/* print branch_to information */
@@ -790,7 +790,7 @@ static void process_event(struct perf_script *script,
 
 		perf_evsel__print_ip(evsel, sample, al,
 				     output[attr->type].print_ip_opts,
-				     scripting_max_stack);
+				     scripting_max_stack, stdout);
 	}
 
 	if (PRINT_FIELD(IREGS))
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index d309f45..f3b5e07 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -34,6 +34,7 @@
 #include "trace-event.h"
 #include "util/parse-events.h"
 #include "util/bpf-loader.h"
+#include "util/callchain.h"
 
 #include <libaudit.h>
 #include <stdlib.h>
@@ -2161,6 +2162,23 @@ signed_print:
 		goto signed_print;
 
 	fputc('\n', trace->output);
+
+	if (sample->callchain) {
+		struct addr_location al;
+		/* TODO: user-configurable print_opts */
+		unsigned int print_opts = PRINT_IP_OPT_IP
+					| PRINT_IP_OPT_SYM
+					| PRINT_IP_OPT_DSO;
+
+		if (machine__resolve(trace->host, &al, sample) < 0) {
+			pr_err("problem processing %d event, skipping it.\n",
+			    event->header.type);
+			err = -1;
+			goto out_put;
+		}
+		perf_evsel__print_ip(evsel, sample, &al, print_opts,
+				     scripting_max_stack, trace->output);
+	}
 out:
 	ttrace->entry_pending = false;
 	err = 0;
@@ -3225,6 +3243,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 		     "Trace pagefaults", parse_pagefaults, "maj"),
 	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
 	OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
+	OPT_CALLBACK(0, "call-graph", &trace.opts,
+		     "record_mode[,record_size]", record_callchain_help,
+		     &record_parse_callchain_opt),
 	OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
 			"per thread proc mmap processing timeout in ms"),
 	OPT_END()
@@ -3252,6 +3273,12 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 		trace.opts.sample_time = true;
 	}
 
+	if (trace.opts.callgraph_set) {
+		symbol_conf.use_callchain = true;
+		callchain_param.enabled = true;
+		symbol__init(NULL);
+	}
+
 	if (trace.evlist->nr_entries > 0)
 		evlist__set_evsel_handler(trace.evlist, trace__event_handler);
 
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index ef37055..bbac0ef 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1955,7 +1955,8 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
 
 void perf_evsel__print_ip(struct perf_evsel *evsel, struct perf_sample *sample,
 			  struct addr_location *al,
-			  unsigned int print_opts, unsigned int stack_depth)
+			  unsigned int print_opts, unsigned int stack_depth,
+			  FILE *fp)
 {
 	struct callchain_cursor_node *node;
 	int print_ip = print_opts & PRINT_IP_OPT_IP;
@@ -1992,33 +1993,35 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, struct perf_sample *sample,
 				goto next;
 
 			if (print_ip)
-				printf("%c%16" PRIx64, s, node->ip);
+				fprintf(fp, "%c%16" PRIx64, s, node->ip);
 
 			if (node->map)
 				addr = node->map->map_ip(node->map, node->ip);
 
 			if (print_sym) {
-				printf(" ");
+				fprintf(fp, " ");
 				if (print_symoffset) {
 					node_al.addr = addr;
 					node_al.map  = node->map;
-					symbol__fprintf_symname_offs(node->sym, &node_al, stdout);
+					symbol__fprintf_symname_offs(node->sym,
+								     &node_al,
+								     fp);
 				} else
-					symbol__fprintf_symname(node->sym, stdout);
+					symbol__fprintf_symname(node->sym, fp);
 			}
 
 			if (print_dso) {
-				printf(" (");
-				map__fprintf_dsoname(node->map, stdout);
-				printf(")");
+				fprintf(fp, " (");
+				map__fprintf_dsoname(node->map, fp);
+				fprintf(fp, ")");
 			}
 
 			if (print_srcline)
 				map__fprintf_srcline(node->map, addr, "\n  ",
-						     stdout);
+						     fp);
 
 			if (!print_oneline)
-				printf("\n");
+				fprintf(fp, "\n");
 
 			stack_depth--;
 next:
@@ -2030,25 +2033,25 @@ next:
 			return;
 
 		if (print_ip)
-			printf("%16" PRIx64, sample->ip);
+			fprintf(fp, "%16" PRIx64, sample->ip);
 
 		if (print_sym) {
-			printf(" ");
+			fprintf(fp, " ");
 			if (print_symoffset)
 				symbol__fprintf_symname_offs(al->sym, al,
-							     stdout);
+							     fp);
 			else
-				symbol__fprintf_symname(al->sym, stdout);
+				symbol__fprintf_symname(al->sym, fp);
 		}
 
 		if (print_dso) {
-			printf(" (");
-			map__fprintf_dsoname(al->map, stdout);
-			printf(")");
+			fprintf(fp, " (");
+			map__fprintf_dsoname(al->map, fp);
+			fprintf(fp, ")");
 		}
 
 		if (print_srcline)
-			map__fprintf_srcline(al->map, al->addr, "\n  ", stdout);
+			map__fprintf_srcline(al->map, al->addr, "\n  ", fp);
 	}
 }
 
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index f96fc9e8c..0ee3d9d 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -106,7 +106,8 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
 
 void perf_evsel__print_ip(struct perf_evsel *evsel, struct perf_sample *sample,
 			  struct addr_location *al,
-			  unsigned int print_opts, unsigned int stack_depth);
+			  unsigned int print_opts, unsigned int stack_depth,
+			  FILE *fp);
 
 int perf_session__cpu_bitmap(struct perf_session *session,
 			     const char *cpu_list, unsigned long *cpu_bitmap);
-- 
2.8.0

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] perf trace: Add support for printing call chains on sys_exit events.
  2016-04-08 11:34 [PATCH] perf trace: Add support for printing call chains on sys_exit events Milian Wolff
@ 2016-04-08 17:57 ` Arnaldo Carvalho de Melo
  2016-04-08 18:18   ` Arnaldo Carvalho de Melo
  2016-04-09 11:38   ` Milian Wolff
  0 siblings, 2 replies; 5+ messages in thread
From: Arnaldo Carvalho de Melo @ 2016-04-08 17:57 UTC (permalink / raw)
  To: Milian Wolff; +Cc: linux-perf-users, Jiri Olsa

Em Fri, Apr 08, 2016 at 01:34:15PM +0200, Milian Wolff escreveu:
> Now, one can print the call chain for every encountered sys_exit
> event, e.g.:

You mixed up multiple changes in one single patch, I'll break it down
while testing, and before pushing upstream.

- Arnaldo
 
>     $ perf trace -e nanosleep --call-graph dwarf path/to/ex_sleep
>     1005.757 (1000.090 ms): ex_sleep/13167 nanosleep(...) = 0
>                     20379e syscall_slow_exit_work ([kernel.kallsyms])
>                     203bc4 syscall_return_slowpath ([kernel.kallsyms])
>                     797248 int_ret_from_sys_call ([kernel.kallsyms])
>                 7f3991cd2600 __nanosleep (/usr/lib/libc-2.23.so)
>                 7f39926ab08d [unknown] (/usr/lib/libQt5Core.so.5.6.0)
>                 7f39925fe1bb QThread::sleep (/usr/lib/libQt5Core.so.5.6.0)
>                 5614801e4a2d main (path/to/ex_sleep)
>                 7f3991c3b710 __libc_start_main (/usr/lib/libc-2.23.so)
>                 5614801e4aa9 _start (path/to/ex_sleep)
> 
> Note that it is advised to increase the number of mmap pages to
> prevent event losses when using this new feature. Often, adding
> `-m 10M` to the `perf trace` invocation is enough.
> 
> This feature is also available in strace when built with libunwind
> via `strace -k`. Performance wise, this solution is much better:
> 
>     $ time find path/to/linux &> /dev/null
> 
>     real    0m0.051s
>     user    0m0.013s
>     sys     0m0.037s
> 
>     $ time perf trace -m 800M --call-graph dwarf find path/to/linux &> /dev/null
> 
>     real    0m2.624s
>     user    0m1.203s
>     sys     0m1.333s
> 
>     $ time strace -k find path/to/linux  &> /dev/null
> 
>     real    0m35.398s
>     user    0m10.403s
>     sys     0m23.173s
> 
> Note that it is currently not possible to configure the print output.
> Adding such a feature, similar to what is available in `perf script`
> via its `--fields` knob can be added later on.
> 
> Signed-off-by: Milian Wolff <milian.wolff@kdab.com>
> ---
>  tools/perf/builtin-script.c |  4 ++--
>  tools/perf/builtin-trace.c  | 27 +++++++++++++++++++++++++++
>  tools/perf/util/session.c   | 39 +++++++++++++++++++++------------------
>  tools/perf/util/session.h   |  3 ++-
>  4 files changed, 52 insertions(+), 21 deletions(-)
> 
> diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
> index 3770c3d..e79d6f1 100644
> --- a/tools/perf/builtin-script.c
> +++ b/tools/perf/builtin-script.c
> @@ -580,7 +580,7 @@ static void print_sample_bts(struct perf_sample *sample,
>  			}
>  		}
>  		perf_evsel__print_ip(evsel, sample, al, print_opts,
> -				     scripting_max_stack);
> +				     scripting_max_stack, stdout);
>  	}
>  
>  	/* print branch_to information */
> @@ -790,7 +790,7 @@ static void process_event(struct perf_script *script,
>  
>  		perf_evsel__print_ip(evsel, sample, al,
>  				     output[attr->type].print_ip_opts,
> -				     scripting_max_stack);
> +				     scripting_max_stack, stdout);
>  	}
>  
>  	if (PRINT_FIELD(IREGS))
> diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
> index d309f45..f3b5e07 100644
> --- a/tools/perf/builtin-trace.c
> +++ b/tools/perf/builtin-trace.c
> @@ -34,6 +34,7 @@
>  #include "trace-event.h"
>  #include "util/parse-events.h"
>  #include "util/bpf-loader.h"
> +#include "util/callchain.h"
>  
>  #include <libaudit.h>
>  #include <stdlib.h>
> @@ -2161,6 +2162,23 @@ signed_print:
>  		goto signed_print;
>  
>  	fputc('\n', trace->output);
> +
> +	if (sample->callchain) {
> +		struct addr_location al;
> +		/* TODO: user-configurable print_opts */
> +		unsigned int print_opts = PRINT_IP_OPT_IP
> +					| PRINT_IP_OPT_SYM
> +					| PRINT_IP_OPT_DSO;
> +
> +		if (machine__resolve(trace->host, &al, sample) < 0) {
> +			pr_err("problem processing %d event, skipping it.\n",
> +			    event->header.type);
> +			err = -1;
> +			goto out_put;
> +		}
> +		perf_evsel__print_ip(evsel, sample, &al, print_opts,
> +				     scripting_max_stack, trace->output);
> +	}
>  out:
>  	ttrace->entry_pending = false;
>  	err = 0;
> @@ -3225,6 +3243,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
>  		     "Trace pagefaults", parse_pagefaults, "maj"),
>  	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
>  	OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
> +	OPT_CALLBACK(0, "call-graph", &trace.opts,
> +		     "record_mode[,record_size]", record_callchain_help,
> +		     &record_parse_callchain_opt),
>  	OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
>  			"per thread proc mmap processing timeout in ms"),
>  	OPT_END()
> @@ -3252,6 +3273,12 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
>  		trace.opts.sample_time = true;
>  	}
>  
> +	if (trace.opts.callgraph_set) {
> +		symbol_conf.use_callchain = true;
> +		callchain_param.enabled = true;
> +		symbol__init(NULL);
> +	}
> +
>  	if (trace.evlist->nr_entries > 0)
>  		evlist__set_evsel_handler(trace.evlist, trace__event_handler);
>  
> diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
> index ef37055..bbac0ef 100644
> --- a/tools/perf/util/session.c
> +++ b/tools/perf/util/session.c
> @@ -1955,7 +1955,8 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
>  
>  void perf_evsel__print_ip(struct perf_evsel *evsel, struct perf_sample *sample,
>  			  struct addr_location *al,
> -			  unsigned int print_opts, unsigned int stack_depth)
> +			  unsigned int print_opts, unsigned int stack_depth,
> +			  FILE *fp)
>  {
>  	struct callchain_cursor_node *node;
>  	int print_ip = print_opts & PRINT_IP_OPT_IP;
> @@ -1992,33 +1993,35 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, struct perf_sample *sample,
>  				goto next;
>  
>  			if (print_ip)
> -				printf("%c%16" PRIx64, s, node->ip);
> +				fprintf(fp, "%c%16" PRIx64, s, node->ip);
>  
>  			if (node->map)
>  				addr = node->map->map_ip(node->map, node->ip);
>  
>  			if (print_sym) {
> -				printf(" ");
> +				fprintf(fp, " ");
>  				if (print_symoffset) {
>  					node_al.addr = addr;
>  					node_al.map  = node->map;
> -					symbol__fprintf_symname_offs(node->sym, &node_al, stdout);
> +					symbol__fprintf_symname_offs(node->sym,
> +								     &node_al,
> +								     fp);
>  				} else
> -					symbol__fprintf_symname(node->sym, stdout);
> +					symbol__fprintf_symname(node->sym, fp);
>  			}
>  
>  			if (print_dso) {
> -				printf(" (");
> -				map__fprintf_dsoname(node->map, stdout);
> -				printf(")");
> +				fprintf(fp, " (");
> +				map__fprintf_dsoname(node->map, fp);
> +				fprintf(fp, ")");
>  			}
>  
>  			if (print_srcline)
>  				map__fprintf_srcline(node->map, addr, "\n  ",
> -						     stdout);
> +						     fp);
>  
>  			if (!print_oneline)
> -				printf("\n");
> +				fprintf(fp, "\n");
>  
>  			stack_depth--;
>  next:
> @@ -2030,25 +2033,25 @@ next:
>  			return;
>  
>  		if (print_ip)
> -			printf("%16" PRIx64, sample->ip);
> +			fprintf(fp, "%16" PRIx64, sample->ip);
>  
>  		if (print_sym) {
> -			printf(" ");
> +			fprintf(fp, " ");
>  			if (print_symoffset)
>  				symbol__fprintf_symname_offs(al->sym, al,
> -							     stdout);
> +							     fp);
>  			else
> -				symbol__fprintf_symname(al->sym, stdout);
> +				symbol__fprintf_symname(al->sym, fp);
>  		}
>  
>  		if (print_dso) {
> -			printf(" (");
> -			map__fprintf_dsoname(al->map, stdout);
> -			printf(")");
> +			fprintf(fp, " (");
> +			map__fprintf_dsoname(al->map, fp);
> +			fprintf(fp, ")");
>  		}
>  
>  		if (print_srcline)
> -			map__fprintf_srcline(al->map, al->addr, "\n  ", stdout);
> +			map__fprintf_srcline(al->map, al->addr, "\n  ", fp);
>  	}
>  }
>  
> diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
> index f96fc9e8c..0ee3d9d 100644
> --- a/tools/perf/util/session.h
> +++ b/tools/perf/util/session.h
> @@ -106,7 +106,8 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
>  
>  void perf_evsel__print_ip(struct perf_evsel *evsel, struct perf_sample *sample,
>  			  struct addr_location *al,
> -			  unsigned int print_opts, unsigned int stack_depth);
> +			  unsigned int print_opts, unsigned int stack_depth,
> +			  FILE *fp);
>  
>  int perf_session__cpu_bitmap(struct perf_session *session,
>  			     const char *cpu_list, unsigned long *cpu_bitmap);
> -- 
> 2.8.0

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] perf trace: Add support for printing call chains on sys_exit events.
  2016-04-08 17:57 ` Arnaldo Carvalho de Melo
@ 2016-04-08 18:18   ` Arnaldo Carvalho de Melo
  2016-04-09 11:44     ` Milian Wolff
  2016-04-09 11:38   ` Milian Wolff
  1 sibling, 1 reply; 5+ messages in thread
From: Arnaldo Carvalho de Melo @ 2016-04-08 18:18 UTC (permalink / raw)
  To: Milian Wolff
  Cc: linux-perf-users, Jiri Olsa, David Ahern, Brendan Gregg,
	Linux Kernel Mailing List, Namhyung Kim, Wang Nan

Em Fri, Apr 08, 2016 at 02:57:54PM -0300, Arnaldo Carvalho de Melo escreveu:
> Em Fri, Apr 08, 2016 at 01:34:15PM +0200, Milian Wolff escreveu:
> > Now, one can print the call chain for every encountered sys_exit
> > event, e.g.:

> > Note that it is advised to increase the number of mmap pages to
> > prevent event losses when using this new feature. Often, adding
> > `-m 10M` to the `perf trace` invocation is enough.

> > This feature is also available in strace when built with libunwind
> > via `strace -k`. Performance wise, this solution is much better:

> >     $ time find path/to/linux &> /dev/null

> >     real    0m0.051s
> >     user    0m0.013s
> >     sys     0m0.037s

> >     $ time perf trace -m 800M --call-graph dwarf find path/to/linux &>
> >     /dev/null

> >     real    0m2.624s
> >     user    0m1.203s
> >     sys     0m1.333s

> >     $ time strace -k find path/to/linux  &> /dev/null
 
> >     real    0m35.398s
> >     user    0m10.403s
> >     sys     0m23.173s

> > Note that it is currently not possible to configure the print output.
> > Adding such a feature, similar to what is available in `perf script`
> > via its `--fields` knob can be added later on.
 
> You mixed up multiple changes in one single patch, I'll break it down
> while testing, and before pushing upstream.

Expanding a bit the audience:

First test, it works, great! But do we really need that address? I guess not,
right, perhaps via some callchain parameter, to tell what we want to see? But
by default knowing the function name + DSO seems enough, no?

[root@jouet bpf]# trace -e nanosleep --call-graph dwarf usleep 1
     0.071 ( 0.071 ms): usleep/5455 nanosleep(rqtp: 0x7ffee070f080) = 0
	          2036be syscall_slow_exit_work ([kernel.kallsyms])
	          203dfb do_syscall_64 ([kernel.kallsyms])
	          9b8fe1 return_from_SYSCALL_64 ([kernel.kallsyms])
	    7f41622ec790 __nanosleep (/usr/lib64/libc-2.22.so)
	    7f416231d524 usleep (/usr/lib64/libc-2.22.so)
	    563b6c6afcab [unknown] (/usr/bin/usleep)
	    7f4162244580 __libc_start_main (/usr/lib64/libc-2.22.so)
	    563b6c6afce9 [unknown] (/usr/bin/usleep)
[root@jouet bpf]#

Yeah, you agree with that, now that I read the patch 8-):

+               /* TODO: user-configurable print_opts */
+               unsigned int print_opts = PRINT_IP_OPT_IP


Ok, removing that OPT_IP I get, oops, the alignment is beign done only on ip?

[root@jouet bpf]# trace -e nanosleep --call-graph dwarf usleep 1
     0.063 ( 0.063 ms): usleep/6132 nanosleep(rqtp: 0x7ffd1b7a8e70                                        ) = 0
 syscall_slow_exit_work ([kernel.kallsyms])
 do_syscall_64 ([kernel.kallsyms])
 return_from_SYSCALL_64 ([kernel.kallsyms])
 __nanosleep (/usr/lib64/libc-2.22.so)
 usleep (/usr/lib64/libc-2.22.so)
 [unknown] (/usr/bin/usleep)
 __libc_start_main (/usr/lib64/libc-2.22.so)
 [unknown] (/usr/bin/usleep)
[root@jouet bpf]# 

Fixing it up we get:

[root@jouet bpf]# trace -e nanosleep --call-graph dwarf usleep 1
     0.063 ( 0.063 ms): usleep/6132 nanosleep(rqtp: 0x7ffd1b7a8e70                                        ) = 0
                                    syscall_slow_exit_work ([kernel.kallsyms])
                                    do_syscall_64 ([kernel.kallsyms])
                                    return_from_SYSCALL_64 ([kernel.kallsyms])
                                    __nanosleep (/usr/lib64/libc-2.22.so)
                                    usleep (/usr/lib64/libc-2.22.so)
                                    [unknown] (/usr/bin/usleep)
                                    __libc_start_main (/usr/lib64/libc-2.22.so)
                                    [unknown] (/usr/bin/usleep)
[root@jouet bpf]# 

Better, but perhaps we should try aligning, up to a limit, the function names/DSOs?

[root@jouet bpf]# trace -e nanosleep --call-graph dwarf usleep 1
     0.063 ( 0.063 ms): usleep/6132 nanosleep(rqtp: 0x7ffd1b7a8e70                                        ) = 0
                                    syscall_slow_exit_work ([kernel.kallsyms])
                                    do_syscall_64          ([kernel.kallsyms])
                                    return_from_SYSCALL_64 ([kernel.kallsyms])
                                    __nanosleep            (/usr/lib64/libc-2.22.so)
                                    usleep                 (/usr/lib64/libc-2.22.so)
                                    [unknown]              (/usr/bin/usleep)
                                    __libc_start_main      (/usr/lib64/libc-2.22.so)
                                    [unknown]              (/usr/bin/usleep)
[root@jouet bpf]# 

wdyt?

Also, after this initial support is in, I think the next step is to
allow per syscall configs, like we have for per tracepoints, i.e. this
should be possible:

   # trace -e nanosleep(call-graph=dwarf),socket -a

And then we would get callchains just for nanosleep calls, not for
socket ones. We then need to think how to ask that efficiently to the
kernel, in this case it should be instead of using
raw_syscalls:sys_enter + tracepoint filters set via ioctl, to use
syscalls:sys_{enter,exit}_nanosleep, with callgraphs +
syscalls:sys_{enter,exit}_socket, without.

Doing it this way allows us to avoid asking callchains for a lot of
events when we want just for a few ones, to reduce overhead.

Anyway, I think I'll just break this down into multiple patches and then
we can work on these other aspects.

David, ah, his patch floated on the linux-perf-users mailing list, easy
one once the thread->priv one got out of the way (it was being used by
builtin-trace.c and the unwind code, ugh).

Thanks,

- Arnaldo

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] perf trace: Add support for printing call chains on sys_exit events.
  2016-04-08 17:57 ` Arnaldo Carvalho de Melo
  2016-04-08 18:18   ` Arnaldo Carvalho de Melo
@ 2016-04-09 11:38   ` Milian Wolff
  1 sibling, 0 replies; 5+ messages in thread
From: Milian Wolff @ 2016-04-09 11:38 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo; +Cc: linux-perf-users, Jiri Olsa

[-- Attachment #1: Type: text/plain, Size: 863 bytes --]

On Freitag, 8. April 2016 14:57:54 CEST Arnaldo Carvalho de Melo wrote:
> Em Fri, Apr 08, 2016 at 01:34:15PM +0200, Milian Wolff escreveu:
> > Now, one can print the call chain for every encountered sys_exit
> 
> > event, e.g.:
> You mixed up multiple changes in one single patch, I'll break it down
> while testing, and before pushing upstream.

Cool, thanks a lot Arnaldo. I'm also interested to see how you break it up, 
apparently you handle this differently to the projects I work on so far. I 
guess you want the "pass `FILE* fp` to `perf_evsel__print_ip`" as one chunk 
and then actually using it in a second patch? Or are there even more steps you 
can split this into?

Thanks

-- 
Milian Wolff | milian.wolff@kdab.com | Software Engineer
KDAB (Deutschland) GmbH&Co KG, a KDAB Group company
Tel: +49-30-521325470
KDAB - The Qt Experts

[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 5903 bytes --]

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] perf trace: Add support for printing call chains on sys_exit events.
  2016-04-08 18:18   ` Arnaldo Carvalho de Melo
@ 2016-04-09 11:44     ` Milian Wolff
  0 siblings, 0 replies; 5+ messages in thread
From: Milian Wolff @ 2016-04-09 11:44 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: linux-perf-users, Jiri Olsa, David Ahern, Brendan Gregg,
	Linux Kernel Mailing List, Namhyung Kim, Wang Nan

[-- Attachment #1: Type: text/plain, Size: 4966 bytes --]

On Freitag, 8. April 2016 15:18:53 CEST Arnaldo Carvalho de Melo wrote:
> Em Fri, Apr 08, 2016 at 02:57:54PM -0300, Arnaldo Carvalho de Melo escreveu:
> > Em Fri, Apr 08, 2016 at 01:34:15PM +0200, Milian Wolff escreveu:
> > > Now, one can print the call chain for every encountered sys_exit
> > > event, e.g.:
> > > 
> > > Note that it is advised to increase the number of mmap pages to
> > > prevent event losses when using this new feature. Often, adding
> > > `-m 10M` to the `perf trace` invocation is enough.
> > > 
> > > This feature is also available in strace when built with libunwind
> > > 
> > > via `strace -k`. Performance wise, this solution is much better:
> > >     $ time find path/to/linux &> /dev/null
> > >     
> > >     real    0m0.051s
> > >     user    0m0.013s
> > >     sys     0m0.037s
> > >     
> > >     $ time perf trace -m 800M --call-graph dwarf find path/to/linux &>
> > >     /dev/null
> > >     
> > >     real    0m2.624s
> > >     user    0m1.203s
> > >     sys     0m1.333s
> > >     
> > >     $ time strace -k find path/to/linux  &> /dev/null
> > >     
> > >     real    0m35.398s
> > >     user    0m10.403s
> > >     sys     0m23.173s
> > > 
> > > Note that it is currently not possible to configure the print output.
> > > Adding such a feature, similar to what is available in `perf script`
> > > via its `--fields` knob can be added later on.
> > 
> > You mixed up multiple changes in one single patch, I'll break it down
> > while testing, and before pushing upstream.
> 
> Expanding a bit the audience:
> 
> First test, it works, great! But do we really need that address? I guess
> not, right, perhaps via some callchain parameter, to tell what we want to
> see? But by default knowing the function name + DSO seems enough, no?

...

> Yeah, you agree with that, now that I read the patch 8-):
> 
> +               /* TODO: user-configurable print_opts */
> +               unsigned int print_opts = PRINT_IP_OPT_IP

;-)

I even tried to make the code of `perf script` reusable for `perf trace`, but 
stopped once I realised that it currently relies on the existance of a 
`perf_session`, which does not exist when we do live tracing. It only exists 
for replaying in `builtin-trace.c`. So it involves some more refactoring which 
I did not have the time for.

<snip>

> Better, but perhaps we should try aligning, up to a limit, the function
> names/DSOs?
> 
> [root@jouet bpf]# trace -e nanosleep --call-graph dwarf usleep 1
>      0.063 ( 0.063 ms): usleep/6132 nanosleep(rqtp: 0x7ffd1b7a8e70          
>                              ) = 0 syscall_slow_exit_work
> ([kernel.kallsyms]) do_syscall_64          ([kernel.kallsyms])
> return_from_SYSCALL_64 ([kernel.kallsyms]) __nanosleep           
> (/usr/lib64/libc-2.22.so) usleep                 (/usr/lib64/libc-2.22.so)
> [unknown]              (/usr/bin/usleep) __libc_start_main     
> (/usr/lib64/libc-2.22.so) [unknown]              (/usr/bin/usleep)
> [root@jouet bpf]#
> 
> wdyt?

Yes, sounds good. Many profilers I've worked with always dump the IP, so I 
thought we should do it here as well. `perf script` e.g. does it. Could we 
maybe print the IP if the symbol is [unknown]?

> Also, after this initial support is in, I think the next step is to
> allow per syscall configs, like we have for per tracepoints, i.e. this
> should be possible:
> 
>    # trace -e nanosleep(call-graph=dwarf),socket -a
> 
> And then we would get callchains just for nanosleep calls, not for
> socket ones. We then need to think how to ask that efficiently to the
> kernel, in this case it should be instead of using
> raw_syscalls:sys_enter + tracepoint filters set via ioctl, to use
> syscalls:sys_{enter,exit}_nanosleep, with callgraphs +
> syscalls:sys_{enter,exit}_socket, without.
> 
> Doing it this way allows us to avoid asking callchains for a lot of
> events when we want just for a few ones, to reduce overhead.
 
Yep, sounds useful for some more specific use-cases. For me, this patch is 
sufficient as I'd just do:

    $ trace -e nanosleep --call-graph=dwarf ...

What I think is more important though is to make sure we only ask for 
callchains on the sys_exit events. Afaik, my patch will do it also for the 
sys_enter which is just additional cost with no benefit? So fixing that first 
is I think even more important, but I don't know how.

> Anyway, I think I'll just break this down into multiple patches and then
> we can work on these other aspects.

Yes, but note that I'll be busy and then on vacation for the next two weeks. 
I'll get back to this after wards.

> David, ah, his patch floated on the linux-perf-users mailing list, easy
> one once the thread->priv one got out of the way (it was being used by
> builtin-trace.c and the unwind code, ugh).
> 
> Thanks,

Same to you, cheers!

-- 
Milian Wolff | milian.wolff@kdab.com | Software Engineer
KDAB (Deutschland) GmbH&Co KG, a KDAB Group company
Tel: +49-30-521325470
KDAB - The Qt Experts

[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 5903 bytes --]

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2016-04-09 11:44 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-04-08 11:34 [PATCH] perf trace: Add support for printing call chains on sys_exit events Milian Wolff
2016-04-08 17:57 ` Arnaldo Carvalho de Melo
2016-04-08 18:18   ` Arnaldo Carvalho de Melo
2016-04-09 11:44     ` Milian Wolff
2016-04-09 11:38   ` Milian Wolff

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.