All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] perf report: Add option to collapse undesired parts of call graph
@ 2012-12-07  7:30 Greg Price
  2013-01-11  5:27 ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 22+ messages in thread
From: Greg Price @ 2012-12-07  7:30 UTC (permalink / raw)
  To: linux-kernel
  Cc: Peter Zijlstra, Paul Mackerras, Ingo Molnar,
	Arnaldo Carvalho de Melo, Jiri Olsa, David Ahern

If an application has an expensive function implemented with a large
tree of calls to helper functions, the default call-graph presentation
will be dominated by the many different call-chains within that
function.  By treating the function as a black box, we can collect the
call-chains leading into the function and compactly identify what to
blame for expensive calls.

For example, in this report the callers of garbage_collect() are
scattered across the tree:
$ perf report -d ruby 2>- | grep -m10 ^[^#]*[a-z]
    22.03%     ruby  [.] gc_mark
               --- gc_mark
                  |--59.40%-- mark_keyvalue
                  |          st_foreach
                  |          gc_mark_children
                  |          |--99.75%-- rb_gc_mark
                  |          |          rb_vm_mark
                  |          |          gc_mark_children
                  |          |          gc_marks
                  |          |          |--99.00%-- garbage_collect

If we make garbage_collect() a black box, its callers are coalesced:
$ perf report --blackbox garbage_collect -d ruby 2>- | grep -m10 ^[^#]*[a-z]
    72.92%     ruby  [.] garbage_collect
               --- garbage_collect
                   vm_xmalloc
                  |--47.08%-- ruby_xmalloc
                  |          st_insert2
                  |          rb_hash_aset
                  |          |--98.45%-- features_index_add
                  |          |          rb_provide_feature
                  |          |          rb_require_safe
                  |          |          vm_call_method

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Signed-off-by: Greg Price <price@mit.edu>
---
 tools/perf/builtin-report.c | 17 +++++++++++++++--
 tools/perf/builtin-top.c    |  3 +--
 tools/perf/util/map.h       |  4 +++-
 tools/perf/util/session.c   | 29 ++++++++++++++++++-----------
 tools/perf/util/session.h   |  5 +++++
 5 files changed, 42 insertions(+), 16 deletions(-)

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index a61725d..3bbda35 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -70,7 +70,7 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
 	if ((sort__has_parent || symbol_conf.use_callchain)
 	    && sample->callchain) {
 		err = machine__resolve_callchain(machine, evsel, al->thread,
-						 sample, &parent);
+						 sample, &parent, al);
 		if (err)
 			return err;
 	}
@@ -141,7 +141,7 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,
 
 	if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) {
 		err = machine__resolve_callchain(machine, evsel, al->thread,
-						 sample, &parent);
+						 sample, &parent, al);
 		if (err)
 			return err;
 	}
@@ -607,6 +607,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 		     "Default: fractal,0.5,callee", &parse_callchain_opt, callchain_default_opt),
 	OPT_BOOLEAN('G', "inverted", &report.inverted_callchain,
 		    "alias for inverted call graph"),
+	OPT_STRING(0, "blackbox", &blackbox_pattern, "regex",
+		   "functions to treat as black boxes in call graphs, collapsing callees"),
 	OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
 		   "only consider symbols in these dsos"),
 	OPT_STRING('c', "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
@@ -687,6 +689,17 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 
 	}
 
+	if (blackbox_pattern) {
+		int err = regcomp(&blackbox_regex, blackbox_pattern, REG_EXTENDED);
+		if (err) {
+			char buf[BUFSIZ];
+			regerror(err, &blackbox_regex, buf, sizeof(buf));
+			pr_err("Invalid blackbox regex: %s\n%s", blackbox_pattern, buf);
+			goto error;
+		}
+		have_blackbox = 1;
+	}
+
 	if (strcmp(report.input_name, "-") != 0)
 		setup_browser(true);
 	else {
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index ff6db80..ee969b5 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -786,8 +786,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
 		    sample->callchain) {
 			err = machine__resolve_callchain(machine, evsel,
 							 al.thread, sample,
-							 &parent);
-
+							 &parent, NULL);
 			if (err)
 				return;
 		}
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index d2250fc..6d1b8e1 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -23,6 +23,7 @@ struct ref_reloc_sym;
 struct map_groups;
 struct machine;
 struct perf_evsel;
+struct addr_location;
 
 struct map {
 	union {
@@ -163,7 +164,8 @@ int machine__resolve_callchain(struct machine *machine,
 			       struct perf_evsel *evsel,
 			       struct thread *thread,
 			       struct perf_sample *sample,
-			       struct symbol **parent);
+			       struct symbol **parent,
+			       struct addr_location *root_al);
 int maps__set_kallsyms_ref_reloc_sym(struct map **maps, const char *symbol_name,
 				     u64 addr);
 
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 8cdd232..9a8798c 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -19,6 +19,10 @@
 #include "unwind.h"
 #include "vdso.h"
 
+regex_t blackbox_regex;
+const char *blackbox_pattern;
+int have_blackbox = 0;
+
 static int perf_session__open(struct perf_session *self, bool force)
 {
 	struct stat input_stat;
@@ -226,11 +230,10 @@ void machine__remove_thread(struct machine *self, struct thread *th)
 	list_add_tail(&th->node, &self->dead_threads);
 }
 
-static bool symbol__match_parent_regex(struct symbol *sym)
+static bool symbol__match_regex(struct symbol *sym, regex_t *regex)
 {
-	if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0))
+	if (sym->name && !regexec(regex, sym->name, 0, NULL, 0))
 		return 1;
-
 	return 0;
 }
 
@@ -295,8 +298,8 @@ struct branch_info *machine__resolve_bstack(struct machine *self,
 static int machine__resolve_callchain_sample(struct machine *machine,
 					     struct thread *thread,
 					     struct ip_callchain *chain,
-					     struct symbol **parent)
-
+					     struct symbol **parent,
+					     struct addr_location *root_al)
 {
 	u8 cpumode = PERF_RECORD_MISC_USER;
 	unsigned int i;
@@ -347,8 +350,13 @@ static int machine__resolve_callchain_sample(struct machine *machine,
 					   MAP__FUNCTION, ip, &al, NULL);
 		if (al.sym != NULL) {
 			if (sort__has_parent && !*parent &&
-			    symbol__match_parent_regex(al.sym))
+			    symbol__match_regex(al.sym, &parent_regex))
 				*parent = al.sym;
+			else if (have_blackbox && root_al &&
+			         symbol__match_regex(al.sym, &blackbox_regex)) {
+				*root_al = al;
+				callchain_cursor_reset(&callchain_cursor);
+			}
 			if (!symbol_conf.use_callchain)
 				break;
 		}
@@ -373,15 +381,15 @@ int machine__resolve_callchain(struct machine *machine,
 			       struct perf_evsel *evsel,
 			       struct thread *thread,
 			       struct perf_sample *sample,
-			       struct symbol **parent)
-
+			       struct symbol **parent,
+			       struct addr_location *root_al)
 {
 	int ret;
 
 	callchain_cursor_reset(&callchain_cursor);
 
 	ret = machine__resolve_callchain_sample(machine, thread,
-						sample->callchain, parent);
+	                                        sample->callchain, parent, root_al);
 	if (ret)
 		return ret;
 
@@ -1603,9 +1611,8 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event,
 
 	if (symbol_conf.use_callchain && sample->callchain) {
 
-
 		if (machine__resolve_callchain(machine, evsel, al.thread,
-					       sample, NULL) != 0) {
+					       sample, NULL, NULL) != 0) {
 			if (verbose)
 				error("Failed to resolve callchain. Skipping\n");
 			return;
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 0eae00a..6db3e55 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -1,6 +1,7 @@
 #ifndef __PERF_SESSION_H
 #define __PERF_SESSION_H
 
+#include <regex.h>
 #include "hist.h"
 #include "event.h"
 #include "header.h"
@@ -9,6 +10,10 @@
 #include <linux/rbtree.h>
 #include <linux/perf_event.h>
 
+extern regex_t blackbox_regex;
+extern const char *blackbox_pattern;
+extern int have_blackbox;
+
 struct sample_queue;
 struct ip_callchain;
 struct thread;
-- 
1.7.11.3


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH] perf report: Add option to collapse undesired parts of call graph
  2012-12-07  7:30 [PATCH] perf report: " Greg Price
@ 2013-01-11  5:27 ` Arnaldo Carvalho de Melo
  2013-02-25  4:28   ` Greg Price
  2013-06-23  3:17   ` Greg Price
  0 siblings, 2 replies; 22+ messages in thread
From: Arnaldo Carvalho de Melo @ 2013-01-11  5:27 UTC (permalink / raw)
  To: Greg Price
  Cc: linux-kernel, Peter Zijlstra, Paul Mackerras, Ingo Molnar,
	Jiri Olsa, David Ahern

Em Fri, Dec 07, 2012 at 02:30:44AM -0500, Greg Price escreveu:
> If an application has an expensive function implemented with a large
> tree of calls to helper functions, the default call-graph presentation
> will be dominated by the many different call-chains within that
> function.  By treating the function as a black box, we can collect the
> call-chains leading into the function and compactly identify what to
> blame for expensive calls.

Looks like an interesting feature, will review this soon,

- Arnaldo
 
> For example, in this report the callers of garbage_collect() are
> scattered across the tree:
> $ perf report -d ruby 2>- | grep -m10 ^[^#]*[a-z]
>     22.03%     ruby  [.] gc_mark
>                --- gc_mark
>                   |--59.40%-- mark_keyvalue
>                   |          st_foreach
>                   |          gc_mark_children
>                   |          |--99.75%-- rb_gc_mark
>                   |          |          rb_vm_mark
>                   |          |          gc_mark_children
>                   |          |          gc_marks
>                   |          |          |--99.00%-- garbage_collect
> 
> If we make garbage_collect() a black box, its callers are coalesced:
> $ perf report --blackbox garbage_collect -d ruby 2>- | grep -m10 ^[^#]*[a-z]
>     72.92%     ruby  [.] garbage_collect
>                --- garbage_collect
>                    vm_xmalloc
>                   |--47.08%-- ruby_xmalloc
>                   |          st_insert2
>                   |          rb_hash_aset
>                   |          |--98.45%-- features_index_add
>                   |          |          rb_provide_feature
>                   |          |          rb_require_safe
>                   |          |          vm_call_method
> 
> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
> Cc: Paul Mackerras <paulus@samba.org>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
> Cc: Jiri Olsa <jolsa@redhat.com>
> Cc: David Ahern <dsahern@gmail.com>
> Signed-off-by: Greg Price <price@mit.edu>
> ---
>  tools/perf/builtin-report.c | 17 +++++++++++++++--
>  tools/perf/builtin-top.c    |  3 +--
>  tools/perf/util/map.h       |  4 +++-
>  tools/perf/util/session.c   | 29 ++++++++++++++++++-----------
>  tools/perf/util/session.h   |  5 +++++
>  5 files changed, 42 insertions(+), 16 deletions(-)
> 
> diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
> index a61725d..3bbda35 100644
> --- a/tools/perf/builtin-report.c
> +++ b/tools/perf/builtin-report.c
> @@ -70,7 +70,7 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
>  	if ((sort__has_parent || symbol_conf.use_callchain)
>  	    && sample->callchain) {
>  		err = machine__resolve_callchain(machine, evsel, al->thread,
> -						 sample, &parent);
> +						 sample, &parent, al);
>  		if (err)
>  			return err;
>  	}
> @@ -141,7 +141,7 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,
>  
>  	if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) {
>  		err = machine__resolve_callchain(machine, evsel, al->thread,
> -						 sample, &parent);
> +						 sample, &parent, al);
>  		if (err)
>  			return err;
>  	}
> @@ -607,6 +607,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
>  		     "Default: fractal,0.5,callee", &parse_callchain_opt, callchain_default_opt),
>  	OPT_BOOLEAN('G', "inverted", &report.inverted_callchain,
>  		    "alias for inverted call graph"),
> +	OPT_STRING(0, "blackbox", &blackbox_pattern, "regex",
> +		   "functions to treat as black boxes in call graphs, collapsing callees"),
>  	OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
>  		   "only consider symbols in these dsos"),
>  	OPT_STRING('c', "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
> @@ -687,6 +689,17 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
>  
>  	}
>  
> +	if (blackbox_pattern) {
> +		int err = regcomp(&blackbox_regex, blackbox_pattern, REG_EXTENDED);
> +		if (err) {
> +			char buf[BUFSIZ];
> +			regerror(err, &blackbox_regex, buf, sizeof(buf));
> +			pr_err("Invalid blackbox regex: %s\n%s", blackbox_pattern, buf);
> +			goto error;
> +		}
> +		have_blackbox = 1;
> +	}
> +
>  	if (strcmp(report.input_name, "-") != 0)
>  		setup_browser(true);
>  	else {
> diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
> index ff6db80..ee969b5 100644
> --- a/tools/perf/builtin-top.c
> +++ b/tools/perf/builtin-top.c
> @@ -786,8 +786,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
>  		    sample->callchain) {
>  			err = machine__resolve_callchain(machine, evsel,
>  							 al.thread, sample,
> -							 &parent);
> -
> +							 &parent, NULL);
>  			if (err)
>  				return;
>  		}
> diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
> index d2250fc..6d1b8e1 100644
> --- a/tools/perf/util/map.h
> +++ b/tools/perf/util/map.h
> @@ -23,6 +23,7 @@ struct ref_reloc_sym;
>  struct map_groups;
>  struct machine;
>  struct perf_evsel;
> +struct addr_location;
>  
>  struct map {
>  	union {
> @@ -163,7 +164,8 @@ int machine__resolve_callchain(struct machine *machine,
>  			       struct perf_evsel *evsel,
>  			       struct thread *thread,
>  			       struct perf_sample *sample,
> -			       struct symbol **parent);
> +			       struct symbol **parent,
> +			       struct addr_location *root_al);
>  int maps__set_kallsyms_ref_reloc_sym(struct map **maps, const char *symbol_name,
>  				     u64 addr);
>  
> diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
> index 8cdd232..9a8798c 100644
> --- a/tools/perf/util/session.c
> +++ b/tools/perf/util/session.c
> @@ -19,6 +19,10 @@
>  #include "unwind.h"
>  #include "vdso.h"
>  
> +regex_t blackbox_regex;
> +const char *blackbox_pattern;
> +int have_blackbox = 0;
> +
>  static int perf_session__open(struct perf_session *self, bool force)
>  {
>  	struct stat input_stat;
> @@ -226,11 +230,10 @@ void machine__remove_thread(struct machine *self, struct thread *th)
>  	list_add_tail(&th->node, &self->dead_threads);
>  }
>  
> -static bool symbol__match_parent_regex(struct symbol *sym)
> +static bool symbol__match_regex(struct symbol *sym, regex_t *regex)
>  {
> -	if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0))
> +	if (sym->name && !regexec(regex, sym->name, 0, NULL, 0))
>  		return 1;
> -
>  	return 0;
>  }
>  
> @@ -295,8 +298,8 @@ struct branch_info *machine__resolve_bstack(struct machine *self,
>  static int machine__resolve_callchain_sample(struct machine *machine,
>  					     struct thread *thread,
>  					     struct ip_callchain *chain,
> -					     struct symbol **parent)
> -
> +					     struct symbol **parent,
> +					     struct addr_location *root_al)
>  {
>  	u8 cpumode = PERF_RECORD_MISC_USER;
>  	unsigned int i;
> @@ -347,8 +350,13 @@ static int machine__resolve_callchain_sample(struct machine *machine,
>  					   MAP__FUNCTION, ip, &al, NULL);
>  		if (al.sym != NULL) {
>  			if (sort__has_parent && !*parent &&
> -			    symbol__match_parent_regex(al.sym))
> +			    symbol__match_regex(al.sym, &parent_regex))
>  				*parent = al.sym;
> +			else if (have_blackbox && root_al &&
> +			         symbol__match_regex(al.sym, &blackbox_regex)) {
> +				*root_al = al;
> +				callchain_cursor_reset(&callchain_cursor);
> +			}
>  			if (!symbol_conf.use_callchain)
>  				break;
>  		}
> @@ -373,15 +381,15 @@ int machine__resolve_callchain(struct machine *machine,
>  			       struct perf_evsel *evsel,
>  			       struct thread *thread,
>  			       struct perf_sample *sample,
> -			       struct symbol **parent)
> -
> +			       struct symbol **parent,
> +			       struct addr_location *root_al)
>  {
>  	int ret;
>  
>  	callchain_cursor_reset(&callchain_cursor);
>  
>  	ret = machine__resolve_callchain_sample(machine, thread,
> -						sample->callchain, parent);
> +	                                        sample->callchain, parent, root_al);
>  	if (ret)
>  		return ret;
>  
> @@ -1603,9 +1611,8 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event,
>  
>  	if (symbol_conf.use_callchain && sample->callchain) {
>  
> -
>  		if (machine__resolve_callchain(machine, evsel, al.thread,
> -					       sample, NULL) != 0) {
> +					       sample, NULL, NULL) != 0) {
>  			if (verbose)
>  				error("Failed to resolve callchain. Skipping\n");
>  			return;
> diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
> index 0eae00a..6db3e55 100644
> --- a/tools/perf/util/session.h
> +++ b/tools/perf/util/session.h
> @@ -1,6 +1,7 @@
>  #ifndef __PERF_SESSION_H
>  #define __PERF_SESSION_H
>  
> +#include <regex.h>
>  #include "hist.h"
>  #include "event.h"
>  #include "header.h"
> @@ -9,6 +10,10 @@
>  #include <linux/rbtree.h>
>  #include <linux/perf_event.h>
>  
> +extern regex_t blackbox_regex;
> +extern const char *blackbox_pattern;
> +extern int have_blackbox;
> +
>  struct sample_queue;
>  struct ip_callchain;
>  struct thread;
> -- 
> 1.7.11.3

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] perf report: Add option to collapse undesired parts of call graph
  2013-01-11  5:27 ` Arnaldo Carvalho de Melo
@ 2013-02-25  4:28   ` Greg Price
  2013-06-23  3:17   ` Greg Price
  1 sibling, 0 replies; 22+ messages in thread
From: Greg Price @ 2013-02-25  4:28 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: linux-kernel, Peter Zijlstra, Paul Mackerras, Ingo Molnar,
	Jiri Olsa, David Ahern

On Fri, Jan 11, 2013 at 02:27:36AM -0300, Arnaldo Carvalho de Melo wrote:
> Em Fri, Dec 07, 2012 at 02:30:44AM -0500, Greg Price escreveu:
> > If an application has an expensive function implemented with a large
> > tree of calls to helper functions, the default call-graph presentation
> > will be dominated by the many different call-chains within that
> > function.  By treating the function as a black box, we can collect the
> > call-chains leading into the function and compactly identify what to
> > blame for expensive calls.
> 
> Looks like an interesting feature, will review this soon,

Hi Arnaldo,

Have you had a chance to look at this yet?

Cheers,
Greg





> > For example, in this report the callers of garbage_collect() are
> > scattered across the tree:
> > $ perf report -d ruby 2>- | grep -m10 ^[^#]*[a-z]
> >     22.03%     ruby  [.] gc_mark
> >                --- gc_mark
> >                   |--59.40%-- mark_keyvalue
> >                   |          st_foreach
> >                   |          gc_mark_children
> >                   |          |--99.75%-- rb_gc_mark
> >                   |          |          rb_vm_mark
> >                   |          |          gc_mark_children
> >                   |          |          gc_marks
> >                   |          |          |--99.00%-- garbage_collect
> > 
> > If we make garbage_collect() a black box, its callers are coalesced:
> > $ perf report --blackbox garbage_collect -d ruby 2>- | grep -m10 ^[^#]*[a-z]
> >     72.92%     ruby  [.] garbage_collect
> >                --- garbage_collect
> >                    vm_xmalloc
> >                   |--47.08%-- ruby_xmalloc
> >                   |          st_insert2
> >                   |          rb_hash_aset
> >                   |          |--98.45%-- features_index_add
> >                   |          |          rb_provide_feature
> >                   |          |          rb_require_safe
> >                   |          |          vm_call_method
> > 
> > Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
> > Cc: Paul Mackerras <paulus@samba.org>
> > Cc: Ingo Molnar <mingo@redhat.com>
> > Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
> > Cc: Jiri Olsa <jolsa@redhat.com>
> > Cc: David Ahern <dsahern@gmail.com>
> > Signed-off-by: Greg Price <price@mit.edu>
> > ---
> >  tools/perf/builtin-report.c | 17 +++++++++++++++--
> >  tools/perf/builtin-top.c    |  3 +--
> >  tools/perf/util/map.h       |  4 +++-
> >  tools/perf/util/session.c   | 29 ++++++++++++++++++-----------
> >  tools/perf/util/session.h   |  5 +++++
> >  5 files changed, 42 insertions(+), 16 deletions(-)
> > 
> > diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
> > index a61725d..3bbda35 100644
> > --- a/tools/perf/builtin-report.c
> > +++ b/tools/perf/builtin-report.c
> > @@ -70,7 +70,7 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
> >  	if ((sort__has_parent || symbol_conf.use_callchain)
> >  	    && sample->callchain) {
> >  		err = machine__resolve_callchain(machine, evsel, al->thread,
> > -						 sample, &parent);
> > +						 sample, &parent, al);
> >  		if (err)
> >  			return err;
> >  	}
> > @@ -141,7 +141,7 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,
> >  
> >  	if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) {
> >  		err = machine__resolve_callchain(machine, evsel, al->thread,
> > -						 sample, &parent);
> > +						 sample, &parent, al);
> >  		if (err)
> >  			return err;
> >  	}
> > @@ -607,6 +607,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
> >  		     "Default: fractal,0.5,callee", &parse_callchain_opt, callchain_default_opt),
> >  	OPT_BOOLEAN('G', "inverted", &report.inverted_callchain,
> >  		    "alias for inverted call graph"),
> > +	OPT_STRING(0, "blackbox", &blackbox_pattern, "regex",
> > +		   "functions to treat as black boxes in call graphs, collapsing callees"),
> >  	OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
> >  		   "only consider symbols in these dsos"),
> >  	OPT_STRING('c', "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
> > @@ -687,6 +689,17 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
> >  
> >  	}
> >  
> > +	if (blackbox_pattern) {
> > +		int err = regcomp(&blackbox_regex, blackbox_pattern, REG_EXTENDED);
> > +		if (err) {
> > +			char buf[BUFSIZ];
> > +			regerror(err, &blackbox_regex, buf, sizeof(buf));
> > +			pr_err("Invalid blackbox regex: %s\n%s", blackbox_pattern, buf);
> > +			goto error;
> > +		}
> > +		have_blackbox = 1;
> > +	}
> > +
> >  	if (strcmp(report.input_name, "-") != 0)
> >  		setup_browser(true);
> >  	else {
> > diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
> > index ff6db80..ee969b5 100644
> > --- a/tools/perf/builtin-top.c
> > +++ b/tools/perf/builtin-top.c
> > @@ -786,8 +786,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
> >  		    sample->callchain) {
> >  			err = machine__resolve_callchain(machine, evsel,
> >  							 al.thread, sample,
> > -							 &parent);
> > -
> > +							 &parent, NULL);
> >  			if (err)
> >  				return;
> >  		}
> > diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
> > index d2250fc..6d1b8e1 100644
> > --- a/tools/perf/util/map.h
> > +++ b/tools/perf/util/map.h
> > @@ -23,6 +23,7 @@ struct ref_reloc_sym;
> >  struct map_groups;
> >  struct machine;
> >  struct perf_evsel;
> > +struct addr_location;
> >  
> >  struct map {
> >  	union {
> > @@ -163,7 +164,8 @@ int machine__resolve_callchain(struct machine *machine,
> >  			       struct perf_evsel *evsel,
> >  			       struct thread *thread,
> >  			       struct perf_sample *sample,
> > -			       struct symbol **parent);
> > +			       struct symbol **parent,
> > +			       struct addr_location *root_al);
> >  int maps__set_kallsyms_ref_reloc_sym(struct map **maps, const char *symbol_name,
> >  				     u64 addr);
> >  
> > diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
> > index 8cdd232..9a8798c 100644
> > --- a/tools/perf/util/session.c
> > +++ b/tools/perf/util/session.c
> > @@ -19,6 +19,10 @@
> >  #include "unwind.h"
> >  #include "vdso.h"
> >  
> > +regex_t blackbox_regex;
> > +const char *blackbox_pattern;
> > +int have_blackbox = 0;
> > +
> >  static int perf_session__open(struct perf_session *self, bool force)
> >  {
> >  	struct stat input_stat;
> > @@ -226,11 +230,10 @@ void machine__remove_thread(struct machine *self, struct thread *th)
> >  	list_add_tail(&th->node, &self->dead_threads);
> >  }
> >  
> > -static bool symbol__match_parent_regex(struct symbol *sym)
> > +static bool symbol__match_regex(struct symbol *sym, regex_t *regex)
> >  {
> > -	if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0))
> > +	if (sym->name && !regexec(regex, sym->name, 0, NULL, 0))
> >  		return 1;
> > -
> >  	return 0;
> >  }
> >  
> > @@ -295,8 +298,8 @@ struct branch_info *machine__resolve_bstack(struct machine *self,
> >  static int machine__resolve_callchain_sample(struct machine *machine,
> >  					     struct thread *thread,
> >  					     struct ip_callchain *chain,
> > -					     struct symbol **parent)
> > -
> > +					     struct symbol **parent,
> > +					     struct addr_location *root_al)
> >  {
> >  	u8 cpumode = PERF_RECORD_MISC_USER;
> >  	unsigned int i;
> > @@ -347,8 +350,13 @@ static int machine__resolve_callchain_sample(struct machine *machine,
> >  					   MAP__FUNCTION, ip, &al, NULL);
> >  		if (al.sym != NULL) {
> >  			if (sort__has_parent && !*parent &&
> > -			    symbol__match_parent_regex(al.sym))
> > +			    symbol__match_regex(al.sym, &parent_regex))
> >  				*parent = al.sym;
> > +			else if (have_blackbox && root_al &&
> > +			         symbol__match_regex(al.sym, &blackbox_regex)) {
> > +				*root_al = al;
> > +				callchain_cursor_reset(&callchain_cursor);
> > +			}
> >  			if (!symbol_conf.use_callchain)
> >  				break;
> >  		}
> > @@ -373,15 +381,15 @@ int machine__resolve_callchain(struct machine *machine,
> >  			       struct perf_evsel *evsel,
> >  			       struct thread *thread,
> >  			       struct perf_sample *sample,
> > -			       struct symbol **parent)
> > -
> > +			       struct symbol **parent,
> > +			       struct addr_location *root_al)
> >  {
> >  	int ret;
> >  
> >  	callchain_cursor_reset(&callchain_cursor);
> >  
> >  	ret = machine__resolve_callchain_sample(machine, thread,
> > -						sample->callchain, parent);
> > +	                                        sample->callchain, parent, root_al);
> >  	if (ret)
> >  		return ret;
> >  
> > @@ -1603,9 +1611,8 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event,
> >  
> >  	if (symbol_conf.use_callchain && sample->callchain) {
> >  
> > -
> >  		if (machine__resolve_callchain(machine, evsel, al.thread,
> > -					       sample, NULL) != 0) {
> > +					       sample, NULL, NULL) != 0) {
> >  			if (verbose)
> >  				error("Failed to resolve callchain. Skipping\n");
> >  			return;
> > diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
> > index 0eae00a..6db3e55 100644
> > --- a/tools/perf/util/session.h
> > +++ b/tools/perf/util/session.h
> > @@ -1,6 +1,7 @@
> >  #ifndef __PERF_SESSION_H
> >  #define __PERF_SESSION_H
> >  
> > +#include <regex.h>
> >  #include "hist.h"
> >  #include "event.h"
> >  #include "header.h"
> > @@ -9,6 +10,10 @@
> >  #include <linux/rbtree.h>
> >  #include <linux/perf_event.h>
> >  
> > +extern regex_t blackbox_regex;
> > +extern const char *blackbox_pattern;
> > +extern int have_blackbox;
> > +
> >  struct sample_queue;
> >  struct ip_callchain;
> >  struct thread;
> > -- 
> > 1.7.11.3

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] perf report: Add option to collapse undesired parts of call graph
  2013-01-11  5:27 ` Arnaldo Carvalho de Melo
  2013-02-25  4:28   ` Greg Price
@ 2013-06-23  3:17   ` Greg Price
  2013-06-23 21:53     ` Jiri Olsa
  2013-06-26  1:28     ` Namhyung Kim
  1 sibling, 2 replies; 22+ messages in thread
From: Greg Price @ 2013-06-23  3:17 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: linux-kernel, Peter Zijlstra, Paul Mackerras, Ingo Molnar,
	Jiri Olsa, David Ahern

For example, in an application with an expensive function
implemented with deeply nested recursive calls, the default
call-graph presentation is dominated by the different callchains
within that function.  By treating the function as a black box,
we can collect the callchains leading into the function and
compactly identify what to blame for expensive calls.

For example, in this report the callers of garbage_collect() are
scattered across the tree:
$ perf report -d ruby 2>- | grep -m10 ^[^#]*[a-z]
    22.03%     ruby  [.] gc_mark
               --- gc_mark
                  |--59.40%-- mark_keyvalue
                  |          st_foreach
                  |          gc_mark_children
                  |          |--99.75%-- rb_gc_mark
                  |          |          rb_vm_mark
                  |          |          gc_mark_children
                  |          |          gc_marks
                  |          |          |--99.00%-- garbage_collect

If we make garbage_collect() a black box, its callers are coalesced:
$ perf report --blackbox garbage_collect -d ruby 2>- | grep -m10 ^[^#]*[a-z]
    72.92%     ruby  [.] garbage_collect
               --- garbage_collect
                   vm_xmalloc
                  |--47.08%-- ruby_xmalloc
                  |          st_insert2
                  |          rb_hash_aset
                  |          |--98.45%-- features_index_add
                  |          |          rb_provide_feature
                  |          |          rb_require_safe
                  |          |          vm_call_method

Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Signed-off-by: Greg Price <price@mit.edu>
---

On Fri, Jan 11, 2013 at 02:27:36AM -0300, Arnaldo Carvalho de Melo wrote:
> Looks like an interesting feature, will review this soon,

Rebased on top of v3.10-rc7, please take a look at your convenience.


 tools/perf/builtin-report.c | 19 ++++++++++++++++---
 tools/perf/builtin-top.c    |  3 +--
 tools/perf/util/machine.c   | 26 +++++++++++++++++---------
 tools/perf/util/machine.h   |  9 ++++++++-
 tools/perf/util/session.c   |  3 +--
 5 files changed, 43 insertions(+), 17 deletions(-)

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index bd0ca81..bf0d860 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -83,7 +83,7 @@ static int perf_report__add_mem_hist_entry(struct perf_tool *tool,
 	if ((sort__has_parent || symbol_conf.use_callchain) &&
 	    sample->callchain) {
 		err = machine__resolve_callchain(machine, evsel, al->thread,
-						 sample, &parent);
+						 sample, &parent, al);
 		if (err)
 			return err;
 	}
@@ -174,7 +174,7 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
 	if ((sort__has_parent || symbol_conf.use_callchain)
 	    && sample->callchain) {
 		err = machine__resolve_callchain(machine, evsel, al->thread,
-						 sample, &parent);
+						 sample, &parent, al);
 		if (err)
 			return err;
 	}
@@ -245,7 +245,7 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,
 
 	if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) {
 		err = machine__resolve_callchain(machine, evsel, al->thread,
-						 sample, &parent);
+						 sample, &parent, al);
 		if (err)
 			return err;
 	}
@@ -764,6 +764,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 		     "Default: fractal,0.5,callee", &parse_callchain_opt, callchain_default_opt),
 	OPT_BOOLEAN('G', "inverted", &report.inverted_callchain,
 		    "alias for inverted call graph"),
+	OPT_STRING(0, "blackbox", &blackbox_pattern, "regex",
+		   "functions to treat as black boxes in call graphs, collapsing callees"),
 	OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
 		   "only consider symbols in these dsos"),
 	OPT_STRING('c', "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
@@ -918,6 +920,17 @@ repeat:
 	} else
 		symbol_conf.exclude_other = false;
 
+	if (blackbox_pattern) {
+		int err = regcomp(&blackbox_regex, blackbox_pattern, REG_EXTENDED);
+		if (err) {
+			char buf[BUFSIZ];
+			regerror(err, &blackbox_regex, buf, sizeof(buf));
+			pr_err("Invalid blackbox regex: %s\n%s", blackbox_pattern, buf);
+			goto error;
+		}
+		have_blackbox = 1;
+	}
+
 	if (argc) {
 		/*
 		 * Special case: if there's an argument left then assume that
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 67bdb9f..abec83d 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -775,8 +775,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
 		    sample->callchain) {
 			err = machine__resolve_callchain(machine, evsel,
 							 al.thread, sample,
-							 &parent);
-
+							 &parent, NULL);
 			if (err)
 				return;
 		}
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index b2ecad6..a14489c 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -11,6 +11,10 @@
 #include <stdbool.h>
 #include "unwind.h"
 
+regex_t blackbox_regex;
+const char *blackbox_pattern;
+int have_blackbox = 0;
+
 int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
 {
 	map_groups__init(&machine->kmaps);
@@ -1058,11 +1062,10 @@ int machine__process_event(struct machine *machine, union perf_event *event)
 	return ret;
 }
 
-static bool symbol__match_parent_regex(struct symbol *sym)
+static bool symbol__match_regex(struct symbol *sym, regex_t *regex)
 {
-	if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0))
+	if (sym->name && !regexec(regex, sym->name, 0, NULL, 0))
 		return 1;
-
 	return 0;
 }
 
@@ -1159,8 +1162,8 @@ struct branch_info *machine__resolve_bstack(struct machine *machine,
 static int machine__resolve_callchain_sample(struct machine *machine,
 					     struct thread *thread,
 					     struct ip_callchain *chain,
-					     struct symbol **parent)
-
+					     struct symbol **parent,
+					     struct addr_location *root_al)
 {
 	u8 cpumode = PERF_RECORD_MISC_USER;
 	unsigned int i;
@@ -1211,8 +1214,13 @@ static int machine__resolve_callchain_sample(struct machine *machine,
 					   MAP__FUNCTION, ip, &al, NULL);
 		if (al.sym != NULL) {
 			if (sort__has_parent && !*parent &&
-			    symbol__match_parent_regex(al.sym))
+			    symbol__match_regex(al.sym, &parent_regex))
 				*parent = al.sym;
+			else if (have_blackbox && root_al &&
+			         symbol__match_regex(al.sym, &blackbox_regex)) {
+				*root_al = al;
+				callchain_cursor_reset(&callchain_cursor);
+			}
 			if (!symbol_conf.use_callchain)
 				break;
 		}
@@ -1237,15 +1245,15 @@ int machine__resolve_callchain(struct machine *machine,
 			       struct perf_evsel *evsel,
 			       struct thread *thread,
 			       struct perf_sample *sample,
-			       struct symbol **parent)
-
+			       struct symbol **parent,
+			       struct addr_location *root_al)
 {
 	int ret;
 
 	callchain_cursor_reset(&callchain_cursor);
 
 	ret = machine__resolve_callchain_sample(machine, thread,
-						sample->callchain, parent);
+	                                        sample->callchain, parent, root_al);
 	if (ret)
 		return ret;
 
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 7794068..6f0310a 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -3,8 +3,14 @@
 
 #include <sys/types.h>
 #include <linux/rbtree.h>
+#include <regex.h>
 #include "map.h"
 
+extern regex_t blackbox_regex;
+extern const char *blackbox_pattern;
+extern int have_blackbox;
+
+struct addr_location;
 struct branch_stack;
 struct perf_evsel;
 struct perf_sample;
@@ -83,7 +89,8 @@ int machine__resolve_callchain(struct machine *machine,
 			       struct perf_evsel *evsel,
 			       struct thread *thread,
 			       struct perf_sample *sample,
-			       struct symbol **parent);
+			       struct symbol **parent,
+			       struct addr_location *root_al);
 
 /*
  * Default guest kernel is defined by parameter --guestkallsyms
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index cf1fe01..7024950 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1397,9 +1397,8 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event,
 
 	if (symbol_conf.use_callchain && sample->callchain) {
 
-
 		if (machine__resolve_callchain(machine, evsel, al.thread,
-					       sample, NULL) != 0) {
+					       sample, NULL, NULL) != 0) {
 			if (verbose)
 				error("Failed to resolve callchain. Skipping\n");
 			return;
-- 
1.8.2

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH] perf report: Add option to collapse undesired parts of call graph
  2013-06-23  3:17   ` Greg Price
@ 2013-06-23 21:53     ` Jiri Olsa
  2013-06-24  8:32       ` Ingo Molnar
  2013-06-24 22:50       ` Greg Price
  2013-06-26  1:28     ` Namhyung Kim
  1 sibling, 2 replies; 22+ messages in thread
From: Jiri Olsa @ 2013-06-23 21:53 UTC (permalink / raw)
  To: Greg Price
  Cc: Arnaldo Carvalho de Melo, linux-kernel, Peter Zijlstra,
	Paul Mackerras, Ingo Molnar, David Ahern

On Sat, Jun 22, 2013 at 11:17:20PM -0400, Greg Price wrote:
> For example, in an application with an expensive function
> implemented with deeply nested recursive calls, the default
> call-graph presentation is dominated by the different callchains
> within that function.  By treating the function as a black box,
> we can collect the callchains leading into the function and
> compactly identify what to blame for expensive calls.
> 
> For example, in this report the callers of garbage_collect() are
> scattered across the tree:
> $ perf report -d ruby 2>- | grep -m10 ^[^#]*[a-z]
>     22.03%     ruby  [.] gc_mark
>                --- gc_mark
>                   |--59.40%-- mark_keyvalue
>                   |          st_foreach
>                   |          gc_mark_children
>                   |          |--99.75%-- rb_gc_mark
>                   |          |          rb_vm_mark
>                   |          |          gc_mark_children
>                   |          |          gc_marks
>                   |          |          |--99.00%-- garbage_collect
> 
> If we make garbage_collect() a black box, its callers are coalesced:
> $ perf report --blackbox garbage_collect -d ruby 2>- | grep -m10 ^[^#]*[a-z]
>     72.92%     ruby  [.] garbage_collect
>                --- garbage_collect
>                    vm_xmalloc
>                   |--47.08%-- ruby_xmalloc
>                   |          st_insert2
>                   |          rb_hash_aset
>                   |          |--98.45%-- features_index_add
>                   |          |          rb_provide_feature
>                   |          |          rb_require_safe
>                   |          |          vm_call_method

Seems useful, sort of oposite to parent option (-p)

few comments below

SNIP

>  		 * Special case: if there's an argument left then assume that
> diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
> index 67bdb9f..abec83d 100644
> --- a/tools/perf/builtin-top.c
> +++ b/tools/perf/builtin-top.c
> @@ -775,8 +775,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
>  		    sample->callchain) {
>  			err = machine__resolve_callchain(machine, evsel,
>  							 al.thread, sample,
> -							 &parent);
> -
> +							 &parent, NULL);
>  			if (err)
>  				return;
>  		}

Any reason why not add this for top?

> diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
> index b2ecad6..a14489c 100644
> --- a/tools/perf/util/machine.c
> +++ b/tools/perf/util/machine.c
> @@ -11,6 +11,10 @@
>  #include <stdbool.h>
>  #include "unwind.h"
>  
> +regex_t blackbox_regex;
> +const char *blackbox_pattern;
> +int have_blackbox = 0;

util/sort.c mich be better place for this

It could also make sense to allow sorting on this
the same way as we do for '-s parent' and report only
'[other]' and 'blackbox' entries.

Also I dont like the 'blackbox' option name, it should
complement the parent option somehow.. but no idea ;-)

I tested this on separate example and numbers seem ok.

jirka

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] perf report: Add option to collapse undesired parts of call graph
  2013-06-23 21:53     ` Jiri Olsa
@ 2013-06-24  8:32       ` Ingo Molnar
  2013-06-24 23:14         ` Greg Price
  2013-06-24 22:50       ` Greg Price
  1 sibling, 1 reply; 22+ messages in thread
From: Ingo Molnar @ 2013-06-24  8:32 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Greg Price, Arnaldo Carvalho de Melo, linux-kernel,
	Peter Zijlstra, Paul Mackerras, Ingo Molnar, David Ahern,
	Pekka Enberg, Namhyung Kim, Namhyung Kim


* Jiri Olsa <jolsa@redhat.com> wrote:

> It could also make sense to allow sorting on this
> the same way as we do for '-s parent' and report only
> '[other]' and 'blackbox' entries.
> 
> Also I dont like the 'blackbox' option name, it should
> complement the parent option somehow.. but no idea ;-)

Looks like a nice feature.

Maybe calling it '--collapse' would be a better name?

By default the call-graphs are all expanded to maximum. With this option 
certain function(s) and all their child chains can be collapsed.

--parent filters the call-chains, excluding all others that don't include 
this parent. It might make sense to rename it to --filter?

It would also be nice if all these visualization variants were available 
in the GTK front-end.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] perf report: Add option to collapse undesired parts of call graph
  2013-06-23 21:53     ` Jiri Olsa
  2013-06-24  8:32       ` Ingo Molnar
@ 2013-06-24 22:50       ` Greg Price
  1 sibling, 0 replies; 22+ messages in thread
From: Greg Price @ 2013-06-24 22:50 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Arnaldo Carvalho de Melo, linux-kernel, Peter Zijlstra,
	Paul Mackerras, Ingo Molnar, David Ahern

On Sun, Jun 23, 2013 at 11:53:27PM +0200, Jiri Olsa wrote:
> Seems useful, sort of oposite to parent option (-p)

Cool, good to hear.


> Any reason why not add this for top?

Only because I didn't think about it. :)  Seems like a good idea; I'll
add that.


> > diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
> > index b2ecad6..a14489c 100644
> > --- a/tools/perf/util/machine.c
> > +++ b/tools/perf/util/machine.c
> > @@ -11,6 +11,10 @@
> >  #include <stdbool.h>
> >  #include "unwind.h"
> >  
> > +regex_t blackbox_regex;
> > +const char *blackbox_pattern;
> > +int have_blackbox = 0;
> 
> util/sort.c mich be better place for this

Sure, happy to put it there.


> It could also make sense to allow sorting on this
> the same way as we do for '-s parent' and report only
> '[other]' and 'blackbox' entries.

Could be.  My inclination would be to wait for someone to show up
wanting to use that functionality, because it's not yet obvious to me
why it will be useful.


> Also I dont like the 'blackbox' option name, it should
> complement the parent option somehow.. but no idea ;-)

OK, I'll think of some alternative possible names.


Thanks for the review!

Cheers,
Greg

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] perf report: Add option to collapse undesired parts of call graph
  2013-06-24  8:32       ` Ingo Molnar
@ 2013-06-24 23:14         ` Greg Price
  2013-06-25  7:47           ` Ingo Molnar
  0 siblings, 1 reply; 22+ messages in thread
From: Greg Price @ 2013-06-24 23:14 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Jiri Olsa, Arnaldo Carvalho de Melo, linux-kernel,
	Peter Zijlstra, Paul Mackerras, Ingo Molnar, David Ahern,
	Pekka Enberg, Namhyung Kim, Namhyung Kim

On Mon, Jun 24, 2013 at 10:32:53AM +0200, Ingo Molnar wrote:
> * Jiri Olsa <jolsa@redhat.com> wrote:
> > It could also make sense to allow sorting on this
> > the same way as we do for '-s parent' and report only
> > '[other]' and 'blackbox' entries.
> > 
> > Also I dont like the 'blackbox' option name, it should
> > complement the parent option somehow.. but no idea ;-)
> 
> Looks like a nice feature.

Thanks, good to hear!


> Maybe calling it '--collapse' would be a better name?
> 
> By default the call-graphs are all expanded to maximum. With this option 
> certain function(s) and all their child chains can be collapsed.

Maybe.  But what this does is actually a bit of an inverse of the
collapsing one does in the front-end (with 'C' or with enter, etc.);
unless we have --inverted/-G, the collapsing one can do interactively
is of the various *callers*, recursively, of a function, whereas this
collapses the recursive *callees*.  Or in other words, interactively
one may collapse or expand a subtree of the display; with this option
one collects together and merges all the subtrees that have a given
symbol name at their respective roots.

So this is doing something that can't be achieved by just doing some
grunt work in the front-end as a user, and it might be confusing to
give it the same name as the interactive thing.

If you're investigating the kind of question that --inverted/-G is
good for answering, then this is the same thing one can do with that
grunt work, but in that case the option is less interesting. :)  The
example in my commit message, which comes from a real use case, is
without -G.

Perhaps --coalesce, but that's a little mysterious.  --trim-callees?
--ignore-callees?  --ignore-inside?  --strip-callees?  --trim-inside?


> --parent filters the call-chains, excluding all others that don't include 
> this parent. It might make sense to rename it to --filter?

Sure, maybe so.  I don't have a firm opinion on the name or exact
semantics of --parent.


> It would also be nice if all these visualization variants were available 
> in the GTK front-end.

TBH I'm not really familiar with the GTK front-end, as I mainly use
the TUI.  At a quick trial, it looks like --blackbox has the expected
effect on the display there; though with or without --blackbox I can't
seem to get the entries to expand to show me a call-graph profile, so
it's hard to demonstrate it fully.  Not sure what I may have done
wrong in building or running perf to make that not work (or is that
expected?)

What changes do you have in mind to make these available in the GTK
front-end?


Cheers,
Greg

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] perf report: Add option to collapse undesired parts of call graph
  2013-06-24 23:14         ` Greg Price
@ 2013-06-25  7:47           ` Ingo Molnar
  2013-06-25  8:01             ` Namhyung Kim
  0 siblings, 1 reply; 22+ messages in thread
From: Ingo Molnar @ 2013-06-25  7:47 UTC (permalink / raw)
  To: Greg Price
  Cc: Jiri Olsa, Arnaldo Carvalho de Melo, linux-kernel,
	Peter Zijlstra, Paul Mackerras, Ingo Molnar, David Ahern,
	Pekka Enberg, Namhyung Kim, Namhyung Kim


* Greg Price <price@MIT.EDU> wrote:

> > It would also be nice if all these visualization variants were available 
> > in the GTK front-end.
> 
> TBH I'm not really familiar with the GTK front-end, as I mainly use
> the TUI.  At a quick trial, it looks like --blackbox has the expected
> effect on the display there; though with or without --blackbox I can't
> seem to get the entries to expand to show me a call-graph profile, so
> it's hard to demonstrate it fully.  Not sure what I may have done
> wrong in building or running perf to make that not work (or is that
> expected?)
> 
> What changes do you have in mind to make these available in the GTK
> front-end?

I was thinking of something obvious like right-clicking it to make that 
function back boxed away or so? Have no firm ideas - maybe the GTK gents 
on Cc: know how to best integrate such features.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] perf report: Add option to collapse undesired parts of call graph
  2013-06-25  7:47           ` Ingo Molnar
@ 2013-06-25  8:01             ` Namhyung Kim
  2013-06-26 22:41               ` Greg Price
  0 siblings, 1 reply; 22+ messages in thread
From: Namhyung Kim @ 2013-06-25  8:01 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Greg Price, Jiri Olsa, Arnaldo Carvalho de Melo, linux-kernel,
	Peter Zijlstra, Paul Mackerras, Ingo Molnar, David Ahern,
	Pekka Enberg, Namhyung Kim

Hi,

2013-06-25 PM 4:47, Ingo Molnar wrote:
>
> * Greg Price <price@MIT.EDU> wrote:
>
>>> It would also be nice if all these visualization variants were available
>>> in the GTK front-end.
>>
>> TBH I'm not really familiar with the GTK front-end, as I mainly use
>> the TUI.  At a quick trial, it looks like --blackbox has the expected
>> effect on the display there; though with or without --blackbox I can't
>> seem to get the entries to expand to show me a call-graph profile, so
>> it's hard to demonstrate it fully.  Not sure what I may have done
>> wrong in building or running perf to make that not work (or is that
>> expected?)

Currently perf GTK front-end does not support callchains at all. 
There's a floating patchset to enable it though.

https://lkml.org/lkml/2013/6/4/181

>>
>> What changes do you have in mind to make these available in the GTK
>> front-end?
>
> I was thinking of something obvious like right-clicking it to make that
> function back boxed away or so? Have no firm ideas - maybe the GTK gents
> on Cc: know how to best integrate such features.

We don't support context menu yet.  Frankly, perf GTK code is still 
premature and needs some love.  Any contribution should be welcomed!

Thanks,
Namhyung


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] perf report: Add option to collapse undesired parts of call graph
  2013-06-23  3:17   ` Greg Price
  2013-06-23 21:53     ` Jiri Olsa
@ 2013-06-26  1:28     ` Namhyung Kim
  2013-06-26 22:25       ` Greg Price
  1 sibling, 1 reply; 22+ messages in thread
From: Namhyung Kim @ 2013-06-26  1:28 UTC (permalink / raw)
  To: Greg Price
  Cc: Arnaldo Carvalho de Melo, linux-kernel, Peter Zijlstra,
	Paul Mackerras, Ingo Molnar, Jiri Olsa, David Ahern

Hi Greg,

On Sat, 22 Jun 2013 23:17:20 -0400, Greg Price wrote:
> For example, in an application with an expensive function
> implemented with deeply nested recursive calls, the default
> call-graph presentation is dominated by the different callchains
> within that function.  By treating the function as a black box,
> we can collect the callchains leading into the function and
> compactly identify what to blame for expensive calls.
>
> For example, in this report the callers of garbage_collect() are

s/callers/callees/ ?

And it'd be better it shows more lines after garbage_collect so that one
can see its callers also to understand what it does more clearly.


> scattered across the tree:
> $ perf report -d ruby 2>- | grep -m10 ^[^#]*[a-z]
>     22.03%     ruby  [.] gc_mark
>                --- gc_mark
>                   |--59.40%-- mark_keyvalue
>                   |          st_foreach
>                   |          gc_mark_children
>                   |          |--99.75%-- rb_gc_mark
>                   |          |          rb_vm_mark
>                   |          |          gc_mark_children
>                   |          |          gc_marks
>                   |          |          |--99.00%-- garbage_collect
>
> If we make garbage_collect() a black box, its callers are coalesced:

Again, s/callers/callees/ ?


> $ perf report --blackbox garbage_collect -d ruby 2>- | grep -m10 ^[^#]*[a-z]
>     72.92%     ruby  [.] garbage_collect
>                --- garbage_collect
>                    vm_xmalloc
>                   |--47.08%-- ruby_xmalloc
>                   |          st_insert2
>                   |          rb_hash_aset
>                   |          |--98.45%-- features_index_add
>                   |          |          rb_provide_feature
>                   |          |          rb_require_safe
>                   |          |          vm_call_method
>
> Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
> Cc: Paul Mackerras <paulus@samba.org>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: Jiri Olsa <jolsa@redhat.com>
> Cc: David Ahern <dsahern@gmail.com>
> Signed-off-by: Greg Price <price@mit.edu>
> ---
>
> On Fri, Jan 11, 2013 at 02:27:36AM -0300, Arnaldo Carvalho de Melo wrote:
>> Looks like an interesting feature, will review this soon,
>
> Rebased on top of v3.10-rc7, please take a look at your convenience.
>
>
>  tools/perf/builtin-report.c | 19 ++++++++++++++++---
>  tools/perf/builtin-top.c    |  3 +--
>  tools/perf/util/machine.c   | 26 +++++++++++++++++---------
>  tools/perf/util/machine.h   |  9 ++++++++-
>  tools/perf/util/session.c   |  3 +--

You need to update the doc too.

>  5 files changed, 43 insertions(+), 17 deletions(-)
[SNIP]
> @@ -1211,8 +1214,13 @@ static int machine__resolve_callchain_sample(struct machine *machine,
>  					   MAP__FUNCTION, ip, &al, NULL);
>  		if (al.sym != NULL) {
>  			if (sort__has_parent && !*parent &&
> -			    symbol__match_parent_regex(al.sym))
> +			    symbol__match_regex(al.sym, &parent_regex))
>  				*parent = al.sym;
> +			else if (have_blackbox && root_al &&
> +			         symbol__match_regex(al.sym, &blackbox_regex)) {
> +				*root_al = al;
> +				callchain_cursor_reset(&callchain_cursor);

Okay, this is where the magic happens. :)

So it overwrites the original 'al' in process_sample_event() to
blackboxed symbol and drop the callchain.  Wouldn't it deserve a
comment? :)


> +			}
>  			if (!symbol_conf.use_callchain)
>  				break;
pp
This is unrelated to this patch, but why is this line needed?  I guess
this check should be done before calling this function.

Thanks,
Namhyung


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] perf report: Add option to collapse undesired parts of call graph
  2013-06-26  1:28     ` Namhyung Kim
@ 2013-06-26 22:25       ` Greg Price
  2013-06-27  4:58         ` Namhyung Kim
  0 siblings, 1 reply; 22+ messages in thread
From: Greg Price @ 2013-06-26 22:25 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Arnaldo Carvalho de Melo, linux-kernel, Peter Zijlstra,
	Paul Mackerras, Ingo Molnar, Jiri Olsa, David Ahern

Hi Namhyung,

Thanks for the detailed review!


On Wed, Jun 26, 2013 at 10:28:56AM +0900, Namhyung Kim wrote:
> On Sat, 22 Jun 2013 23:17:20 -0400, Greg Price wrote:
> > For example, in an application with an expensive function
> > implemented with deeply nested recursive calls, the default
> > call-graph presentation is dominated by the different callchains
> > within that function.  By treating the function as a black box,
> > we can collect the callchains leading into the function and
> > compactly identify what to blame for expensive calls.
> >
> > For example, in this report the callers of garbage_collect() are
> 
> s/callers/callees/ ?

No, 'callers' is right.  This report is made without -G/--inverted, so
the trees are rooted at the inmost callees (the actual values of the
IP) and each node's children are its callers, rather than vice versa.
Here we want to see who is making these expensive calls to
garbage_collect, but the answer to this question is obscured because
the relevant callchains are separated according to which internal
helper functions to garbage_collect were on the stack.


> And it'd be better it shows more lines after garbage_collect so that one
> can see its callers also to understand what it does more clearly.

If you mean the commit message, the 'after' example that follows shows
this.  In the actual output (without the 'grep -m10' I've used here)
the patch doesn't affect those lines, and plenty more are in fact shown.

I could make the 'before' example longer too, and then we'd see what
the callers were in the callchains that look like
  gc_mark <- mark_keyvalue <- st_foreach <- gc_mark_children
    <- rb_gc_mark <- rb_vm_mark <- gc_mark_children <- gc_marks
    <- garbage_collect <- (the rest of a callchain).
But the callchains that have something else instead of that particular
sequence of eight helper functions (gc_mark called by ... called by
gc_marks) inside garbage_collect won't be included -- they're shown
in other places in the tree.  So it's actually precisely by using this
option that it's possible to see the callers completely, rather than
scattered across many places.


> > scattered across the tree:
> > $ perf report -d ruby 2>- | grep -m10 ^[^#]*[a-z]
> >     22.03%     ruby  [.] gc_mark
> >                --- gc_mark
> >                   |--59.40%-- mark_keyvalue
> >                   |          st_foreach
> >                   |          gc_mark_children
> >                   |          |--99.75%-- rb_gc_mark
> >                   |          |          rb_vm_mark
> >                   |          |          gc_mark_children
> >                   |          |          gc_marks
> >                   |          |          |--99.00%-- garbage_collect
> >
> > If we make garbage_collect() a black box, its callers are coalesced:
> 
> Again, s/callers/callees/ ?

Same as above.


> > $ perf report --blackbox garbage_collect -d ruby 2>- | grep -m10 ^[^#]*[a-z]
> >     72.92%     ruby  [.] garbage_collect
> >                --- garbage_collect
> >                    vm_xmalloc
> >                   |--47.08%-- ruby_xmalloc
> >                   |          st_insert2
> >                   |          rb_hash_aset
> >                   |          |--98.45%-- features_index_add
> >                   |          |          rb_provide_feature
> >                   |          |          rb_require_safe
> >                   |          |          vm_call_method


[snip]
> >  tools/perf/builtin-report.c | 19 ++++++++++++++++---
> >  tools/perf/builtin-top.c    |  3 +--
> >  tools/perf/util/machine.c   | 26 +++++++++++++++++---------
> >  tools/perf/util/machine.h   |  9 ++++++++-
> >  tools/perf/util/session.c   |  3 +--
> 
> You need to update the doc too.

Ah, thanks.  Will do.


> >  5 files changed, 43 insertions(+), 17 deletions(-)
> [SNIP]
> > @@ -1211,8 +1214,13 @@ static int machine__resolve_callchain_sample(struct machine *machine,
> >  					   MAP__FUNCTION, ip, &al, NULL);
> >  		if (al.sym != NULL) {
> >  			if (sort__has_parent && !*parent &&
> > -			    symbol__match_parent_regex(al.sym))
> > +			    symbol__match_regex(al.sym, &parent_regex))
> >  				*parent = al.sym;
> > +			else if (have_blackbox && root_al &&
> > +			         symbol__match_regex(al.sym, &blackbox_regex)) {
> > +				*root_al = al;
> > +				callchain_cursor_reset(&callchain_cursor);
> 
> Okay, this is where the magic happens. :)

Indeed! :)

> So it overwrites the original 'al' in process_sample_event() to
> blackboxed symbol and drop the callchain.  Wouldn't it deserve a
> comment? :)

I could do that.  Perhaps something like
  /* ignore the callchain we had so far, i.e. this symbol's callees */
Sound like what you had in mind?



> > +			}
> >  			if (!symbol_conf.use_callchain)
> >  				break;
> pp
> This is unrelated to this patch, but why is this line needed?  I guess
> this check should be done before calling this function.

Hmm.  We actually can get into this function when
!symbol_conf.use_callchain, if we're using say --sort=parent.  But I'm
still somewhat puzzled, because in that case it looks like we'll break
the loop after the first address with a symbol, even if we didn't find
the 'parent' there, which seems like it wouldn't serve the purpose.
Probably I'm still missing something.

FWIW, this logic has worked essentially the same way since v2.6.31-rc4~3^2~63.

Cheers,
Greg

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] perf report: Add option to collapse undesired parts of call graph
  2013-06-25  8:01             ` Namhyung Kim
@ 2013-06-26 22:41               ` Greg Price
  0 siblings, 0 replies; 22+ messages in thread
From: Greg Price @ 2013-06-26 22:41 UTC (permalink / raw)
  To: Namhyung Kim, Ingo Molnar
  Cc: Jiri Olsa, Arnaldo Carvalho de Melo, linux-kernel,
	Peter Zijlstra, Paul Mackerras, Ingo Molnar, David Ahern,
	Pekka Enberg, Namhyung Kim

Hi Namhyung, hi Ingo,

On Tue, Jun 25, 2013 at 05:01:47PM +0900, Namhyung Kim wrote:
> >>TBH I'm not really familiar with the GTK front-end, as I mainly use
> >>the TUI.  At a quick trial, it looks like --blackbox has the expected
> >>effect on the display there; though with or without --blackbox I can't
> >>seem to get the entries to expand to show me a call-graph profile, so
> >>it's hard to demonstrate it fully.  Not sure what I may have done
> >>wrong in building or running perf to make that not work (or is that
> >>expected?)
> 
> Currently perf GTK front-end does not support callchains at all.
> There's a floating patchset to enable it though.
> 
> https://lkml.org/lkml/2013/6/4/181

OK, good to know I didn't simply mess something up.  So probably best
to leave any specific --blackbox-related support aside until that
patchset lands.


> >>What changes do you have in mind to make these available in the GTK
> >>front-end?
> >
> >I was thinking of something obvious like right-clicking it to make that
> >function back boxed away or so? Have no firm ideas - maybe the GTK gents
> >on Cc: know how to best integrate such features.
> 
> We don't support context menu yet.  Frankly, perf GTK code is still
> premature and needs some love.  Any contribution should be welcomed!

Noted.

Cheers,
Greg

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] perf report: Add option to collapse undesired parts of call graph
  2013-06-26 22:25       ` Greg Price
@ 2013-06-27  4:58         ` Namhyung Kim
  2013-07-01 14:05           ` Greg Price
  2013-07-01 14:08           ` [PATCH] perf report: Fix bug in case "--no-call-graph -p foo" Greg Price
  0 siblings, 2 replies; 22+ messages in thread
From: Namhyung Kim @ 2013-06-27  4:58 UTC (permalink / raw)
  To: Greg Price
  Cc: Arnaldo Carvalho de Melo, linux-kernel, Peter Zijlstra,
	Paul Mackerras, Ingo Molnar, Jiri Olsa, David Ahern

On Wed, 26 Jun 2013 18:25:01 -0400, Greg Price wrote:
> Hi Namhyung,
>
> Thanks for the detailed review!
>
>
> On Wed, Jun 26, 2013 at 10:28:56AM +0900, Namhyung Kim wrote:
>> On Sat, 22 Jun 2013 23:17:20 -0400, Greg Price wrote:
>> > For example, in an application with an expensive function
>> > implemented with deeply nested recursive calls, the default
>> > call-graph presentation is dominated by the different callchains
>> > within that function.  By treating the function as a black box,
>> > we can collect the callchains leading into the function and
>> > compactly identify what to blame for expensive calls.
>> >
>> > For example, in this report the callers of garbage_collect() are
>> 
>> s/callers/callees/ ?
>
> No, 'callers' is right.  This report is made without -G/--inverted, so
> the trees are rooted at the inmost callees (the actual values of the
> IP) and each node's children are its callers, rather than vice versa.
> Here we want to see who is making these expensive calls to
> garbage_collect, but the answer to this question is obscured because
> the relevant callchains are separated according to which internal
> helper functions to garbage_collect were on the stack.
>
>
>> And it'd be better it shows more lines after garbage_collect so that one
>> can see its callers also to understand what it does more clearly.
>
> If you mean the commit message, the 'after' example that follows shows
> this.  In the actual output (without the 'grep -m10' I've used here)
> the patch doesn't affect those lines, and plenty more are in fact shown.
>
> I could make the 'before' example longer too, and then we'd see what
> the callers were in the callchains that look like
>   gc_mark <- mark_keyvalue <- st_foreach <- gc_mark_children
>     <- rb_gc_mark <- rb_vm_mark <- gc_mark_children <- gc_marks
>     <- garbage_collect <- (the rest of a callchain).
> But the callchains that have something else instead of that particular
> sequence of eight helper functions (gc_mark called by ... called by
> gc_marks) inside garbage_collect won't be included -- they're shown
> in other places in the tree.  So it's actually precisely by using this
> option that it's possible to see the callers completely, rather than
> scattered across many places.

Okay, I can see your point now.  Thanks for the explanation.


>> [SNIP]
>> > @@ -1211,8 +1214,13 @@ static int machine__resolve_callchain_sample(struct machine *machine,
>> >  					   MAP__FUNCTION, ip, &al, NULL);
>> >  		if (al.sym != NULL) {
>> >  			if (sort__has_parent && !*parent &&
>> > -			    symbol__match_parent_regex(al.sym))
>> > +			    symbol__match_regex(al.sym, &parent_regex))
>> >  				*parent = al.sym;
>> > +			else if (have_blackbox && root_al &&
>> > +			         symbol__match_regex(al.sym, &blackbox_regex)) {
>> > +				*root_al = al;
>> > +				callchain_cursor_reset(&callchain_cursor);
>> 
>> Okay, this is where the magic happens. :)
>
> Indeed! :)
>
>> So it overwrites the original 'al' in process_sample_event() to
>> blackboxed symbol and drop the callchain.  Wouldn't it deserve a
>> comment? :)
>
> I could do that.  Perhaps something like
>   /* ignore the callchain we had so far, i.e. this symbol's callees */
> Sound like what you had in mind?

More important thing is, I think, it updates the original al (root_al)
so that the perf will see the new symbol as if it was sampled in the
first place and it will collect all of its callers in one place.

>
>
>> > +			}
>> >  			if (!symbol_conf.use_callchain)
>> >  				break;
>> pp
>> This is unrelated to this patch, but why is this line needed?  I guess
>> this check should be done before calling this function.
>
> Hmm.  We actually can get into this function when
> !symbol_conf.use_callchain, if we're using say --sort=parent.  But I'm
> still somewhat puzzled, because in that case it looks like we'll break
> the loop after the first address with a symbol, even if we didn't find
> the 'parent' there, which seems like it wouldn't serve the purpose.

Right, that's what I want to say.

We already have the check before calling this function so breaking the
loop after checking only first callchain node looks strange.  If we
don't want to see callchains but only parents, it should either check
every callchain nodes or fail out as an unsupported operation IMHO.

Thanks,
Namhyung


> Probably I'm still missing something.
>
> FWIW, this logic has worked essentially the same way since v2.6.31-rc4~3^2~63.
>
> Cheers,
> Greg

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] perf report: Add option to collapse undesired parts of call graph
  2013-06-27  4:58         ` Namhyung Kim
@ 2013-07-01 14:05           ` Greg Price
  2013-07-01 14:08           ` [PATCH] perf report: Fix bug in case "--no-call-graph -p foo" Greg Price
  1 sibling, 0 replies; 22+ messages in thread
From: Greg Price @ 2013-07-01 14:05 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Arnaldo Carvalho de Melo, linux-kernel, Peter Zijlstra,
	Paul Mackerras, Ingo Molnar, Jiri Olsa, David Ahern

On Thu, Jun 27, 2013 at 01:58:16PM +0900, Namhyung Kim wrote:
> On Wed, 26 Jun 2013 18:25:01 -0400, Greg Price wrote:
> > On Wed, Jun 26, 2013 at 10:28:56AM +0900, Namhyung Kim wrote:
> >> On Sat, 22 Jun 2013 23:17:20 -0400, Greg Price wrote:
> >> > @@ -1211,8 +1214,13 @@ static int machine__resolve_callchain_sample(struct machine *machine,
> >> >  					   MAP__FUNCTION, ip, &al, NULL);
> >> >  		if (al.sym != NULL) {
> >> >  			if (sort__has_parent && !*parent &&
> >> > -			    symbol__match_parent_regex(al.sym))
> >> > +			    symbol__match_regex(al.sym, &parent_regex))
> >> >  				*parent = al.sym;
> >> > +			else if (have_blackbox && root_al &&
> >> > +			         symbol__match_regex(al.sym, &blackbox_regex)) {
> >> > +				*root_al = al;
> >> > +				callchain_cursor_reset(&callchain_cursor);
> >> 
> >> Okay, this is where the magic happens. :)
> >
> > Indeed! :)
> >
> >> So it overwrites the original 'al' in process_sample_event() to
> >> blackboxed symbol and drop the callchain.  Wouldn't it deserve a
> >> comment? :)
> >
> > I could do that.  Perhaps something like
> >   /* ignore the callchain we had so far, i.e. this symbol's callees */
> > Sound like what you had in mind?
> 
> More important thing is, I think, it updates the original al (root_al)
> so that the perf will see the new symbol as if it was sampled in the
> first place and it will collect all of its callers in one place.

OK, I'll try to capture that in a comment in v2.



> >> > +			}
> >> >  			if (!symbol_conf.use_callchain)
> >> >  				break;
> >> pp
> >> This is unrelated to this patch, but why is this line needed?  I guess
> >> this check should be done before calling this function.
> >
> > Hmm.  We actually can get into this function when
> > !symbol_conf.use_callchain, if we're using say --sort=parent.  But I'm
> > still somewhat puzzled, because in that case it looks like we'll break
> > the loop after the first address with a symbol, even if we didn't find
> > the 'parent' there, which seems like it wouldn't serve the purpose.
> 
> Right, that's what I want to say.
> 
> We already have the check before calling this function so breaking the
> loop after checking only first callchain node looks strange.  If we
> don't want to see callchains but only parents, it should either check
> every callchain nodes or fail out as an unsupported operation IMHO.

I agree.  Will reply momentarily with a patch.

I actually wrote a version that tries to keep the optimization, but
decided it was too complicated for what it gains us.

Cheers,
Greg

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH] perf report: Fix bug in case "--no-call-graph -p foo"
  2013-06-27  4:58         ` Namhyung Kim
  2013-07-01 14:05           ` Greg Price
@ 2013-07-01 14:08           ` Greg Price
  1 sibling, 0 replies; 22+ messages in thread
From: Greg Price @ 2013-07-01 14:08 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Arnaldo Carvalho de Melo, linux-kernel, Peter Zijlstra,
	Ingo Molnar, Frederic Weisbecker

The loop in machine__resolve_callchain_sample, which examines each
element of the callchain for several purposes, contains an optimization
which is intended to bail out early in case we have found the "parent"
and we aren't using the callchain for anything else.

But since v2.6.31-rc4~3^2~45 we have displayed callchains by default
when they are present, which makes that situation unusual.  And in
fact in v2.6.31-rc4~3^2~63 we had already broken this optimization so
that it completely messes up the output if it applies: if we're
looking for the parent and not otherwise using the callchain, we bail
out after the first entry which we can resolve to a symbol, even if it
doesn't match the parent pattern (as it usually won't.)  So it must
really be unusual to see this optimization apply, or someone would
have fixed the bug by now.

Therefore just kill the optimization.

Example before this patch:

    $ perf report -p ^vm_ 2>- | grep Event
    # Event count (approx.): 6392784226

    $ perf report -p ^vm_ --no-call-graph 2>- | grep Event
    # Event count (approx.): 54639399

After this patch:

    $ perf report -p ^vm_ 2>- | grep Event
    # Event count (approx.): 6392784226

    $ perf report -p ^vm_ --no-call-graph 2>- | grep Event
    # Event count (approx.): 6392784226

Reported-by: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/r/871u7p3bjb.fsf@sejong.aot.lge.com
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Greg Price <price@mit.edu>
---

The relevant previous thread:

On Thu, Jun 27, 2013 at 01:58:16PM +0900, Namhyung Kim wrote:
> On Wed, 26 Jun 2013 18:25:01 -0400, Greg Price wrote:
> > On Wed, Jun 26, 2013 at 10:28:56AM +0900, Namhyung Kim wrote:
> >> On Sat, 22 Jun 2013 23:17:20 -0400, Greg Price wrote:
> >> > +			}
> >> >  			if (!symbol_conf.use_callchain)
> >> >  				break;
> >> pp
> >> This is unrelated to this patch, but why is this line needed?  I guess
> >> this check should be done before calling this function.
> >
> > Hmm.  We actually can get into this function when
> > !symbol_conf.use_callchain, if we're using say --sort=parent.  But I'm
> > still somewhat puzzled, because in that case it looks like we'll break
> > the loop after the first address with a symbol, even if we didn't find
> > the 'parent' there, which seems like it wouldn't serve the purpose.
> 
> Right, that's what I want to say.
> 
> We already have the check before calling this function so breaking the
> loop after checking only first callchain node looks strange.  If we
> don't want to see callchains but only parents, it should either check
> every callchain nodes or fail out as an unsupported operation IMHO.

Cheers,
Greg


 tools/perf/util/machine.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 4618e03..3bd8912 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1217,8 +1217,6 @@ static int machine__resolve_callchain_sample(struct machine *machine,
 				*root_al = al;
 				callchain_cursor_reset(&callchain_cursor);
 			}
-			if (!symbol_conf.use_callchain)
-				break;
 		}
 
 		err = callchain_cursor_append(&callchain_cursor,
-- 
1.8.2

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH v2] perf report/top: Add option to collapse undesired parts of call graph
@ 2013-07-01 14:28 Greg Price
  2013-07-07 13:13 ` Jiri Olsa
  0 siblings, 1 reply; 22+ messages in thread
From: Greg Price @ 2013-07-01 14:28 UTC (permalink / raw)
  To: linux-kernel, Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa,
	Ingo Molnar
  Cc: Peter Zijlstra, Paul Mackerras, David Ahern

For example, in an application with an expensive function
implemented with deeply nested recursive calls, the default
call-graph presentation is dominated by the different callchains
within that function.  By ignoring these callees, we can collect
the callchains leading into the function and compactly identify
what to blame for expensive calls.

For example, in this report the callers of garbage_collect() are
scattered across the tree:
$ perf report -d ruby 2>- | grep -m10 ^[^#]*[a-z]
    22.03%     ruby  [.] gc_mark
               --- gc_mark
                  |--59.40%-- mark_keyvalue
                  |          st_foreach
                  |          gc_mark_children
                  |          |--99.75%-- rb_gc_mark
                  |          |          rb_vm_mark
                  |          |          gc_mark_children
                  |          |          gc_marks
                  |          |          |--99.00%-- garbage_collect

If we ignore the callees of garbage_collect(), its callers are coalesced:
$ perf report --ignore-callees garbage_collect -d ruby 2>- | grep -m10 ^[^#]*[a-z]
    72.92%     ruby  [.] garbage_collect
               --- garbage_collect
                   vm_xmalloc
                  |--47.08%-- ruby_xmalloc
                  |          st_insert2
                  |          rb_hash_aset
                  |          |--98.45%-- features_index_add
                  |          |          rb_provide_feature
                  |          |          rb_require_safe
                  |          |          vm_call_method

Link: http://lkml.kernel.org/r/20130623031720.GW22203@biohazard-cafe.mit.edu
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: David Ahern <dsahern@gmail.com>
Signed-off-by: Greg Price <price@mit.edu>
---

Now on top of v3.10.  Option renamed, added to top, comment and doc added.

 tools/perf/Documentation/perf-report.txt |  5 +++++
 tools/perf/Documentation/perf-top.txt    |  5 +++++
 tools/perf/builtin-report.c              | 27 ++++++++++++++++++++++++---
 tools/perf/builtin-top.c                 |  6 ++++--
 tools/perf/util/machine.c                | 24 +++++++++++++++---------
 tools/perf/util/machine.h                |  4 +++-
 tools/perf/util/session.c                |  3 +--
 tools/perf/util/sort.c                   |  2 ++
 tools/perf/util/sort.h                   |  4 ++++
 9 files changed, 63 insertions(+), 17 deletions(-)

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 7d5f4f3..57f2137 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -135,6 +135,11 @@ OPTIONS
 --inverted::
         alias for inverted caller based call graph.
 
+--ignore-callees=<regex>::
+        Ignore callees of the function(s) matching the given regex.
+        This has the effect of collecting the callers of each such
+        function into one place in the call-graph tree.
+
 --pretty=<key>::
         Pretty printing style.  key: normal, raw
 
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 9f1a2fe..be66778 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -155,6 +155,11 @@ Default is to monitor all CPUS.
 
 	Default: fractal,0.5,callee.
 
+--ignore-callees=<regex>::
+        Ignore callees of the function(s) matching the given regex.
+        This has the effect of collecting the callers of each such
+        function into one place in the call-graph tree.
+
 INTERACTIVE PROMPTING KEYS
 --------------------------
 
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index bd0ca81..842575f 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -83,7 +83,7 @@ static int perf_report__add_mem_hist_entry(struct perf_tool *tool,
 	if ((sort__has_parent || symbol_conf.use_callchain) &&
 	    sample->callchain) {
 		err = machine__resolve_callchain(machine, evsel, al->thread,
-						 sample, &parent);
+						 sample, &parent, al);
 		if (err)
 			return err;
 	}
@@ -174,7 +174,7 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
 	if ((sort__has_parent || symbol_conf.use_callchain)
 	    && sample->callchain) {
 		err = machine__resolve_callchain(machine, evsel, al->thread,
-						 sample, &parent);
+						 sample, &parent, al);
 		if (err)
 			return err;
 	}
@@ -245,7 +245,7 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,
 
 	if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) {
 		err = machine__resolve_callchain(machine, evsel, al->thread,
-						 sample, &parent);
+						 sample, &parent, al);
 		if (err)
 			return err;
 	}
@@ -687,6 +687,24 @@ setup:
 	return 0;
 }
 
+int
+report_parse_ignore_callees_opt(const struct option *opt __maybe_unused,
+                          const char *arg, int unset __maybe_unused)
+{
+	if (arg) {
+		int err = regcomp(&ignore_callees_regex, arg, REG_EXTENDED);
+		if (err) {
+			char buf[BUFSIZ];
+			regerror(err, &ignore_callees_regex, buf, sizeof(buf));
+			pr_err("Invalid --ignore-callees regex: %s\n%s", arg, buf);
+			return -1;
+		}
+		have_ignore_callees = 1;
+	}
+
+	return 0;
+}
+
 static int
 parse_branch_mode(const struct option *opt __maybe_unused,
 		  const char *str __maybe_unused, int unset)
@@ -764,6 +782,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 		     "Default: fractal,0.5,callee", &parse_callchain_opt, callchain_default_opt),
 	OPT_BOOLEAN('G', "inverted", &report.inverted_callchain,
 		    "alias for inverted call graph"),
+	OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
+		   "ignore callees of these functions in call graphs",
+		   report_parse_ignore_callees_opt),
 	OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
 		   "only consider symbols in these dsos"),
 	OPT_STRING('c', "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 67bdb9f..ef4da38 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -775,8 +775,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
 		    sample->callchain) {
 			err = machine__resolve_callchain(machine, evsel,
 							 al.thread, sample,
-							 &parent);
-
+							 &parent, &al);
 			if (err)
 				return;
 		}
@@ -1095,6 +1094,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts,
 			     "mode[,dump_size]", record_callchain_help,
 			     &parse_callchain_opt, "fp"),
+	OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
+		   "ignore callees of these functions in call graphs",
+		   report_parse_ignore_callees_opt),
 	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
 		    "Show a column with the sum of periods"),
 	OPT_STRING(0, "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index b2ecad6..6ab6112 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1058,11 +1058,10 @@ int machine__process_event(struct machine *machine, union perf_event *event)
 	return ret;
 }
 
-static bool symbol__match_parent_regex(struct symbol *sym)
+static bool symbol__match_regex(struct symbol *sym, regex_t *regex)
 {
-	if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0))
+	if (sym->name && !regexec(regex, sym->name, 0, NULL, 0))
 		return 1;
-
 	return 0;
 }
 
@@ -1159,8 +1158,8 @@ struct branch_info *machine__resolve_bstack(struct machine *machine,
 static int machine__resolve_callchain_sample(struct machine *machine,
 					     struct thread *thread,
 					     struct ip_callchain *chain,
-					     struct symbol **parent)
-
+					     struct symbol **parent,
+					     struct addr_location *root_al)
 {
 	u8 cpumode = PERF_RECORD_MISC_USER;
 	unsigned int i;
@@ -1211,8 +1210,15 @@ static int machine__resolve_callchain_sample(struct machine *machine,
 					   MAP__FUNCTION, ip, &al, NULL);
 		if (al.sym != NULL) {
 			if (sort__has_parent && !*parent &&
-			    symbol__match_parent_regex(al.sym))
+			    symbol__match_regex(al.sym, &parent_regex))
 				*parent = al.sym;
+			else if (have_ignore_callees && root_al &&
+			  symbol__match_regex(al.sym, &ignore_callees_regex)) {
+				/* Treat this symbol as the root,
+				   forgetting its callees. */
+				*root_al = al;
+				callchain_cursor_reset(&callchain_cursor);
+			}
 			if (!symbol_conf.use_callchain)
 				break;
 		}
@@ -1237,15 +1243,15 @@ int machine__resolve_callchain(struct machine *machine,
 			       struct perf_evsel *evsel,
 			       struct thread *thread,
 			       struct perf_sample *sample,
-			       struct symbol **parent)
-
+			       struct symbol **parent,
+			       struct addr_location *root_al)
 {
 	int ret;
 
 	callchain_cursor_reset(&callchain_cursor);
 
 	ret = machine__resolve_callchain_sample(machine, thread,
-						sample->callchain, parent);
+	                                        sample->callchain, parent, root_al);
 	if (ret)
 		return ret;
 
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 7794068..9ce97a5 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -5,6 +5,7 @@
 #include <linux/rbtree.h>
 #include "map.h"
 
+struct addr_location;
 struct branch_stack;
 struct perf_evsel;
 struct perf_sample;
@@ -83,7 +84,8 @@ int machine__resolve_callchain(struct machine *machine,
 			       struct perf_evsel *evsel,
 			       struct thread *thread,
 			       struct perf_sample *sample,
-			       struct symbol **parent);
+			       struct symbol **parent,
+			       struct addr_location *root_al);
 
 /*
  * Default guest kernel is defined by parameter --guestkallsyms
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index cf1fe01..7024950 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1397,9 +1397,8 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event,
 
 	if (symbol_conf.use_callchain && sample->callchain) {
 
-
 		if (machine__resolve_callchain(machine, evsel, al.thread,
-					       sample, NULL) != 0) {
+					       sample, NULL, NULL) != 0) {
 			if (verbose)
 				error("Failed to resolve callchain. Skipping\n");
 			return;
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 5f52d49..295eef8 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -6,6 +6,8 @@ const char	default_parent_pattern[] = "^sys_|^do_page_fault";
 const char	*parent_pattern = default_parent_pattern;
 const char	default_sort_order[] = "comm,dso,symbol";
 const char	*sort_order = default_sort_order;
+regex_t		ignore_callees_regex;
+int		have_ignore_callees = 0;
 int		sort__need_collapse = 0;
 int		sort__has_parent = 0;
 int		sort__has_sym = 0;
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index f24bdf6..3275f6b 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -29,6 +29,8 @@ extern const char *sort_order;
 extern const char default_parent_pattern[];
 extern const char *parent_pattern;
 extern const char default_sort_order[];
+extern regex_t ignore_callees_regex;
+extern int have_ignore_callees;
 extern int sort__need_collapse;
 extern int sort__has_parent;
 extern int sort__has_sym;
@@ -175,4 +177,6 @@ extern int sort_dimension__add(const char *);
 void sort_entry__setup_elide(struct sort_entry *self, struct strlist *list,
 			     const char *list_name, FILE *fp);
 
+int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, int unset);
+
 #endif	/* __PERF_SORT_H */
-- 
1.8.2

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH v2] perf report/top: Add option to collapse undesired parts of call graph
  2013-07-01 14:28 [PATCH v2] perf report/top: Add option to collapse undesired parts of call graph Greg Price
@ 2013-07-07 13:13 ` Jiri Olsa
  2013-07-08 11:57   ` Greg Price
  0 siblings, 1 reply; 22+ messages in thread
From: Jiri Olsa @ 2013-07-07 13:13 UTC (permalink / raw)
  To: Greg Price
  Cc: linux-kernel, Arnaldo Carvalho de Melo, Namhyung Kim,
	Ingo Molnar, Peter Zijlstra, Paul Mackerras, David Ahern

On Mon, Jul 01, 2013 at 10:28:45AM -0400, Greg Price wrote:
> For example, in an application with an expensive function
> implemented with deeply nested recursive calls, the default
> call-graph presentation is dominated by the different callchains
> within that function.  By ignoring these callees, we can collect
> the callchains leading into the function and compactly identify
> what to blame for expensive calls.

hi,
what's this one based on? I cannot get it applied on acme's perf/core

thanks,
jirka

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v2] perf report/top: Add option to collapse undesired parts of call graph
  2013-07-07 13:13 ` Jiri Olsa
@ 2013-07-08 11:57   ` Greg Price
  2013-07-08 16:24     ` Jiri Olsa
  2013-07-19  7:50     ` [tip:perf/core] " tip-bot for Greg Price
  0 siblings, 2 replies; 22+ messages in thread
From: Greg Price @ 2013-07-08 11:57 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: linux-kernel, Arnaldo Carvalho de Melo, Namhyung Kim,
	Ingo Molnar, Peter Zijlstra, Paul Mackerras, David Ahern

On Sun, Jul 07, 2013 at 03:13:40PM +0200, Jiri Olsa wrote:
> On Mon, Jul 01, 2013 at 10:28:45AM -0400, Greg Price wrote:
> > For example, in an application with an expensive function
> > implemented with deeply nested recursive calls, the default
> > call-graph presentation is dominated by the different callchains
> > within that function.  By ignoring these callees, we can collect
> > the callchains leading into the function and compactly identify
> > what to blame for expensive calls.
> 
> hi,
> what's this one based on? I cannot get it applied on acme's perf/core

That was on v3.10.  Here it is on 6d895ece5, which is currently acme's
perf/core.

Cheers,
Greg


-- >8 --
Date: Thu, 6 Dec 2012 21:48:05 -0800

For example, in an application with an expensive function
implemented with deeply nested recursive calls, the default
call-graph presentation is dominated by the different callchains
within that function.  By ignoring these callees, we can collect
the callchains leading into the function and compactly identify
what to blame for expensive calls.

For example, in this report the callers of garbage_collect() are
scattered across the tree:
$ perf report -d ruby 2>- | grep -m10 ^[^#]*[a-z]
    22.03%     ruby  [.] gc_mark
               --- gc_mark
                  |--59.40%-- mark_keyvalue
                  |          st_foreach
                  |          gc_mark_children
                  |          |--99.75%-- rb_gc_mark
                  |          |          rb_vm_mark
                  |          |          gc_mark_children
                  |          |          gc_marks
                  |          |          |--99.00%-- garbage_collect

If we ignore the callees of garbage_collect(), its callers are coalesced:
$ perf report --ignore-callees garbage_collect -d ruby 2>- | grep -m10 ^[^#]*[a-z]
    72.92%     ruby  [.] garbage_collect
               --- garbage_collect
                   vm_xmalloc
                  |--47.08%-- ruby_xmalloc
                  |          st_insert2
                  |          rb_hash_aset
                  |          |--98.45%-- features_index_add
                  |          |          rb_provide_feature
                  |          |          rb_require_safe
                  |          |          vm_call_method

Link: http://lkml.kernel.org/r/20130623031720.GW22203@biohazard-cafe.mit.edu
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: David Ahern <dsahern@gmail.com>
Signed-off-by: Greg Price <price@mit.edu>
---
 tools/perf/Documentation/perf-report.txt |  5 +++++
 tools/perf/Documentation/perf-top.txt    |  5 +++++
 tools/perf/builtin-report.c              | 27 ++++++++++++++++++++++++---
 tools/perf/builtin-top.c                 |  6 ++++--
 tools/perf/util/machine.c                | 24 +++++++++++++++---------
 tools/perf/util/machine.h                |  4 +++-
 tools/perf/util/session.c                |  3 +--
 tools/perf/util/sort.c                   |  2 ++
 tools/perf/util/sort.h                   |  4 ++++
 9 files changed, 63 insertions(+), 17 deletions(-)

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 66dab74..747ff50 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -135,6 +135,11 @@ OPTIONS
 --inverted::
         alias for inverted caller based call graph.
 
+--ignore-callees=<regex>::
+        Ignore callees of the function(s) matching the given regex.
+        This has the effect of collecting the callers of each such
+        function into one place in the call-graph tree.
+
 --pretty=<key>::
         Pretty printing style.  key: normal, raw
 
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 7fdd190..58d6598 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -155,6 +155,11 @@ Default is to monitor all CPUS.
 
 	Default: fractal,0.5,callee.
 
+--ignore-callees=<regex>::
+        Ignore callees of the function(s) matching the given regex.
+        This has the effect of collecting the callers of each such
+        function into one place in the call-graph tree.
+
 --percent-limit::
 	Do not show entries which have an overhead under that percent.
 	(Default: 0).
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index ee2ca3e..d1bd252 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -89,7 +89,7 @@ static int perf_report__add_mem_hist_entry(struct perf_tool *tool,
 	if ((sort__has_parent || symbol_conf.use_callchain) &&
 	    sample->callchain) {
 		err = machine__resolve_callchain(machine, evsel, al->thread,
-						 sample, &parent);
+						 sample, &parent, al);
 		if (err)
 			return err;
 	}
@@ -180,7 +180,7 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
 	if ((sort__has_parent || symbol_conf.use_callchain)
 	    && sample->callchain) {
 		err = machine__resolve_callchain(machine, evsel, al->thread,
-						 sample, &parent);
+						 sample, &parent, al);
 		if (err)
 			return err;
 	}
@@ -254,7 +254,7 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,
 
 	if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) {
 		err = machine__resolve_callchain(machine, evsel, al->thread,
-						 sample, &parent);
+						 sample, &parent, al);
 		if (err)
 			return err;
 	}
@@ -681,6 +681,24 @@ setup:
 	return 0;
 }
 
+int
+report_parse_ignore_callees_opt(const struct option *opt __maybe_unused,
+                          const char *arg, int unset __maybe_unused)
+{
+	if (arg) {
+		int err = regcomp(&ignore_callees_regex, arg, REG_EXTENDED);
+		if (err) {
+			char buf[BUFSIZ];
+			regerror(err, &ignore_callees_regex, buf, sizeof(buf));
+			pr_err("Invalid --ignore-callees regex: %s\n%s", arg, buf);
+			return -1;
+		}
+		have_ignore_callees = 1;
+	}
+
+	return 0;
+}
+
 static int
 parse_branch_mode(const struct option *opt __maybe_unused,
 		  const char *str __maybe_unused, int unset)
@@ -771,6 +789,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 		     "Default: fractal,0.5,callee", &parse_callchain_opt, callchain_default_opt),
 	OPT_BOOLEAN('G', "inverted", &report.inverted_callchain,
 		    "alias for inverted call graph"),
+	OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
+		   "ignore callees of these functions in call graphs",
+		   report_parse_ignore_callees_opt),
 	OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
 		   "only consider symbols in these dsos"),
 	OPT_STRING('c', "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index a237059..bbf4635 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -773,8 +773,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
 		    sample->callchain) {
 			err = machine__resolve_callchain(machine, evsel,
 							 al.thread, sample,
-							 &parent);
-
+							 &parent, &al);
 			if (err)
 				return;
 		}
@@ -1109,6 +1108,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts,
 			     "mode[,dump_size]", record_callchain_help,
 			     &parse_callchain_opt, "fp"),
+	OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
+		   "ignore callees of these functions in call graphs",
+		   report_parse_ignore_callees_opt),
 	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
 		    "Show a column with the sum of periods"),
 	OPT_STRING(0, "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 5dd5026..f3dacdc 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1058,11 +1058,10 @@ int machine__process_event(struct machine *machine, union perf_event *event)
 	return ret;
 }
 
-static bool symbol__match_parent_regex(struct symbol *sym)
+static bool symbol__match_regex(struct symbol *sym, regex_t *regex)
 {
-	if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0))
+	if (sym->name && !regexec(regex, sym->name, 0, NULL, 0))
 		return 1;
-
 	return 0;
 }
 
@@ -1159,8 +1158,8 @@ struct branch_info *machine__resolve_bstack(struct machine *machine,
 static int machine__resolve_callchain_sample(struct machine *machine,
 					     struct thread *thread,
 					     struct ip_callchain *chain,
-					     struct symbol **parent)
-
+					     struct symbol **parent,
+					     struct addr_location *root_al)
 {
 	u8 cpumode = PERF_RECORD_MISC_USER;
 	unsigned int i;
@@ -1211,8 +1210,15 @@ static int machine__resolve_callchain_sample(struct machine *machine,
 					   MAP__FUNCTION, ip, &al, NULL);
 		if (al.sym != NULL) {
 			if (sort__has_parent && !*parent &&
-			    symbol__match_parent_regex(al.sym))
+			    symbol__match_regex(al.sym, &parent_regex))
 				*parent = al.sym;
+			else if (have_ignore_callees && root_al &&
+			  symbol__match_regex(al.sym, &ignore_callees_regex)) {
+				/* Treat this symbol as the root,
+				   forgetting its callees. */
+				*root_al = al;
+				callchain_cursor_reset(&callchain_cursor);
+			}
 			if (!symbol_conf.use_callchain)
 				break;
 		}
@@ -1237,13 +1243,13 @@ int machine__resolve_callchain(struct machine *machine,
 			       struct perf_evsel *evsel,
 			       struct thread *thread,
 			       struct perf_sample *sample,
-			       struct symbol **parent)
-
+			       struct symbol **parent,
+			       struct addr_location *root_al)
 {
 	int ret;
 
 	ret = machine__resolve_callchain_sample(machine, thread,
-						sample->callchain, parent);
+	                                        sample->callchain, parent, root_al);
 	if (ret)
 		return ret;
 
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index e49ba01..5bb6244 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -5,6 +5,7 @@
 #include <linux/rbtree.h>
 #include "map.h"
 
+struct addr_location;
 struct branch_stack;
 struct perf_evsel;
 struct perf_sample;
@@ -83,7 +84,8 @@ int machine__resolve_callchain(struct machine *machine,
 			       struct perf_evsel *evsel,
 			       struct thread *thread,
 			       struct perf_sample *sample,
-			       struct symbol **parent);
+			       struct symbol **parent,
+			       struct addr_location *root_al);
 
 /*
  * Default guest kernel is defined by parameter --guestkallsyms
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 951a1cf..1eb58ee 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1406,9 +1406,8 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event,
 
 	if (symbol_conf.use_callchain && sample->callchain) {
 
-
 		if (machine__resolve_callchain(machine, evsel, al.thread,
-					       sample, NULL) != 0) {
+					       sample, NULL, NULL) != 0) {
 			if (verbose)
 				error("Failed to resolve callchain. Skipping\n");
 			return;
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 8deee19..cb2b108 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -7,6 +7,8 @@ const char	default_parent_pattern[] = "^sys_|^do_page_fault";
 const char	*parent_pattern = default_parent_pattern;
 const char	default_sort_order[] = "comm,dso,symbol";
 const char	*sort_order = default_sort_order;
+regex_t		ignore_callees_regex;
+int		have_ignore_callees = 0;
 int		sort__need_collapse = 0;
 int		sort__has_parent = 0;
 int		sort__has_sym = 0;
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 45ac84c..a4a6d0b 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -29,6 +29,8 @@ extern const char *sort_order;
 extern const char default_parent_pattern[];
 extern const char *parent_pattern;
 extern const char default_sort_order[];
+extern regex_t ignore_callees_regex;
+extern int have_ignore_callees;
 extern int sort__need_collapse;
 extern int sort__has_parent;
 extern int sort__has_sym;
@@ -183,4 +185,6 @@ int setup_sorting(void);
 extern int sort_dimension__add(const char *);
 void sort__setup_elide(FILE *fp);
 
+int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, int unset);
+
 #endif	/* __PERF_SORT_H */
-- 
1.8.2


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH v2] perf report/top: Add option to collapse undesired parts of call graph
  2013-07-08 11:57   ` Greg Price
@ 2013-07-08 16:24     ` Jiri Olsa
  2013-07-08 16:47       ` Arnaldo Carvalho de Melo
  2013-07-19  7:50     ` [tip:perf/core] " tip-bot for Greg Price
  1 sibling, 1 reply; 22+ messages in thread
From: Jiri Olsa @ 2013-07-08 16:24 UTC (permalink / raw)
  To: Greg Price
  Cc: linux-kernel, Arnaldo Carvalho de Melo, Namhyung Kim,
	Ingo Molnar, Peter Zijlstra, Paul Mackerras, David Ahern

On Mon, Jul 08, 2013 at 07:57:46AM -0400, Greg Price wrote:
> On Sun, Jul 07, 2013 at 03:13:40PM +0200, Jiri Olsa wrote:
> > On Mon, Jul 01, 2013 at 10:28:45AM -0400, Greg Price wrote:
> > > For example, in an application with an expensive function
> > > implemented with deeply nested recursive calls, the default
> > > call-graph presentation is dominated by the different callchains
> > > within that function.  By ignoring these callees, we can collect
> > > the callchains leading into the function and compactly identify
> > > what to blame for expensive calls.
> > 
> > hi,
> > what's this one based on? I cannot get it applied on acme's perf/core
> 
> That was on v3.10.  Here it is on 6d895ece5, which is currently acme's
> perf/core.
> 
> Cheers,
> Greg
> 
> 
> -- >8 --
> Date: Thu, 6 Dec 2012 21:48:05 -0800
> 
> For example, in an application with an expensive function
> implemented with deeply nested recursive calls, the default
> call-graph presentation is dominated by the different callchains
> within that function.  By ignoring these callees, we can collect
> the callchains leading into the function and compactly identify
> what to blame for expensive calls.

change looks ok, but I'm not that confident in this part, so at least:

Tested-by: Jiri Olsa <jolsa@redhat.com>

thanks,
jirka

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v2] perf report/top: Add option to collapse undesired parts of call graph
  2013-07-08 16:24     ` Jiri Olsa
@ 2013-07-08 16:47       ` Arnaldo Carvalho de Melo
  0 siblings, 0 replies; 22+ messages in thread
From: Arnaldo Carvalho de Melo @ 2013-07-08 16:47 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Greg Price, linux-kernel, Namhyung Kim, Ingo Molnar,
	Peter Zijlstra, Paul Mackerras, David Ahern

Em Mon, Jul 08, 2013 at 06:24:52PM +0200, Jiri Olsa escreveu:
> On Mon, Jul 08, 2013 at 07:57:46AM -0400, Greg Price wrote:
> > For example, in an application with an expensive function
> > implemented with deeply nested recursive calls, the default
> > call-graph presentation is dominated by the different callchains
> > within that function.  By ignoring these callees, we can collect the
> > callchains leading into the function and compactly identify what to
> > blame for expensive calls.
 
> change looks ok, but I'm not that confident in this part, so at least:
 
> Tested-by: Jiri Olsa <jolsa@redhat.com>

Thanks, applied.

- Arnaldo

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [tip:perf/core] perf report/top: Add option to collapse undesired parts of call graph
  2013-07-08 11:57   ` Greg Price
  2013-07-08 16:24     ` Jiri Olsa
@ 2013-07-19  7:50     ` tip-bot for Greg Price
  1 sibling, 0 replies; 22+ messages in thread
From: tip-bot for Greg Price @ 2013-07-19  7:50 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: acme, linux-kernel, paulus, mingo, hpa, mingo, a.p.zijlstra,
	price, namhyung, jolsa, dsahern, tglx, fengguang.wu, price

Commit-ID:  b21484f1a1f300d422cfe5d4f8f50015e22cea24
Gitweb:     http://git.kernel.org/tip/b21484f1a1f300d422cfe5d4f8f50015e22cea24
Author:     Greg Price <price@MIT.EDU>
AuthorDate: Thu, 6 Dec 2012 21:48:05 -0800
Committer:  Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Fri, 12 Jul 2013 13:53:55 -0300

perf report/top: Add option to collapse undesired parts of call graph

For example, in an application with an expensive function implemented
with deeply nested recursive calls, the default call-graph presentation
is dominated by the different callchains within that function.  By
ignoring these callees, we can collect the callchains leading into the
function and compactly identify what to blame for expensive calls.

For example, in this report the callers of garbage_collect() are
scattered across the tree:

  $ perf report -d ruby 2>- | grep -m10 ^[^#]*[a-z]
      22.03%     ruby  [.] gc_mark
                 --- gc_mark
                    |--59.40%-- mark_keyvalue
                    |          st_foreach
                    |          gc_mark_children
                    |          |--99.75%-- rb_gc_mark
                    |          |          rb_vm_mark
                    |          |          gc_mark_children
                    |          |          gc_marks
                    |          |          |--99.00%-- garbage_collect

If we ignore the callees of garbage_collect(), its callers are coalesced:

  $ perf report --ignore-callees garbage_collect -d ruby 2>- | grep -m10 ^[^#]*[a-z]
      72.92%     ruby  [.] garbage_collect
                 --- garbage_collect
                     vm_xmalloc
                    |--47.08%-- ruby_xmalloc
                    |          st_insert2
                    |          rb_hash_aset
                    |          |--98.45%-- features_index_add
                    |          |          rb_provide_feature
                    |          |          rb_require_safe
                    |          |          vm_call_method

Signed-off-by: Greg Price <price@mit.edu>
Tested-by: Jiri Olsa <jolsa@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20130623031720.GW22203@biohazard-cafe.mit.edu
Link: http://lkml.kernel.org/r/20130708115746.GO22203@biohazard-cafe.mit.edu
Cc: Fengguang Wu <fengguang.wu@intel.com>
[ remove spaces at beginning of line, reported by Fengguang Wu ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-report.txt |  5 +++++
 tools/perf/Documentation/perf-top.txt    |  5 +++++
 tools/perf/builtin-report.c              | 27 ++++++++++++++++++++++++---
 tools/perf/builtin-top.c                 |  6 ++++--
 tools/perf/util/machine.c                | 24 +++++++++++++++---------
 tools/perf/util/machine.h                |  4 +++-
 tools/perf/util/session.c                |  3 +--
 tools/perf/util/sort.c                   |  2 ++
 tools/perf/util/sort.h                   |  4 ++++
 9 files changed, 63 insertions(+), 17 deletions(-)

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 66dab74..747ff50 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -135,6 +135,11 @@ OPTIONS
 --inverted::
         alias for inverted caller based call graph.
 
+--ignore-callees=<regex>::
+        Ignore callees of the function(s) matching the given regex.
+        This has the effect of collecting the callers of each such
+        function into one place in the call-graph tree.
+
 --pretty=<key>::
         Pretty printing style.  key: normal, raw
 
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 7fdd190..58d6598 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -155,6 +155,11 @@ Default is to monitor all CPUS.
 
 	Default: fractal,0.5,callee.
 
+--ignore-callees=<regex>::
+        Ignore callees of the function(s) matching the given regex.
+        This has the effect of collecting the callers of each such
+        function into one place in the call-graph tree.
+
 --percent-limit::
 	Do not show entries which have an overhead under that percent.
 	(Default: 0).
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index ee2ca3e..9a7e54d 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -89,7 +89,7 @@ static int perf_report__add_mem_hist_entry(struct perf_tool *tool,
 	if ((sort__has_parent || symbol_conf.use_callchain) &&
 	    sample->callchain) {
 		err = machine__resolve_callchain(machine, evsel, al->thread,
-						 sample, &parent);
+						 sample, &parent, al);
 		if (err)
 			return err;
 	}
@@ -180,7 +180,7 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
 	if ((sort__has_parent || symbol_conf.use_callchain)
 	    && sample->callchain) {
 		err = machine__resolve_callchain(machine, evsel, al->thread,
-						 sample, &parent);
+						 sample, &parent, al);
 		if (err)
 			return err;
 	}
@@ -254,7 +254,7 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,
 
 	if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) {
 		err = machine__resolve_callchain(machine, evsel, al->thread,
-						 sample, &parent);
+						 sample, &parent, al);
 		if (err)
 			return err;
 	}
@@ -681,6 +681,24 @@ setup:
 	return 0;
 }
 
+int
+report_parse_ignore_callees_opt(const struct option *opt __maybe_unused,
+				const char *arg, int unset __maybe_unused)
+{
+	if (arg) {
+		int err = regcomp(&ignore_callees_regex, arg, REG_EXTENDED);
+		if (err) {
+			char buf[BUFSIZ];
+			regerror(err, &ignore_callees_regex, buf, sizeof(buf));
+			pr_err("Invalid --ignore-callees regex: %s\n%s", arg, buf);
+			return -1;
+		}
+		have_ignore_callees = 1;
+	}
+
+	return 0;
+}
+
 static int
 parse_branch_mode(const struct option *opt __maybe_unused,
 		  const char *str __maybe_unused, int unset)
@@ -771,6 +789,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 		     "Default: fractal,0.5,callee", &parse_callchain_opt, callchain_default_opt),
 	OPT_BOOLEAN('G', "inverted", &report.inverted_callchain,
 		    "alias for inverted call graph"),
+	OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
+		   "ignore callees of these functions in call graphs",
+		   report_parse_ignore_callees_opt),
 	OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
 		   "only consider symbols in these dsos"),
 	OPT_STRING('c', "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index a237059..bbf4635 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -773,8 +773,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
 		    sample->callchain) {
 			err = machine__resolve_callchain(machine, evsel,
 							 al.thread, sample,
-							 &parent);
-
+							 &parent, &al);
 			if (err)
 				return;
 		}
@@ -1109,6 +1108,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts,
 			     "mode[,dump_size]", record_callchain_help,
 			     &parse_callchain_opt, "fp"),
+	OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
+		   "ignore callees of these functions in call graphs",
+		   report_parse_ignore_callees_opt),
 	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
 		    "Show a column with the sum of periods"),
 	OPT_STRING(0, "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 5dd5026..f9f9d63 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1058,11 +1058,10 @@ int machine__process_event(struct machine *machine, union perf_event *event)
 	return ret;
 }
 
-static bool symbol__match_parent_regex(struct symbol *sym)
+static bool symbol__match_regex(struct symbol *sym, regex_t *regex)
 {
-	if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0))
+	if (sym->name && !regexec(regex, sym->name, 0, NULL, 0))
 		return 1;
-
 	return 0;
 }
 
@@ -1159,8 +1158,8 @@ struct branch_info *machine__resolve_bstack(struct machine *machine,
 static int machine__resolve_callchain_sample(struct machine *machine,
 					     struct thread *thread,
 					     struct ip_callchain *chain,
-					     struct symbol **parent)
-
+					     struct symbol **parent,
+					     struct addr_location *root_al)
 {
 	u8 cpumode = PERF_RECORD_MISC_USER;
 	unsigned int i;
@@ -1211,8 +1210,15 @@ static int machine__resolve_callchain_sample(struct machine *machine,
 					   MAP__FUNCTION, ip, &al, NULL);
 		if (al.sym != NULL) {
 			if (sort__has_parent && !*parent &&
-			    symbol__match_parent_regex(al.sym))
+			    symbol__match_regex(al.sym, &parent_regex))
 				*parent = al.sym;
+			else if (have_ignore_callees && root_al &&
+			  symbol__match_regex(al.sym, &ignore_callees_regex)) {
+				/* Treat this symbol as the root,
+				   forgetting its callees. */
+				*root_al = al;
+				callchain_cursor_reset(&callchain_cursor);
+			}
 			if (!symbol_conf.use_callchain)
 				break;
 		}
@@ -1237,13 +1243,13 @@ int machine__resolve_callchain(struct machine *machine,
 			       struct perf_evsel *evsel,
 			       struct thread *thread,
 			       struct perf_sample *sample,
-			       struct symbol **parent)
-
+			       struct symbol **parent,
+			       struct addr_location *root_al)
 {
 	int ret;
 
 	ret = machine__resolve_callchain_sample(machine, thread,
-						sample->callchain, parent);
+						sample->callchain, parent, root_al);
 	if (ret)
 		return ret;
 
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index e49ba01..5bb6244 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -5,6 +5,7 @@
 #include <linux/rbtree.h>
 #include "map.h"
 
+struct addr_location;
 struct branch_stack;
 struct perf_evsel;
 struct perf_sample;
@@ -83,7 +84,8 @@ int machine__resolve_callchain(struct machine *machine,
 			       struct perf_evsel *evsel,
 			       struct thread *thread,
 			       struct perf_sample *sample,
-			       struct symbol **parent);
+			       struct symbol **parent,
+			       struct addr_location *root_al);
 
 /*
  * Default guest kernel is defined by parameter --guestkallsyms
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 951a1cf..1eb58ee 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1406,9 +1406,8 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event,
 
 	if (symbol_conf.use_callchain && sample->callchain) {
 
-
 		if (machine__resolve_callchain(machine, evsel, al.thread,
-					       sample, NULL) != 0) {
+					       sample, NULL, NULL) != 0) {
 			if (verbose)
 				error("Failed to resolve callchain. Skipping\n");
 			return;
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 8deee19..cb2b108 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -7,6 +7,8 @@ const char	default_parent_pattern[] = "^sys_|^do_page_fault";
 const char	*parent_pattern = default_parent_pattern;
 const char	default_sort_order[] = "comm,dso,symbol";
 const char	*sort_order = default_sort_order;
+regex_t		ignore_callees_regex;
+int		have_ignore_callees = 0;
 int		sort__need_collapse = 0;
 int		sort__has_parent = 0;
 int		sort__has_sym = 0;
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 45ac84c..a4a6d0b 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -29,6 +29,8 @@ extern const char *sort_order;
 extern const char default_parent_pattern[];
 extern const char *parent_pattern;
 extern const char default_sort_order[];
+extern regex_t ignore_callees_regex;
+extern int have_ignore_callees;
 extern int sort__need_collapse;
 extern int sort__has_parent;
 extern int sort__has_sym;
@@ -183,4 +185,6 @@ int setup_sorting(void);
 extern int sort_dimension__add(const char *);
 void sort__setup_elide(FILE *fp);
 
+int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, int unset);
+
 #endif	/* __PERF_SORT_H */

^ permalink raw reply related	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2013-07-19  7:51 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-07-01 14:28 [PATCH v2] perf report/top: Add option to collapse undesired parts of call graph Greg Price
2013-07-07 13:13 ` Jiri Olsa
2013-07-08 11:57   ` Greg Price
2013-07-08 16:24     ` Jiri Olsa
2013-07-08 16:47       ` Arnaldo Carvalho de Melo
2013-07-19  7:50     ` [tip:perf/core] " tip-bot for Greg Price
  -- strict thread matches above, loose matches on Subject: below --
2012-12-07  7:30 [PATCH] perf report: " Greg Price
2013-01-11  5:27 ` Arnaldo Carvalho de Melo
2013-02-25  4:28   ` Greg Price
2013-06-23  3:17   ` Greg Price
2013-06-23 21:53     ` Jiri Olsa
2013-06-24  8:32       ` Ingo Molnar
2013-06-24 23:14         ` Greg Price
2013-06-25  7:47           ` Ingo Molnar
2013-06-25  8:01             ` Namhyung Kim
2013-06-26 22:41               ` Greg Price
2013-06-24 22:50       ` Greg Price
2013-06-26  1:28     ` Namhyung Kim
2013-06-26 22:25       ` Greg Price
2013-06-27  4:58         ` Namhyung Kim
2013-07-01 14:05           ` Greg Price
2013-07-01 14:08           ` [PATCH] perf report: Fix bug in case "--no-call-graph -p foo" Greg Price

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.