All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] perf report/annotate: Add option to specify a CPU range
@ 2011-06-29  5:07 Anton Blanchard
  2011-06-29 18:07 ` David Ahern
  0 siblings, 1 reply; 9+ messages in thread
From: Anton Blanchard @ 2011-06-29  5:07 UTC (permalink / raw)
  To: Peter Zijlstra, Paul Mackerras, Ingo Molnar, Arnaldo Carvalho de Melo
  Cc: linux-kernel


Add an option to perf report and perf annotate to specify which CPUs
to operate on. This enables us to take a single system wide profile
and analyse each CPU (or group of CPUs) in isolation.

This was useful when profiling a multiprocess workload where the
bottleneck was on one CPU but this was hidden in the overall profile.
Per process and per thread breakdowns didn't help because multiple
processes were running on each CPU and no single process consumed
an entire CPU.

The patch converts the list of CPUs returned by cpu_map__new into a
bitmap for fast lookup. I wanted to use -C to be consistent with perf
top/record/stat, but unfortunately perf report already uses -C <comms>.

Signed-off-by: Anton Blanchard <anton@samba.org>
---

I capped it at MAX_NR_CPUS to avoid having to dynamically allocate
cpu_bitmap, but we could do that if the extra complexity is worth it.

Index: linux-2.6-tip/tools/perf/builtin-report.c
===================================================================
--- linux-2.6-tip.orig/tools/perf/builtin-report.c	2011-06-29 09:01:46.209676867 +1000
+++ linux-2.6-tip/tools/perf/builtin-report.c	2011-06-29 14:53:26.131226181 +1000
@@ -33,6 +33,9 @@
 #include "util/sort.h"
 #include "util/hist.h"
 
+#include <linux/bitmap.h>
+#include "util/cpumap.h"
+
 static char		const *input_name = "perf.data";
 
 static bool		force, use_tui, use_stdio;
@@ -48,6 +51,9 @@ static const char	*pretty_printing_style
 static char		callchain_default_opt[] = "fractal,0.5";
 static symbol_filter_t	annotate_init;
 
+static const char	*cpu_list;
+static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
+
 static int perf_session__add_hist_entry(struct perf_session *session,
 					struct addr_location *al,
 					struct perf_sample *sample,
@@ -116,6 +122,9 @@ static int process_sample_event(union pe
 	if (al.filtered || (hide_unresolved && al.sym == NULL))
 		return 0;
 
+	if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
+		return 0;
+
 	if (al.map != NULL)
 		al.map->dso->hit = 1;
 
@@ -455,6 +464,7 @@ static const struct option options[] = {
 		    "Only display entries resolved to a symbol"),
 	OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
 		    "Look for files with symbols relative to this directory"),
+	OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
 	OPT_END()
 };
 
@@ -501,6 +511,23 @@ int cmd_report(int argc, const char **ar
 
 	setup_sorting(report_usage, options);
 
+	if (cpu_list) {
+		int i;
+		struct cpu_map *map = cpu_map__new(cpu_list);
+
+		for (i = 0; i < map->nr; i++) {
+			int cpu = map->map[i];
+
+			if (cpu >= MAX_NR_CPUS) {
+				fprintf(stderr, "Requested CPU %d too large, "
+					"consider raising MAX_NR_CPUS\n", cpu);
+				return -1;
+			}
+
+			set_bit(cpu, cpu_bitmap);
+		}
+	}
+
 	if (parent_pattern != default_parent_pattern) {
 		if (sort_dimension__add("parent") < 0)
 			return -1;
Index: linux-2.6-tip/tools/perf/builtin-annotate.c
===================================================================
--- linux-2.6-tip.orig/tools/perf/builtin-annotate.c	2011-06-29 09:01:46.199676692 +1000
+++ linux-2.6-tip/tools/perf/builtin-annotate.c	2011-06-29 09:01:56.519857004 +1000
@@ -28,6 +28,9 @@
 #include "util/hist.h"
 #include "util/session.h"
 
+#include <linux/bitmap.h>
+#include "util/cpumap.h"
+
 static char		const *input_name = "perf.data";
 
 static bool		force, use_tui, use_stdio;
@@ -38,6 +41,9 @@ static bool		print_line;
 
 static const char *sym_hist_filter;
 
+static const char	*cpu_list;
+static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
+
 static int perf_evlist__add_sample(struct perf_evlist *evlist,
 				   struct perf_sample *sample,
 				   struct perf_evsel *evsel,
@@ -90,6 +96,9 @@ static int process_sample_event(union pe
 		return -1;
 	}
 
+	if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
+		return 0;
+
 	if (!al.filtered &&
 	    perf_evlist__add_sample(session->evlist, sample, evsel, &al)) {
 		pr_warning("problem incrementing symbol count, "
@@ -252,6 +261,7 @@ static const struct option options[] = {
 		    "print matching source lines (may be slow)"),
 	OPT_BOOLEAN('P', "full-paths", &full_paths,
 		    "Don't shorten the displayed pathnames"),
+	OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
 	OPT_END()
 };
 
@@ -274,6 +284,23 @@ int cmd_annotate(int argc, const char **
 
 	setup_sorting(annotate_usage, options);
 
+	if (cpu_list) {
+		int i;
+		struct cpu_map *map = cpu_map__new(cpu_list);
+
+		for (i = 0; i < map->nr; i++) {
+			int cpu = map->map[i];
+
+			if (cpu >= MAX_NR_CPUS) {
+				fprintf(stderr, "Requested CPU %d too large, "
+					"consider raising MAX_NR_CPUS\n", cpu);
+				return -1;
+			}
+
+			set_bit(cpu, cpu_bitmap);
+		}
+	}
+
 	if (argc) {
 		/*
 		 * Special case: if there's an argument left then assume tha

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] perf report/annotate: Add option to specify a CPU range
  2011-06-29  5:07 [PATCH] perf report/annotate: Add option to specify a CPU range Anton Blanchard
@ 2011-06-29 18:07 ` David Ahern
  2011-06-30  3:15   ` Anton Blanchard
  2011-06-30  3:16   ` Anton Blanchard
  0 siblings, 2 replies; 9+ messages in thread
From: David Ahern @ 2011-06-29 18:07 UTC (permalink / raw)
  To: Anton Blanchard
  Cc: Peter Zijlstra, Paul Mackerras, Ingo Molnar,
	Arnaldo Carvalho de Melo, linux-kernel



On 06/28/2011 11:07 PM, Anton Blanchard wrote:
> 
> Add an option to perf report and perf annotate to specify which CPUs

What about perf-script?

> to operate on. This enables us to take a single system wide profile
> and analyse each CPU (or group of CPUs) in isolation.
> 
> This was useful when profiling a multiprocess workload where the
> bottleneck was on one CPU but this was hidden in the overall profile.
> Per process and per thread breakdowns didn't help because multiple
> processes were running on each CPU and no single process consumed
> an entire CPU.
> 
> The patch converts the list of CPUs returned by cpu_map__new into a
> bitmap for fast lookup. I wanted to use -C to be consistent with perf
> top/record/stat, but unfortunately perf report already uses -C <comms>.
> 
> Signed-off-by: Anton Blanchard <anton@samba.org>
> ---
> 
> I capped it at MAX_NR_CPUS to avoid having to dynamically allocate
> cpu_bitmap, but we could do that if the extra complexity is worth it.
> 
> Index: linux-2.6-tip/tools/perf/builtin-report.c
> ===================================================================
> --- linux-2.6-tip.orig/tools/perf/builtin-report.c	2011-06-29 09:01:46.209676867 +1000
> +++ linux-2.6-tip/tools/perf/builtin-report.c	2011-06-29 14:53:26.131226181 +1000
> @@ -33,6 +33,9 @@
>  #include "util/sort.h"
>  #include "util/hist.h"
>  
> +#include <linux/bitmap.h>
> +#include "util/cpumap.h"
> +
>  static char		const *input_name = "perf.data";
>  
>  static bool		force, use_tui, use_stdio;
> @@ -48,6 +51,9 @@ static const char	*pretty_printing_style
>  static char		callchain_default_opt[] = "fractal,0.5";
>  static symbol_filter_t	annotate_init;
>  
> +static const char	*cpu_list;
> +static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
> +
>  static int perf_session__add_hist_entry(struct perf_session *session,
>  					struct addr_location *al,
>  					struct perf_sample *sample,
> @@ -116,6 +122,9 @@ static int process_sample_event(union pe
>  	if (al.filtered || (hide_unresolved && al.sym == NULL))
>  		return 0;
>  
> +	if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
> +		return 0;
> +

Need to check that the SAMPLE_CPU attribute is set for the event for
which the sample is generated; see builtin-script.c,
perf_evsel__check_attr().

>  	if (al.map != NULL)
>  		al.map->dso->hit = 1;
>  
> @@ -455,6 +464,7 @@ static const struct option options[] = {
>  		    "Only display entries resolved to a symbol"),
>  	OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
>  		    "Look for files with symbols relative to this directory"),
> +	OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),

This option should be added to the Documentation file,
Documentation/perf-report.txt

Same comments for perf-annotate changes below.

David


>  	OPT_END()
>  };
>  
> @@ -501,6 +511,23 @@ int cmd_report(int argc, const char **ar
>  
>  	setup_sorting(report_usage, options);
>  
> +	if (cpu_list) {
> +		int i;
> +		struct cpu_map *map = cpu_map__new(cpu_list);
> +
> +		for (i = 0; i < map->nr; i++) {
> +			int cpu = map->map[i];
> +
> +			if (cpu >= MAX_NR_CPUS) {
> +				fprintf(stderr, "Requested CPU %d too large, "
> +					"consider raising MAX_NR_CPUS\n", cpu);
> +				return -1;
> +			}
> +
> +			set_bit(cpu, cpu_bitmap);
> +		}
> +	}
> +
>  	if (parent_pattern != default_parent_pattern) {
>  		if (sort_dimension__add("parent") < 0)
>  			return -1;
> Index: linux-2.6-tip/tools/perf/builtin-annotate.c
> ===================================================================
> --- linux-2.6-tip.orig/tools/perf/builtin-annotate.c	2011-06-29 09:01:46.199676692 +1000
> +++ linux-2.6-tip/tools/perf/builtin-annotate.c	2011-06-29 09:01:56.519857004 +1000
> @@ -28,6 +28,9 @@
>  #include "util/hist.h"
>  #include "util/session.h"
>  
> +#include <linux/bitmap.h>
> +#include "util/cpumap.h"
> +
>  static char		const *input_name = "perf.data";
>  
>  static bool		force, use_tui, use_stdio;
> @@ -38,6 +41,9 @@ static bool		print_line;
>  
>  static const char *sym_hist_filter;
>  
> +static const char	*cpu_list;
> +static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
> +
>  static int perf_evlist__add_sample(struct perf_evlist *evlist,
>  				   struct perf_sample *sample,
>  				   struct perf_evsel *evsel,
> @@ -90,6 +96,9 @@ static int process_sample_event(union pe
>  		return -1;
>  	}
>  
> +	if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
> +		return 0;
> +
>  	if (!al.filtered &&
>  	    perf_evlist__add_sample(session->evlist, sample, evsel, &al)) {
>  		pr_warning("problem incrementing symbol count, "
> @@ -252,6 +261,7 @@ static const struct option options[] = {
>  		    "print matching source lines (may be slow)"),
>  	OPT_BOOLEAN('P', "full-paths", &full_paths,
>  		    "Don't shorten the displayed pathnames"),
> +	OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
>  	OPT_END()
>  };
>  
> @@ -274,6 +284,23 @@ int cmd_annotate(int argc, const char **
>  
>  	setup_sorting(annotate_usage, options);
>  
> +	if (cpu_list) {
> +		int i;
> +		struct cpu_map *map = cpu_map__new(cpu_list);
> +
> +		for (i = 0; i < map->nr; i++) {
> +			int cpu = map->map[i];
> +
> +			if (cpu >= MAX_NR_CPUS) {
> +				fprintf(stderr, "Requested CPU %d too large, "
> +					"consider raising MAX_NR_CPUS\n", cpu);
> +				return -1;
> +			}
> +
> +			set_bit(cpu, cpu_bitmap);
> +		}
> +	}
> +
>  	if (argc) {
>  		/*
>  		 * Special case: if there's an argument left then assume tha
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] perf report/annotate: Add option to specify a CPU range
  2011-06-29 18:07 ` David Ahern
@ 2011-06-30  3:15   ` Anton Blanchard
  2011-06-30  3:16   ` Anton Blanchard
  1 sibling, 0 replies; 9+ messages in thread
From: Anton Blanchard @ 2011-06-30  3:15 UTC (permalink / raw)
  To: David Ahern
  Cc: Peter Zijlstra, Paul Mackerras, Ingo Molnar,
	Arnaldo Carvalho de Melo, linux-kernel


Hi David,

> > Add an option to perf report and perf annotate to specify which CPUs
> 
> What about perf-script?

Good point, I added the option to perf script.

> > +	if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
> > +		return 0;
> > +
> 
> Need to check that the SAMPLE_CPU attribute is set for the event for
> which the sample is generated; see builtin-script.c,
> perf_evsel__check_attr().

Added.

> This option should be added to the Documentation file,
> Documentation/perf-report.txt

Added. Updated patch on the way.

Thanks,
Anton

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH] perf report/annotate: Add option to specify a CPU range
  2011-06-29 18:07 ` David Ahern
  2011-06-30  3:15   ` Anton Blanchard
@ 2011-06-30  3:16   ` Anton Blanchard
  2011-06-30  3:56     ` David Ahern
  1 sibling, 1 reply; 9+ messages in thread
From: Anton Blanchard @ 2011-06-30  3:16 UTC (permalink / raw)
  To: David Ahern
  Cc: Peter Zijlstra, Paul Mackerras, Ingo Molnar,
	Arnaldo Carvalho de Melo, linux-kernel


Add an option to perf report/annotate/script to specify which CPUs
to operate on. This enables us to take a single system wide profile
and analyse each CPU (or group of CPUs) in isolation.

This was useful when profiling a multiprocess workload where the
bottleneck was on one CPU but this was hidden in the overall profile.
Per process and per thread breakdowns didn't help because multiple
processes were running on each CPU and no single process consumed
an entire CPU.

The patch converts the list of CPUs returned by cpu_map__new into a
bitmap for fast lookup. I wanted to use -C to be consistent with perf
top/record/stat, but unfortunately perf report already uses -C <comms>.

Signed-off-by: Anton Blanchard <anton@samba.org>
---

v2: Incorporate suggestions from David Ahern:
	- Added -c to perf script
	- Check that SAMPLE_CPU is set when -c is used
	- Update documentation

Index: linux-2.6-tip/tools/perf/builtin-report.c
===================================================================
--- linux-2.6-tip.orig/tools/perf/builtin-report.c	2011-06-30 11:35:08.488417534 +1000
+++ linux-2.6-tip/tools/perf/builtin-report.c	2011-06-30 12:56:28.894807631 +1000
@@ -33,6 +33,9 @@
 #include "util/sort.h"
 #include "util/hist.h"
 
+#include <linux/bitmap.h>
+#include "util/cpumap.h"
+
 static char		const *input_name = "perf.data";
 
 static bool		force, use_tui, use_stdio;
@@ -48,6 +51,9 @@ static const char	*pretty_printing_style
 static char		callchain_default_opt[] = "fractal,0.5";
 static symbol_filter_t	annotate_init;
 
+static const char	*cpu_list;
+static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
+
 static int perf_session__add_hist_entry(struct perf_session *session,
 					struct addr_location *al,
 					struct perf_sample *sample,
@@ -116,6 +122,9 @@ static int process_sample_event(union pe
 	if (al.filtered || (hide_unresolved && al.sym == NULL))
 		return 0;
 
+	if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
+		return 0;
+
 	if (al.map != NULL)
 		al.map->dso->hit = 1;
 
@@ -262,6 +271,41 @@ static int __cmd_report(void)
 	if (session == NULL)
 		return -ENOMEM;
 
+	if (cpu_list) {
+		int i;
+		struct cpu_map *map;
+
+		for (i = 0; i < PERF_TYPE_MAX; ++i) {
+			struct perf_evsel *evsel;
+
+			evsel = perf_session__find_first_evtype(session, i);
+			if (!evsel)
+				continue;
+
+			if (!(evsel->attr.sample_type & PERF_SAMPLE_CPU)) {
+				pr_err("File does not contain CPU events. "
+				       "Remove -c option to proceed.\n");
+				ret = -1;
+				goto out_delete;
+			}
+		}
+
+		map = cpu_map__new(cpu_list);
+
+		for (i = 0; i < map->nr; i++) {
+			int cpu = map->map[i];
+
+			if (cpu >= MAX_NR_CPUS) {
+				pr_err("Requested CPU %d too large. "
+				       "Consider raising MAX_NR_CPUS\n", cpu);
+				ret = -1;
+				goto out_delete;
+			}
+
+			set_bit(cpu, cpu_bitmap);
+		}
+	}
+
 	if (show_threads)
 		perf_read_values_init(&show_threads_values);
 
@@ -455,6 +499,7 @@ static const struct option options[] = {
 		    "Only display entries resolved to a symbol"),
 	OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
 		    "Look for files with symbols relative to this directory"),
+	OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
 	OPT_END()
 };
 
Index: linux-2.6-tip/tools/perf/builtin-annotate.c
===================================================================
--- linux-2.6-tip.orig/tools/perf/builtin-annotate.c	2011-06-30 11:35:08.468417177 +1000
+++ linux-2.6-tip/tools/perf/builtin-annotate.c	2011-06-30 12:56:35.514926037 +1000
@@ -28,6 +28,9 @@
 #include "util/hist.h"
 #include "util/session.h"
 
+#include <linux/bitmap.h>
+#include "util/cpumap.h"
+
 static char		const *input_name = "perf.data";
 
 static bool		force, use_tui, use_stdio;
@@ -38,6 +41,9 @@ static bool		print_line;
 
 static const char *sym_hist_filter;
 
+static const char	*cpu_list;
+static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
+
 static int perf_evlist__add_sample(struct perf_evlist *evlist,
 				   struct perf_sample *sample,
 				   struct perf_evsel *evsel,
@@ -90,6 +96,9 @@ static int process_sample_event(union pe
 		return -1;
 	}
 
+	if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
+		return 0;
+
 	if (!al.filtered &&
 	    perf_evlist__add_sample(session->evlist, sample, evsel, &al)) {
 		pr_warning("problem incrementing symbol count, "
@@ -177,6 +186,41 @@ static int __cmd_annotate(void)
 	if (session == NULL)
 		return -ENOMEM;
 
+	if (cpu_list) {
+		int i;
+		struct cpu_map *map;
+
+		for (i = 0; i < PERF_TYPE_MAX; ++i) {
+			struct perf_evsel *evsel;
+
+			evsel = perf_session__find_first_evtype(session, i);
+			if (!evsel)
+				continue;
+
+			if (!(evsel->attr.sample_type & PERF_SAMPLE_CPU)) {
+				pr_err("File does not contain CPU events. "
+				       "Remove -c option to proceed.\n");
+				ret = -1;
+				goto out_delete;
+			}
+		}
+
+		map = cpu_map__new(cpu_list);
+
+		for (i = 0; i < map->nr; i++) {
+			int cpu = map->map[i];
+
+			if (cpu >= MAX_NR_CPUS) {
+				pr_err("Requested CPU %d too large. "
+				       "Consider raising MAX_NR_CPUS\n", cpu);
+				ret = -1;
+				goto out_delete;
+			}
+
+			set_bit(cpu, cpu_bitmap);
+		}
+	}
+
 	ret = perf_session__process_events(session, &event_ops);
 	if (ret)
 		goto out_delete;
@@ -252,6 +296,7 @@ static const struct option options[] = {
 		    "print matching source lines (may be slow)"),
 	OPT_BOOLEAN('P', "full-paths", &full_paths,
 		    "Don't shorten the displayed pathnames"),
+	OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
 	OPT_END()
 };
 
Index: linux-2.6-tip/tools/perf/builtin-script.c
===================================================================
--- linux-2.6-tip.orig/tools/perf/builtin-script.c	2011-06-30 11:35:08.478417356 +1000
+++ linux-2.6-tip/tools/perf/builtin-script.c	2011-06-30 12:56:44.185081104 +1000
@@ -13,6 +13,8 @@
 #include "util/util.h"
 #include "util/evlist.h"
 #include "util/evsel.h"
+#include <linux/bitmap.h>
+#include "util/cpumap.h"
 
 static char const		*script_name;
 static char const		*generate_script_lang;
@@ -21,6 +23,8 @@ static u64			last_timestamp;
 static u64			nr_unordered;
 extern const struct option	record_options[];
 static bool			no_callchain;
+static const char		*cpu_list;
+static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
 
 enum perf_output_field {
 	PERF_OUTPUT_COMM            = 1U << 0,
@@ -453,6 +457,10 @@ static int process_sample_event(union pe
 		last_timestamp = sample->time;
 		return 0;
 	}
+
+	if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
+		return 0;
+
 	scripting_ops->process_event(event, sample, evsel, session, thread);
 
 	session->hists.stats.total_period += sample->period;
@@ -1075,6 +1083,7 @@ static const struct option options[] = {
 	OPT_CALLBACK('f', "fields", NULL, "str",
 		     "comma separated output fields prepend with 'type:'. Valid types: hw,sw,trace,raw. Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr",
 		     parse_output_fields),
+	OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
 
 	OPT_END()
 };
@@ -1255,6 +1264,38 @@ int cmd_script(int argc, const char **ar
 	if (session == NULL)
 		return -ENOMEM;
 
+	if (cpu_list) {
+		struct cpu_map *map;
+
+		for (i = 0; i < PERF_TYPE_MAX; ++i) {
+			struct perf_evsel *evsel;
+
+			evsel = perf_session__find_first_evtype(session, i);
+			if (!evsel)
+				continue;
+
+			if (!(evsel->attr.sample_type & PERF_SAMPLE_CPU)) {
+				pr_err("File does not contain CPU events. "
+				       "Remove -c option to proceed.\n");
+				return -1;
+			}
+		}
+
+		map = cpu_map__new(cpu_list);
+
+		for (i = 0; i < map->nr; i++) {
+			int cpu = map->map[i];
+
+			if (cpu >= MAX_NR_CPUS) {
+				pr_err("Requested CPU %d too large. "
+				       "Consider raising MAX_NR_CPUS\n", cpu);
+				return -1;
+			}
+
+			set_bit(cpu, cpu_bitmap);
+		}
+	}
+
 	if (!no_callchain)
 		symbol_conf.use_callchain = true;
 	else
Index: linux-2.6-tip/tools/perf/Documentation/perf-annotate.txt
===================================================================
--- linux-2.6-tip.orig/tools/perf/Documentation/perf-annotate.txt	2011-06-30 11:35:17.768583314 +1000
+++ linux-2.6-tip/tools/perf/Documentation/perf-annotate.txt	2011-06-30 11:35:19.618616362 +1000
@@ -66,6 +66,12 @@ OPTIONS
 	used. This interfaces starts by centering on the line with more
 	samples, TAB/UNTAB cycles through the lines with more samples.
 
+-c::
+--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
+	be provided as a comma-separated list with no space: 0,1. Ranges of
+	CPUs are specified with -: 0-2. Default is to report samples on all
+	CPUs.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-report[1]
Index: linux-2.6-tip/tools/perf/Documentation/perf-report.txt
===================================================================
--- linux-2.6-tip.orig/tools/perf/Documentation/perf-report.txt	2011-06-30 11:35:17.768583314 +1000
+++ linux-2.6-tip/tools/perf/Documentation/perf-report.txt	2011-06-30 11:35:19.618616362 +1000
@@ -119,6 +119,12 @@ OPTIONS
 --symfs=<directory>::
         Look for files with symbols relative to this directory.
 
+-c::
+--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
+	be provided as a comma-separated list with no space: 0,1. Ranges of
+	CPUs are specified with -: 0-2. Default is to report samples on all
+	CPUs.
+
 SEE ALSO
 --------
 linkperf:perf-stat[1]
Index: linux-2.6-tip/tools/perf/Documentation/perf-script.txt
===================================================================
--- linux-2.6-tip.orig/tools/perf/Documentation/perf-script.txt	2011-06-30 11:35:17.768583314 +1000
+++ linux-2.6-tip/tools/perf/Documentation/perf-script.txt	2011-06-30 11:35:19.618616362 +1000
@@ -182,6 +182,12 @@ OPTIONS
 --hide-call-graph::
         When printing symbols do not display call chain.
 
+-c::
+--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
+	be provided as a comma-separated list with no space: 0,1. Ranges of
+	CPUs are specified with -: 0-2. Default is to report samples on all
+	CPUs.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-script-perl[1],


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] perf report/annotate: Add option to specify a CPU range
  2011-06-30  3:16   ` Anton Blanchard
@ 2011-06-30  3:56     ` David Ahern
  2011-07-01 11:16       ` Ingo Molnar
  0 siblings, 1 reply; 9+ messages in thread
From: David Ahern @ 2011-06-30  3:56 UTC (permalink / raw)
  To: Anton Blanchard
  Cc: Peter Zijlstra, Paul Mackerras, Ingo Molnar,
	Arnaldo Carvalho de Melo, linux-kernel

On 06/29/2011 09:16 PM, Anton Blanchard wrote:
> 
> Add an option to perf report/annotate/script to specify which CPUs
> to operate on. This enables us to take a single system wide profile
> and analyse each CPU (or group of CPUs) in isolation.
> 
> This was useful when profiling a multiprocess workload where the
> bottleneck was on one CPU but this was hidden in the overall profile.
> Per process and per thread breakdowns didn't help because multiple
> processes were running on each CPU and no single process consumed
> an entire CPU.
> 
> The patch converts the list of CPUs returned by cpu_map__new into a
> bitmap for fast lookup. I wanted to use -C to be consistent with perf
> top/record/stat, but unfortunately perf report already uses -C <comms>.
> 
> Signed-off-by: Anton Blanchard <anton@samba.org>
> ---
> 
> v2: Incorporate suggestions from David Ahern:
> 	- Added -c to perf script
> 	- Check that SAMPLE_CPU is set when -c is used
> 	- Update documentation
> 
> Index: linux-2.6-tip/tools/perf/builtin-report.c
> ===================================================================
> --- linux-2.6-tip.orig/tools/perf/builtin-report.c	2011-06-30 11:35:08.488417534 +1000
> +++ linux-2.6-tip/tools/perf/builtin-report.c	2011-06-30 12:56:28.894807631 +1000
> @@ -33,6 +33,9 @@
>  #include "util/sort.h"
>  #include "util/hist.h"
>  
> +#include <linux/bitmap.h>
> +#include "util/cpumap.h"
> +
>  static char		const *input_name = "perf.data";
>  
>  static bool		force, use_tui, use_stdio;
> @@ -48,6 +51,9 @@ static const char	*pretty_printing_style
>  static char		callchain_default_opt[] = "fractal,0.5";
>  static symbol_filter_t	annotate_init;
>  
> +static const char	*cpu_list;
> +static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
> +
>  static int perf_session__add_hist_entry(struct perf_session *session,
>  					struct addr_location *al,
>  					struct perf_sample *sample,
> @@ -116,6 +122,9 @@ static int process_sample_event(union pe
>  	if (al.filtered || (hide_unresolved && al.sym == NULL))
>  		return 0;
>  
> +	if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
> +		return 0;
> +
>  	if (al.map != NULL)
>  		al.map->dso->hit = 1;
>  
> @@ -262,6 +271,41 @@ static int __cmd_report(void)
>  	if (session == NULL)
>  		return -ENOMEM;
>  
> +	if (cpu_list) {
> +		int i;
> +		struct cpu_map *map;
> +
> +		for (i = 0; i < PERF_TYPE_MAX; ++i) {
> +			struct perf_evsel *evsel;
> +
> +			evsel = perf_session__find_first_evtype(session, i);
> +			if (!evsel)
> +				continue;
> +
> +			if (!(evsel->attr.sample_type & PERF_SAMPLE_CPU)) {
> +				pr_err("File does not contain CPU events. "
> +				       "Remove -c option to proceed.\n");
> +				ret = -1;
> +				goto out_delete;
> +			}
> +		}
> +
> +		map = cpu_map__new(cpu_list);
> +
> +		for (i = 0; i < map->nr; i++) {
> +			int cpu = map->map[i];
> +
> +			if (cpu >= MAX_NR_CPUS) {
> +				pr_err("Requested CPU %d too large. "
> +				       "Consider raising MAX_NR_CPUS\n", cpu);
> +				ret = -1;
> +				goto out_delete;
> +			}
> +
> +			set_bit(cpu, cpu_bitmap);
> +		}
> +	}
> +

It would be better to make this a function that all 3 commands reference
-- something like perf_session__cpu_bitmap(session, cpu_list,
cpu_bitmap) in util/session.c

David

>  	if (show_threads)
>  		perf_read_values_init(&show_threads_values);
>  
> @@ -455,6 +499,7 @@ static const struct option options[] = {
>  		    "Only display entries resolved to a symbol"),
>  	OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
>  		    "Look for files with symbols relative to this directory"),
> +	OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
>  	OPT_END()
>  };
>  
> Index: linux-2.6-tip/tools/perf/builtin-annotate.c
> ===================================================================
> --- linux-2.6-tip.orig/tools/perf/builtin-annotate.c	2011-06-30 11:35:08.468417177 +1000
> +++ linux-2.6-tip/tools/perf/builtin-annotate.c	2011-06-30 12:56:35.514926037 +1000
> @@ -28,6 +28,9 @@
>  #include "util/hist.h"
>  #include "util/session.h"
>  
> +#include <linux/bitmap.h>
> +#include "util/cpumap.h"
> +
>  static char		const *input_name = "perf.data";
>  
>  static bool		force, use_tui, use_stdio;
> @@ -38,6 +41,9 @@ static bool		print_line;
>  
>  static const char *sym_hist_filter;
>  
> +static const char	*cpu_list;
> +static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
> +
>  static int perf_evlist__add_sample(struct perf_evlist *evlist,
>  				   struct perf_sample *sample,
>  				   struct perf_evsel *evsel,
> @@ -90,6 +96,9 @@ static int process_sample_event(union pe
>  		return -1;
>  	}
>  
> +	if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
> +		return 0;
> +
>  	if (!al.filtered &&
>  	    perf_evlist__add_sample(session->evlist, sample, evsel, &al)) {
>  		pr_warning("problem incrementing symbol count, "
> @@ -177,6 +186,41 @@ static int __cmd_annotate(void)
>  	if (session == NULL)
>  		return -ENOMEM;
>  
> +	if (cpu_list) {
> +		int i;
> +		struct cpu_map *map;
> +
> +		for (i = 0; i < PERF_TYPE_MAX; ++i) {
> +			struct perf_evsel *evsel;
> +
> +			evsel = perf_session__find_first_evtype(session, i);
> +			if (!evsel)
> +				continue;
> +
> +			if (!(evsel->attr.sample_type & PERF_SAMPLE_CPU)) {
> +				pr_err("File does not contain CPU events. "
> +				       "Remove -c option to proceed.\n");
> +				ret = -1;
> +				goto out_delete;
> +			}
> +		}
> +
> +		map = cpu_map__new(cpu_list);
> +
> +		for (i = 0; i < map->nr; i++) {
> +			int cpu = map->map[i];
> +
> +			if (cpu >= MAX_NR_CPUS) {
> +				pr_err("Requested CPU %d too large. "
> +				       "Consider raising MAX_NR_CPUS\n", cpu);
> +				ret = -1;
> +				goto out_delete;
> +			}
> +
> +			set_bit(cpu, cpu_bitmap);
> +		}
> +	}
> +
>  	ret = perf_session__process_events(session, &event_ops);
>  	if (ret)
>  		goto out_delete;
> @@ -252,6 +296,7 @@ static const struct option options[] = {
>  		    "print matching source lines (may be slow)"),
>  	OPT_BOOLEAN('P', "full-paths", &full_paths,
>  		    "Don't shorten the displayed pathnames"),
> +	OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
>  	OPT_END()
>  };
>  
> Index: linux-2.6-tip/tools/perf/builtin-script.c
> ===================================================================
> --- linux-2.6-tip.orig/tools/perf/builtin-script.c	2011-06-30 11:35:08.478417356 +1000
> +++ linux-2.6-tip/tools/perf/builtin-script.c	2011-06-30 12:56:44.185081104 +1000
> @@ -13,6 +13,8 @@
>  #include "util/util.h"
>  #include "util/evlist.h"
>  #include "util/evsel.h"
> +#include <linux/bitmap.h>
> +#include "util/cpumap.h"
>  
>  static char const		*script_name;
>  static char const		*generate_script_lang;
> @@ -21,6 +23,8 @@ static u64			last_timestamp;
>  static u64			nr_unordered;
>  extern const struct option	record_options[];
>  static bool			no_callchain;
> +static const char		*cpu_list;
> +static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
>  
>  enum perf_output_field {
>  	PERF_OUTPUT_COMM            = 1U << 0,
> @@ -453,6 +457,10 @@ static int process_sample_event(union pe
>  		last_timestamp = sample->time;
>  		return 0;
>  	}
> +
> +	if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
> +		return 0;
> +
>  	scripting_ops->process_event(event, sample, evsel, session, thread);
>  
>  	session->hists.stats.total_period += sample->period;
> @@ -1075,6 +1083,7 @@ static const struct option options[] = {
>  	OPT_CALLBACK('f', "fields", NULL, "str",
>  		     "comma separated output fields prepend with 'type:'. Valid types: hw,sw,trace,raw. Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr",
>  		     parse_output_fields),
> +	OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
>  
>  	OPT_END()
>  };
> @@ -1255,6 +1264,38 @@ int cmd_script(int argc, const char **ar
>  	if (session == NULL)
>  		return -ENOMEM;
>  
> +	if (cpu_list) {
> +		struct cpu_map *map;
> +
> +		for (i = 0; i < PERF_TYPE_MAX; ++i) {
> +			struct perf_evsel *evsel;
> +
> +			evsel = perf_session__find_first_evtype(session, i);
> +			if (!evsel)
> +				continue;
> +
> +			if (!(evsel->attr.sample_type & PERF_SAMPLE_CPU)) {
> +				pr_err("File does not contain CPU events. "
> +				       "Remove -c option to proceed.\n");
> +				return -1;
> +			}
> +		}
> +
> +		map = cpu_map__new(cpu_list);
> +
> +		for (i = 0; i < map->nr; i++) {
> +			int cpu = map->map[i];
> +
> +			if (cpu >= MAX_NR_CPUS) {
> +				pr_err("Requested CPU %d too large. "
> +				       "Consider raising MAX_NR_CPUS\n", cpu);
> +				return -1;
> +			}
> +
> +			set_bit(cpu, cpu_bitmap);
> +		}
> +	}
> +
>  	if (!no_callchain)
>  		symbol_conf.use_callchain = true;
>  	else
> Index: linux-2.6-tip/tools/perf/Documentation/perf-annotate.txt
> ===================================================================
> --- linux-2.6-tip.orig/tools/perf/Documentation/perf-annotate.txt	2011-06-30 11:35:17.768583314 +1000
> +++ linux-2.6-tip/tools/perf/Documentation/perf-annotate.txt	2011-06-30 11:35:19.618616362 +1000
> @@ -66,6 +66,12 @@ OPTIONS
>  	used. This interfaces starts by centering on the line with more
>  	samples, TAB/UNTAB cycles through the lines with more samples.
>  
> +-c::
> +--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
> +	be provided as a comma-separated list with no space: 0,1. Ranges of
> +	CPUs are specified with -: 0-2. Default is to report samples on all
> +	CPUs.
> +
>  SEE ALSO
>  --------
>  linkperf:perf-record[1], linkperf:perf-report[1]
> Index: linux-2.6-tip/tools/perf/Documentation/perf-report.txt
> ===================================================================
> --- linux-2.6-tip.orig/tools/perf/Documentation/perf-report.txt	2011-06-30 11:35:17.768583314 +1000
> +++ linux-2.6-tip/tools/perf/Documentation/perf-report.txt	2011-06-30 11:35:19.618616362 +1000
> @@ -119,6 +119,12 @@ OPTIONS
>  --symfs=<directory>::
>          Look for files with symbols relative to this directory.
>  
> +-c::
> +--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
> +	be provided as a comma-separated list with no space: 0,1. Ranges of
> +	CPUs are specified with -: 0-2. Default is to report samples on all
> +	CPUs.
> +
>  SEE ALSO
>  --------
>  linkperf:perf-stat[1]
> Index: linux-2.6-tip/tools/perf/Documentation/perf-script.txt
> ===================================================================
> --- linux-2.6-tip.orig/tools/perf/Documentation/perf-script.txt	2011-06-30 11:35:17.768583314 +1000
> +++ linux-2.6-tip/tools/perf/Documentation/perf-script.txt	2011-06-30 11:35:19.618616362 +1000
> @@ -182,6 +182,12 @@ OPTIONS
>  --hide-call-graph::
>          When printing symbols do not display call chain.
>  
> +-c::
> +--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
> +	be provided as a comma-separated list with no space: 0,1. Ranges of
> +	CPUs are specified with -: 0-2. Default is to report samples on all
> +	CPUs.
> +
>  SEE ALSO
>  --------
>  linkperf:perf-record[1], linkperf:perf-script-perl[1],
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] perf report/annotate: Add option to specify a CPU range
  2011-06-30  3:56     ` David Ahern
@ 2011-07-01 11:16       ` Ingo Molnar
  2011-07-04 11:51         ` Anton Blanchard
  2011-07-04 11:57         ` [PATCH] perf report/annotate/script: " Anton Blanchard
  0 siblings, 2 replies; 9+ messages in thread
From: Ingo Molnar @ 2011-07-01 11:16 UTC (permalink / raw)
  To: David Ahern
  Cc: Anton Blanchard, Peter Zijlstra, Paul Mackerras,
	Arnaldo Carvalho de Melo, linux-kernel


* David Ahern <dsahern@gmail.com> wrote:

> > @@ -262,6 +271,41 @@ static int __cmd_report(void)
> >  	if (session == NULL)
> >  		return -ENOMEM;
> >  
> > +	if (cpu_list) {
> > +		int i;
> > +		struct cpu_map *map;
> > +
> > +		for (i = 0; i < PERF_TYPE_MAX; ++i) {
> > +			struct perf_evsel *evsel;
> > +
> > +			evsel = perf_session__find_first_evtype(session, i);
> > +			if (!evsel)
> > +				continue;
> > +
> > +			if (!(evsel->attr.sample_type & PERF_SAMPLE_CPU)) {
> > +				pr_err("File does not contain CPU events. "
> > +				       "Remove -c option to proceed.\n");
> > +				ret = -1;
> > +				goto out_delete;
> > +			}
> > +		}
> > +
> > +		map = cpu_map__new(cpu_list);
> > +
> > +		for (i = 0; i < map->nr; i++) {
> > +			int cpu = map->map[i];
> > +
> > +			if (cpu >= MAX_NR_CPUS) {
> > +				pr_err("Requested CPU %d too large. "
> > +				       "Consider raising MAX_NR_CPUS\n", cpu);
> > +				ret = -1;
> > +				goto out_delete;
> > +			}
> > +
> > +			set_bit(cpu, cpu_bitmap);
> > +		}
> > +	}
> > +
> 
> It would be better to make this a function that all 3 commands 
> reference -- something like perf_session__cpu_bitmap(session, 
> cpu_list, cpu_bitmap) in util/session.c

Agreed. I can see how it ended up looking like this (fixing only perf 
report, then adding it to top, then to script), but at this stage it 
really calls for one helper that all three commands can utilize.

Very nice enhancement otherwise - might I suggest a 'perf top' hotkey 
as well to limit the output to certain CPUs only? :-)

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] perf report/annotate: Add option to specify a CPU range
  2011-07-01 11:16       ` Ingo Molnar
@ 2011-07-04 11:51         ` Anton Blanchard
  2011-07-04 11:57         ` [PATCH] perf report/annotate/script: " Anton Blanchard
  1 sibling, 0 replies; 9+ messages in thread
From: Anton Blanchard @ 2011-07-04 11:51 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: David Ahern, Peter Zijlstra, Paul Mackerras,
	Arnaldo Carvalho de Melo, linux-kernel


Hi,

Ingo Molnar <mingo@elte.hu> wrote:

> * David Ahern <dsahern@gmail.com> wrote:
> > It would be better to make this a function that all 3 commands 
> > reference -- something like perf_session__cpu_bitmap(session, 
> > cpu_list, cpu_bitmap) in util/session.c
> 
> Agreed. I can see how it ended up looking like this (fixing only perf 
> report, then adding it to top, then to script), but at this stage it 
> really calls for one helper that all three commands can utilize.

Yeah, it was dying to be consolidated. New version on the way.

> Very nice enhancement otherwise - might I suggest a 'perf top' hotkey 
> as well to limit the output to certain CPUs only? :-)

Good idea :) I'll have a go at it.

Anton

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH] perf report/annotate/script: Add option to specify a CPU range
  2011-07-01 11:16       ` Ingo Molnar
  2011-07-04 11:51         ` Anton Blanchard
@ 2011-07-04 11:57         ` Anton Blanchard
  2011-07-05 12:56           ` [tip:perf/core] " tip-bot for Anton Blanchard
  1 sibling, 1 reply; 9+ messages in thread
From: Anton Blanchard @ 2011-07-04 11:57 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: David Ahern, Peter Zijlstra, Paul Mackerras,
	Arnaldo Carvalho de Melo, linux-kernel


Add an option to perf report/annotate/script to specify which CPUs
to operate on. This enables us to take a single system wide profile
and analyse each CPU (or group of CPUs) in isolation.

This was useful when profiling a multiprocess workload where the
bottleneck was on one CPU but this was hidden in the overall profile.
Per process and per thread breakdowns didn't help because multiple
processes were running on each CPU and no single process consumed
an entire CPU.

The patch converts the list of CPUs returned by cpu_map__new into a
bitmap for fast lookup. I wanted to use -C to be consistent with perf
top/record/stat, but unfortunately perf report already uses -C <comms>.

Signed-off-by: Anton Blanchard <anton@samba.org>
---

v2: Incorporate suggestions from David Ahern:
	- Added -c to perf script
	- Check that SAMPLE_CPU is set when -c is used
	- Update documentation

v3: Create perf_session__cpu_bitmap

Index: linux-2.6-tip/tools/perf/builtin-report.c
===================================================================
--- linux-2.6-tip.orig/tools/perf/builtin-report.c	2011-07-04 21:18:57.309390814 +1000
+++ linux-2.6-tip/tools/perf/builtin-report.c	2011-07-04 21:30:25.341547527 +1000
@@ -33,6 +33,8 @@
 #include "util/sort.h"
 #include "util/hist.h"
 
+#include <linux/bitmap.h>
+
 static char		const *input_name = "perf.data";
 
 static bool		force, use_tui, use_stdio;
@@ -49,6 +51,9 @@ static char		callchain_default_opt[] = "
 static bool		inverted_callchain;
 static symbol_filter_t	annotate_init;
 
+static const char	*cpu_list;
+static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
+
 static int perf_session__add_hist_entry(struct perf_session *session,
 					struct addr_location *al,
 					struct perf_sample *sample,
@@ -117,6 +122,9 @@ static int process_sample_event(union pe
 	if (al.filtered || (hide_unresolved && al.sym == NULL))
 		return 0;
 
+	if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
+		return 0;
+
 	if (al.map != NULL)
 		al.map->dso->hit = 1;
 
@@ -263,6 +271,12 @@ static int __cmd_report(void)
 	if (session == NULL)
 		return -ENOMEM;
 
+	if (cpu_list) {
+		ret = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap);
+		if (ret)
+			goto out_delete;
+	}
+
 	if (show_threads)
 		perf_read_values_init(&show_threads_values);
 
@@ -473,6 +487,7 @@ static const struct option options[] = {
 		    "Only display entries resolved to a symbol"),
 	OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
 		    "Look for files with symbols relative to this directory"),
+	OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
 	OPT_END()
 };
 
Index: linux-2.6-tip/tools/perf/builtin-annotate.c
===================================================================
--- linux-2.6-tip.orig/tools/perf/builtin-annotate.c	2011-07-04 21:08:03.877843210 +1000
+++ linux-2.6-tip/tools/perf/builtin-annotate.c	2011-07-04 21:30:07.151226616 +1000
@@ -28,6 +28,8 @@
 #include "util/hist.h"
 #include "util/session.h"
 
+#include <linux/bitmap.h>
+
 static char		const *input_name = "perf.data";
 
 static bool		force, use_tui, use_stdio;
@@ -38,6 +40,9 @@ static bool		print_line;
 
 static const char *sym_hist_filter;
 
+static const char	*cpu_list;
+static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
+
 static int perf_evlist__add_sample(struct perf_evlist *evlist,
 				   struct perf_sample *sample,
 				   struct perf_evsel *evsel,
@@ -90,6 +95,9 @@ static int process_sample_event(union pe
 		return -1;
 	}
 
+	if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
+		return 0;
+
 	if (!al.filtered &&
 	    perf_evlist__add_sample(session->evlist, sample, evsel, &al)) {
 		pr_warning("problem incrementing symbol count, "
@@ -177,6 +185,12 @@ static int __cmd_annotate(void)
 	if (session == NULL)
 		return -ENOMEM;
 
+	if (cpu_list) {
+		ret = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap);
+		if (ret)
+			goto out_delete;
+	}
+
 	ret = perf_session__process_events(session, &event_ops);
 	if (ret)
 		goto out_delete;
@@ -252,6 +266,7 @@ static const struct option options[] = {
 		    "print matching source lines (may be slow)"),
 	OPT_BOOLEAN('P', "full-paths", &full_paths,
 		    "Don't shorten the displayed pathnames"),
+	OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
 	OPT_END()
 };
 
Index: linux-2.6-tip/tools/perf/builtin-script.c
===================================================================
--- linux-2.6-tip.orig/tools/perf/builtin-script.c	2011-07-04 21:08:03.887843387 +1000
+++ linux-2.6-tip/tools/perf/builtin-script.c	2011-07-04 21:31:11.822367589 +1000
@@ -13,6 +13,7 @@
 #include "util/util.h"
 #include "util/evlist.h"
 #include "util/evsel.h"
+#include <linux/bitmap.h>
 
 static char const		*script_name;
 static char const		*generate_script_lang;
@@ -21,6 +22,8 @@ static u64			last_timestamp;
 static u64			nr_unordered;
 extern const struct option	record_options[];
 static bool			no_callchain;
+static const char		*cpu_list;
+static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
 
 enum perf_output_field {
 	PERF_OUTPUT_COMM            = 1U << 0,
@@ -453,6 +456,10 @@ static int process_sample_event(union pe
 		last_timestamp = sample->time;
 		return 0;
 	}
+
+	if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
+		return 0;
+
 	scripting_ops->process_event(event, sample, evsel, session, thread);
 
 	session->hists.stats.total_period += sample->period;
@@ -1075,6 +1082,7 @@ static const struct option options[] = {
 	OPT_CALLBACK('f', "fields", NULL, "str",
 		     "comma separated output fields prepend with 'type:'. Valid types: hw,sw,trace,raw. Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr",
 		     parse_output_fields),
+	OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
 
 	OPT_END()
 };
@@ -1255,6 +1263,11 @@ int cmd_script(int argc, const char **ar
 	if (session == NULL)
 		return -ENOMEM;
 
+	if (cpu_list) {
+		if (perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap))
+			return -1;
+	}
+
 	if (!no_callchain)
 		symbol_conf.use_callchain = true;
 	else
Index: linux-2.6-tip/tools/perf/Documentation/perf-annotate.txt
===================================================================
--- linux-2.6-tip.orig/tools/perf/Documentation/perf-annotate.txt	2011-07-04 21:08:03.927844094 +1000
+++ linux-2.6-tip/tools/perf/Documentation/perf-annotate.txt	2011-07-04 21:20:14.160749659 +1000
@@ -66,6 +66,12 @@ OPTIONS
 	used. This interfaces starts by centering on the line with more
 	samples, TAB/UNTAB cycles through the lines with more samples.
 
+-c::
+--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
+	be provided as a comma-separated list with no space: 0,1. Ranges of
+	CPUs are specified with -: 0-2. Default is to report samples on all
+	CPUs.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-report[1]
Index: linux-2.6-tip/tools/perf/Documentation/perf-report.txt
===================================================================
--- linux-2.6-tip.orig/tools/perf/Documentation/perf-report.txt	2011-07-04 21:18:57.309390814 +1000
+++ linux-2.6-tip/tools/perf/Documentation/perf-report.txt	2011-07-04 21:20:14.160749659 +1000
@@ -128,6 +128,12 @@ OPTIONS
 --symfs=<directory>::
         Look for files with symbols relative to this directory.
 
+-c::
+--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
+	be provided as a comma-separated list with no space: 0,1. Ranges of
+	CPUs are specified with -: 0-2. Default is to report samples on all
+	CPUs.
+
 SEE ALSO
 --------
 linkperf:perf-stat[1]
Index: linux-2.6-tip/tools/perf/Documentation/perf-script.txt
===================================================================
--- linux-2.6-tip.orig/tools/perf/Documentation/perf-script.txt	2011-07-04 21:08:03.907843741 +1000
+++ linux-2.6-tip/tools/perf/Documentation/perf-script.txt	2011-07-04 21:20:14.160749659 +1000
@@ -182,6 +182,12 @@ OPTIONS
 --hide-call-graph::
         When printing symbols do not display call chain.
 
+-c::
+--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
+	be provided as a comma-separated list with no space: 0,1. Ranges of
+	CPUs are specified with -: 0-2. Default is to report samples on all
+	CPUs.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-script-perl[1],
Index: linux-2.6-tip/tools/perf/util/session.c
===================================================================
--- linux-2.6-tip.orig/tools/perf/util/session.c	2011-07-04 21:21:01.901593861 +1000
+++ linux-2.6-tip/tools/perf/util/session.c	2011-07-04 21:32:09.443384298 +1000
@@ -12,6 +12,7 @@
 #include "session.h"
 #include "sort.h"
 #include "util.h"
+#include "cpumap.h"
 
 static int perf_session__open(struct perf_session *self, bool force)
 {
@@ -1282,3 +1283,40 @@ void perf_session__print_ip(union perf_e
 		}
 	}
 }
+
+int perf_session__cpu_bitmap(struct perf_session *session,
+			     const char *cpu_list, unsigned long *cpu_bitmap)
+{
+	int i;
+	struct cpu_map *map;
+
+	for (i = 0; i < PERF_TYPE_MAX; ++i) {
+		struct perf_evsel *evsel;
+
+		evsel = perf_session__find_first_evtype(session, i);
+		if (!evsel)
+			continue;
+
+		if (!(evsel->attr.sample_type & PERF_SAMPLE_CPU)) {
+			pr_err("File does not contain CPU events. "
+			       "Remove -c option to proceed.\n");
+			return -1;
+		}
+	}
+
+	map = cpu_map__new(cpu_list);
+
+	for (i = 0; i < map->nr; i++) {
+		int cpu = map->map[i];
+
+		if (cpu >= MAX_NR_CPUS) {
+			pr_err("Requested CPU %d too large. "
+			       "Consider raising MAX_NR_CPUS\n", cpu);
+			return -1;
+		}
+
+		set_bit(cpu, cpu_bitmap);
+	}
+
+	return 0;
+}
Index: linux-2.6-tip/tools/perf/util/session.h
===================================================================
--- linux-2.6-tip.orig/tools/perf/util/session.h	2011-07-04 21:21:01.921594213 +1000
+++ linux-2.6-tip/tools/perf/util/session.h	2011-07-04 21:32:21.253592699 +1000
@@ -172,4 +172,7 @@ void perf_session__print_ip(union perf_e
 				 struct perf_session *session,
 				 int print_sym, int print_dso);
 
+int perf_session__cpu_bitmap(struct perf_session *session,
+			     const char *cpu_list, unsigned long *cpu_bitmap);
+
 #endif /* __PERF_SESSION_H */

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [tip:perf/core] perf report/annotate/script: Add option to specify a CPU range
  2011-07-04 11:57         ` [PATCH] perf report/annotate/script: " Anton Blanchard
@ 2011-07-05 12:56           ` tip-bot for Anton Blanchard
  0 siblings, 0 replies; 9+ messages in thread
From: tip-bot for Anton Blanchard @ 2011-07-05 12:56 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, paulus, anton, hpa, mingo, a.p.zijlstra, acme,
	dsahern, tglx, mingo

Commit-ID:  5d67be97f8903d05ce53597fb5f3bc25a45e8026
Gitweb:     http://git.kernel.org/tip/5d67be97f8903d05ce53597fb5f3bc25a45e8026
Author:     Anton Blanchard <anton@samba.org>
AuthorDate: Mon, 4 Jul 2011 21:57:50 +1000
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Tue, 5 Jul 2011 10:44:44 +0200

perf report/annotate/script: Add option to specify a CPU range

Add an option to perf report/annotate/script to specify which
CPUs to operate on. This enables us to take a single system wide
profile and analyse each CPU (or group of CPUs) in isolation.

This was useful when profiling a multiprocess workload where the
bottleneck was on one CPU but this was hidden in the overall
profile. Per process and per thread breakdowns didn't help
because multiple processes were running on each CPU and no
single process consumed an entire CPU.

The patch converts the list of CPUs returned by cpu_map__new
into a bitmap for fast lookup. I wanted to use -C to be
consistent with perf top/record/stat, but unfortunately perf
report already uses -C <comms>.

 v2: Incorporate suggestions from David Ahern:
	- Added -c to perf script
	- Check that SAMPLE_CPU is set when -c is used
	- Update documentation

 v3: Create perf_session__cpu_bitmap()

Signed-off-by: Anton Blanchard <anton@samba.org>
Acked-by: David Ahern <dsahern@gmail.com>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Link: http://lkml.kernel.org/r/20110704215750.11647eb9@kryten
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/Documentation/perf-annotate.txt |    6 ++++
 tools/perf/Documentation/perf-report.txt   |    6 ++++
 tools/perf/Documentation/perf-script.txt   |    6 ++++
 tools/perf/builtin-annotate.c              |   15 +++++++++++
 tools/perf/builtin-report.c                |   15 +++++++++++
 tools/perf/builtin-script.c                |   13 +++++++++
 tools/perf/util/session.c                  |   38 ++++++++++++++++++++++++++++
 tools/perf/util/session.h                  |    3 ++
 8 files changed, 102 insertions(+), 0 deletions(-)

diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt
index 6f5a498..85c5f02 100644
--- a/tools/perf/Documentation/perf-annotate.txt
+++ b/tools/perf/Documentation/perf-annotate.txt
@@ -66,6 +66,12 @@ OPTIONS
 	used. This interfaces starts by centering on the line with more
 	samples, TAB/UNTAB cycles through the lines with more samples.
 
+-c::
+--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
+	be provided as a comma-separated list with no space: 0,1. Ranges of
+	CPUs are specified with -: 0-2. Default is to report samples on all
+	CPUs.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index cfa8e51..04253c0 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -128,6 +128,12 @@ OPTIONS
 --symfs=<directory>::
         Look for files with symbols relative to this directory.
 
+-c::
+--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
+	be provided as a comma-separated list with no space: 0,1. Ranges of
+	CPUs are specified with -: 0-2. Default is to report samples on all
+	CPUs.
+
 SEE ALSO
 --------
 linkperf:perf-stat[1]
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index c6068cb..db01786 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -182,6 +182,12 @@ OPTIONS
 --hide-call-graph::
         When printing symbols do not display call chain.
 
+-c::
+--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
+	be provided as a comma-separated list with no space: 0,1. Ranges of
+	CPUs are specified with -: 0-2. Default is to report samples on all
+	CPUs.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-script-perl[1],
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 7b139e1..555aefd 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -28,6 +28,8 @@
 #include "util/hist.h"
 #include "util/session.h"
 
+#include <linux/bitmap.h>
+
 static char		const *input_name = "perf.data";
 
 static bool		force, use_tui, use_stdio;
@@ -38,6 +40,9 @@ static bool		print_line;
 
 static const char *sym_hist_filter;
 
+static const char	*cpu_list;
+static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
+
 static int perf_evlist__add_sample(struct perf_evlist *evlist,
 				   struct perf_sample *sample,
 				   struct perf_evsel *evsel,
@@ -90,6 +95,9 @@ static int process_sample_event(union perf_event *event,
 		return -1;
 	}
 
+	if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
+		return 0;
+
 	if (!al.filtered &&
 	    perf_evlist__add_sample(session->evlist, sample, evsel, &al)) {
 		pr_warning("problem incrementing symbol count, "
@@ -177,6 +185,12 @@ static int __cmd_annotate(void)
 	if (session == NULL)
 		return -ENOMEM;
 
+	if (cpu_list) {
+		ret = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap);
+		if (ret)
+			goto out_delete;
+	}
+
 	ret = perf_session__process_events(session, &event_ops);
 	if (ret)
 		goto out_delete;
@@ -252,6 +266,7 @@ static const struct option options[] = {
 		    "print matching source lines (may be slow)"),
 	OPT_BOOLEAN('P', "full-paths", &full_paths,
 		    "Don't shorten the displayed pathnames"),
+	OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
 	OPT_END()
 };
 
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 5d43d01..f854efd 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -33,6 +33,8 @@
 #include "util/sort.h"
 #include "util/hist.h"
 
+#include <linux/bitmap.h>
+
 static char		const *input_name = "perf.data";
 
 static bool		force, use_tui, use_stdio;
@@ -49,6 +51,9 @@ static char		callchain_default_opt[] = "fractal,0.5,callee";
 static bool		inverted_callchain;
 static symbol_filter_t	annotate_init;
 
+static const char	*cpu_list;
+static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
+
 static int perf_session__add_hist_entry(struct perf_session *session,
 					struct addr_location *al,
 					struct perf_sample *sample,
@@ -117,6 +122,9 @@ static int process_sample_event(union perf_event *event,
 	if (al.filtered || (hide_unresolved && al.sym == NULL))
 		return 0;
 
+	if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
+		return 0;
+
 	if (al.map != NULL)
 		al.map->dso->hit = 1;
 
@@ -263,6 +271,12 @@ static int __cmd_report(void)
 	if (session == NULL)
 		return -ENOMEM;
 
+	if (cpu_list) {
+		ret = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap);
+		if (ret)
+			goto out_delete;
+	}
+
 	if (show_threads)
 		perf_read_values_init(&show_threads_values);
 
@@ -473,6 +487,7 @@ static const struct option options[] = {
 		    "Only display entries resolved to a symbol"),
 	OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
 		    "Look for files with symbols relative to this directory"),
+	OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
 	OPT_END()
 };
 
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 3056b45..09024ec 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -13,6 +13,7 @@
 #include "util/util.h"
 #include "util/evlist.h"
 #include "util/evsel.h"
+#include <linux/bitmap.h>
 
 static char const		*script_name;
 static char const		*generate_script_lang;
@@ -21,6 +22,8 @@ static u64			last_timestamp;
 static u64			nr_unordered;
 extern const struct option	record_options[];
 static bool			no_callchain;
+static const char		*cpu_list;
+static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
 
 enum perf_output_field {
 	PERF_OUTPUT_COMM            = 1U << 0,
@@ -453,6 +456,10 @@ static int process_sample_event(union perf_event *event,
 		last_timestamp = sample->time;
 		return 0;
 	}
+
+	if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
+		return 0;
+
 	scripting_ops->process_event(event, sample, evsel, session, thread);
 
 	session->hists.stats.total_period += sample->period;
@@ -1075,6 +1082,7 @@ static const struct option options[] = {
 	OPT_CALLBACK('f', "fields", NULL, "str",
 		     "comma separated output fields prepend with 'type:'. Valid types: hw,sw,trace,raw. Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr",
 		     parse_output_fields),
+	OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
 
 	OPT_END()
 };
@@ -1255,6 +1263,11 @@ int cmd_script(int argc, const char **argv, const char *prefix __used)
 	if (session == NULL)
 		return -ENOMEM;
 
+	if (cpu_list) {
+		if (perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap))
+			return -1;
+	}
+
 	if (!no_callchain)
 		symbol_conf.use_callchain = true;
 	else
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 558bcf9..080e533 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -12,6 +12,7 @@
 #include "session.h"
 #include "sort.h"
 #include "util.h"
+#include "cpumap.h"
 
 static int perf_session__open(struct perf_session *self, bool force)
 {
@@ -1282,3 +1283,40 @@ void perf_session__print_ip(union perf_event *event,
 		}
 	}
 }
+
+int perf_session__cpu_bitmap(struct perf_session *session,
+			     const char *cpu_list, unsigned long *cpu_bitmap)
+{
+	int i;
+	struct cpu_map *map;
+
+	for (i = 0; i < PERF_TYPE_MAX; ++i) {
+		struct perf_evsel *evsel;
+
+		evsel = perf_session__find_first_evtype(session, i);
+		if (!evsel)
+			continue;
+
+		if (!(evsel->attr.sample_type & PERF_SAMPLE_CPU)) {
+			pr_err("File does not contain CPU events. "
+			       "Remove -c option to proceed.\n");
+			return -1;
+		}
+	}
+
+	map = cpu_map__new(cpu_list);
+
+	for (i = 0; i < map->nr; i++) {
+		int cpu = map->map[i];
+
+		if (cpu >= MAX_NR_CPUS) {
+			pr_err("Requested CPU %d too large. "
+			       "Consider raising MAX_NR_CPUS\n", cpu);
+			return -1;
+		}
+
+		set_bit(cpu, cpu_bitmap);
+	}
+
+	return 0;
+}
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index de4178d..5de754f 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -172,4 +172,7 @@ void perf_session__print_ip(union perf_event *event,
 				 struct perf_session *session,
 				 int print_sym, int print_dso);
 
+int perf_session__cpu_bitmap(struct perf_session *session,
+			     const char *cpu_list, unsigned long *cpu_bitmap);
+
 #endif /* __PERF_SESSION_H */

^ permalink raw reply related	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2011-07-05 12:56 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-06-29  5:07 [PATCH] perf report/annotate: Add option to specify a CPU range Anton Blanchard
2011-06-29 18:07 ` David Ahern
2011-06-30  3:15   ` Anton Blanchard
2011-06-30  3:16   ` Anton Blanchard
2011-06-30  3:56     ` David Ahern
2011-07-01 11:16       ` Ingo Molnar
2011-07-04 11:51         ` Anton Blanchard
2011-07-04 11:57         ` [PATCH] perf report/annotate/script: " Anton Blanchard
2011-07-05 12:56           ` [tip:perf/core] " tip-bot for Anton Blanchard

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.