All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jiri Olsa <jolsa@kernel.org>
To: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: lkml <linux-kernel@vger.kernel.org>,
	Adrian Hunter <adrian.hunter@intel.com>,
	Andi Kleen <ak@linux.intel.com>, David Ahern <dsahern@gmail.com>,
	Ingo Molnar <mingo@kernel.org>,
	Namhyung Kim <namhyung@kernel.org>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Stephane Eranian <eranian@google.com>
Subject: [PATCH 27/27] perf stat: Introduce --per-thread option
Date: Tue, 23 Jun 2015 00:36:28 +0200	[thread overview]
Message-ID: <1435012588-9007-28-git-send-email-jolsa@kernel.org> (raw)
In-Reply-To: <1435012588-9007-1-git-send-email-jolsa@kernel.org>

Currently the counter values of all tasks specified via the -p option's
PID arguments get aggregated and printed as single values.

Adding the --per-thread option to print values per task (thread).

  $ perf stat  -e cycles,instructions --per-thread -p 30190,30242
  ^C
   Performance counter stats for process id '30190,30242':

               cat-30190                     0      cycles
               yes-30242         3,842,525,421      cycles
               cat-30190                     0      instructions
               yes-30242        10,370,817,010      instructions

         1.143155657 seconds time elapsed

Also works under interval mode:

  $ perf stat  -e cycles,instructions --per-thread -p 30190,30242 -I 1000
  #           time             comm-pid                  counts unit events
       1.000073435              cat-30190                89,058      cycles
       1.000073435              yes-30242         3,360,786,902      cycles                     (100.00%)
       1.000073435              cat-30190                14,066      instructions
       1.000073435              yes-30242         9,069,937,462      instructions
       2.000204830              cat-30190                     0      cycles
       2.000204830              yes-30242         3,351,667,626      cycles
       2.000204830              cat-30190                     0      instructions
       2.000204830              yes-30242         9,045,796,885      instructions
  ^C     2.771286639              cat-30190                     0      cycles
       2.771286639              yes-30242         2,593,884,166      cycles
       2.771286639              cat-30190                     0      instructions
       2.771286639              yes-30242         7,001,171,191      instructions

It works only with the -t and -p options; otherwise the following
error is printed:

  $ perf stat  -e cycles --per-thread  -I 1000 ls
  The --per-thread option is only available when monitoring via -p -t options.
      -p, --pid <pid>       stat events on existing process id
      -t, --tid <tid>       stat events on existing thread id

Link: http://lkml.kernel.org/n/tip-0v0ixd9k7o9z1u8hqngm1coe@git.kernel.org
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 tools/perf/Documentation/perf-stat.txt |  4 ++
 tools/perf/builtin-stat.c              | 68 +++++++++++++++++++++++++++++++++-
 tools/perf/util/stat.h                 |  1 +
 3 files changed, 71 insertions(+), 2 deletions(-)

diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 04e150d83e7d..47469abdcc1c 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -144,6 +144,10 @@ is a useful mode to detect imbalance between physical cores.  To enable this mod
 use --per-core in addition to -a. (system-wide).  The output includes the
 core number and the number of online logical processors on that physical processor.
 
+--per-thread::
+Aggregate counts per monitored threads, when monitoring threads (-t option)
+or processes (-p option).
+
 -D msecs::
 --delay msecs::
 After starting the program, wait msecs before measuring. This is useful to
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 56dc8881cb05..64e67b27528d 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -231,6 +231,7 @@ process_counter_values(struct perf_evsel *evsel, int cpu, int thread,
 		count = &zero;
 
 	switch (aggr_mode) {
+	case AGGR_THREAD:
 	case AGGR_CORE:
 	case AGGR_SOCKET:
 	case AGGR_NONE:
@@ -602,6 +603,14 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
 			csv_output ? 0 : -4,
 			perf_evsel__cpus(evsel)->map[id], csv_sep);
 		break;
+	case AGGR_THREAD:
+		fprintf(output, "%*s-%*d%s",
+			csv_output ? 0 : 16,
+			thread_map__comm(evsel->threads, id),
+			csv_output ? 0 : -8,
+			thread_map__pid(evsel->threads, id),
+			csv_sep);
+		break;
 	case AGGR_GLOBAL:
 	default:
 		break;
@@ -750,6 +759,40 @@ static void print_aggr(char *prefix)
 	}
 }
 
+static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
+{
+	int nthreads = thread_map__nr(counter->threads);
+	int ncpus = cpu_map__nr(counter->cpus);
+	int cpu, thread;
+	double uval;
+
+	for (thread = 0; thread < nthreads; thread++) {
+		u64 ena = 0, run = 0, val = 0;
+
+		for (cpu = 0; cpu < ncpus; cpu++) {
+			val += perf_counts(counter->counts, cpu, thread)->val;
+			ena += perf_counts(counter->counts, cpu, thread)->ena;
+			run += perf_counts(counter->counts, cpu, thread)->run;
+		}
+
+		if (prefix)
+			fprintf(output, "%s", prefix);
+
+		uval = val * counter->scale;
+
+		if (nsec_counter(counter))
+			nsec_printout(thread, 0, counter, uval);
+		else
+			abs_printout(thread, 0, counter, uval);
+
+		if (!csv_output)
+			print_noise(counter, 1.0);
+
+		print_running(run, ena);
+		fputc('\n', output);
+	}
+}
+
 /*
  * Print out the results of a single counter:
  * aggregated counts in system-wide mode
@@ -876,6 +919,9 @@ static void print_interval(char *prefix, struct timespec *ts)
 		case AGGR_NONE:
 			fprintf(output, "#           time CPU                counts %*s events\n", unit_width, "unit");
 			break;
+		case AGGR_THREAD:
+			fprintf(output, "#           time             comm-pid                  counts %*s events\n", unit_width, "unit");
+			break;
 		case AGGR_GLOBAL:
 		default:
 			fprintf(output, "#           time             counts %*s events\n", unit_width, "unit");
@@ -944,6 +990,10 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
 	case AGGR_SOCKET:
 		print_aggr(prefix);
 		break;
+	case AGGR_THREAD:
+		evlist__for_each(evsel_list, counter)
+			print_aggr_thread(counter, prefix);
+		break;
 	case AGGR_GLOBAL:
 		evlist__for_each(evsel_list, counter)
 			print_counter_aggr(counter, prefix);
@@ -1031,6 +1081,7 @@ static int perf_stat_init_aggr_mode(void)
 		break;
 	case AGGR_NONE:
 	case AGGR_GLOBAL:
+	case AGGR_THREAD:
 	default:
 		break;
 	}
@@ -1255,6 +1306,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 		     "aggregate counts per processor socket", AGGR_SOCKET),
 	OPT_SET_UINT(0, "per-core", &aggr_mode,
 		     "aggregate counts per physical processor core", AGGR_CORE),
+	OPT_SET_UINT(0, "per-thread", &aggr_mode,
+		     "aggregate counts per thread", AGGR_THREAD),
 	OPT_UINTEGER('D', "delay", &initial_delay,
 		     "ms to wait before starting measurement after program start"),
 	OPT_END()
@@ -1346,8 +1399,19 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 		run_count = 1;
 	}
 
-	/* no_aggr, cgroup are for system-wide only */
-	if ((aggr_mode != AGGR_GLOBAL || nr_cgroups) &&
+	if ((aggr_mode == AGGR_THREAD) && !target__has_task(&target)) {
+		fprintf(stderr, "The --per-thread option is only available "
+			"when monitoring via -p -t options.\n");
+		parse_options_usage(NULL, options, "p", 1);
+		parse_options_usage(NULL, options, "t", 1);
+		goto out;
+	}
+
+	/*
+	 * no_aggr, cgroup are for system-wide only
+	 * --per-thread is aggregated per thread, we dont mix it with cpu mode
+	 */
+	if (((aggr_mode != AGGR_GLOBAL && aggr_mode != AGGR_THREAD) || nr_cgroups) &&
 	    !target__has_cpu(&target)) {
 		fprintf(stderr, "both cgroup and no-aggregation "
 			"modes only available in system-wide mode\n");
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 9f05c571befe..1cfbe0a980ac 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -30,6 +30,7 @@ enum aggr_mode {
 	AGGR_GLOBAL,
 	AGGR_SOCKET,
 	AGGR_CORE,
+	AGGR_THREAD,
 };
 
 struct perf_counts_values {
-- 
1.9.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
Please read the FAQ at  http://www.tux.org/lkml/

  parent reply	other threads:[~2015-06-22 22:39 UTC|newest]

Thread overview: 49+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-06-22 22:36 [PATCHv4 00/27] perf stat: Introduce --per-thread option Jiri Olsa
2015-06-22 22:36 ` [PATCH 01/27] perf tools: Change thread_map::map into struct Jiri Olsa
2015-06-25  7:58   ` [tip:perf/core] perf thread_map: Don't access the array entries directly tip-bot for Jiri Olsa
2015-06-25  7:58   ` [tip:perf/core] perf thread_map: Change map entries into a struct tip-bot for Jiri Olsa
2015-06-22 22:36 ` [PATCH 02/27] perf tools: Add comm string into struct thread_map Jiri Olsa
2015-06-23 21:01   ` Arnaldo Carvalho de Melo
2015-06-23 21:56     ` Jiri Olsa
2015-06-25 14:39       ` Arnaldo Carvalho de Melo
2015-06-25 16:01         ` Jiri Olsa
2015-06-25 16:13           ` Arnaldo Carvalho de Melo
2015-06-22 22:36 ` [PATCH 03/27] perf tools: Add reference counting for cpu_map object Jiri Olsa
2015-06-23 21:06   ` Arnaldo Carvalho de Melo
2015-06-23 21:54     ` Jiri Olsa
2015-06-26  8:44   ` [tip:perf/urgent] " tip-bot for Jiri Olsa
2015-06-22 22:36 ` [PATCH 04/27] perf tools: Add reference counting for thread_map object Jiri Olsa
2015-06-26  8:44   ` [tip:perf/urgent] " tip-bot for Jiri Olsa
2015-06-22 22:36 ` [PATCH 05/27] perf tools: Propagate cpu maps through the evlist Jiri Olsa
2015-06-26  8:45   ` [tip:perf/urgent] perf evlist: Propagate cpu maps to evsels in an evlist tip-bot for Jiri Olsa
2015-06-22 22:36 ` [PATCH 06/27] perf tools: Propagate thread maps through the evlist Jiri Olsa
2015-06-26  8:45   ` [tip:perf/urgent] perf evlist: " tip-bot for Jiri Olsa
2015-06-22 22:36 ` [PATCH 07/27] perf tools: Make perf_evsel__(nr_)cpus generic Jiri Olsa
2015-06-26  8:45   ` [tip:perf/urgent] " tip-bot for Jiri Olsa
2015-06-22 22:36 ` [PATCH 08/27] perf tests: Add thread_map object tests Jiri Olsa
2015-06-22 22:36 ` [PATCH 09/27] perf stat: Introduce perf_counts function Jiri Olsa
2015-06-23 21:11   ` Arnaldo Carvalho de Melo
2015-06-23 22:03     ` Jiri Olsa
2015-06-22 22:36 ` [PATCH 10/27] perf stat: Use xyarray for cpu evsel counts Jiri Olsa
2015-06-22 22:36 ` [PATCH 11/27] perf stat: Make stats work over the thread dimension Jiri Olsa
2015-06-22 22:36 ` [PATCH 12/27] perf stat: Rename struct perf_counts::cpu member to values Jiri Olsa
2015-06-22 22:36 ` [PATCH 13/27] perf stat: Introduce perf_evlist__reset_stats Jiri Olsa
2015-06-22 22:36 ` [PATCH 14/27] perf stat: Move perf_evsel__(alloc|free|reset)_stat_priv into stat object Jiri Olsa
2015-06-22 22:36 ` [PATCH 15/27] perf stat: Move perf_evsel__(alloc|free)_prev_raw_counts " Jiri Olsa
2015-06-22 22:36 ` [PATCH 16/27] perf stat: Move perf_evlist__(alloc|free|reset)_stats " Jiri Olsa
2015-06-22 22:36 ` [PATCH 17/27] perf stat: Introduce perf_evsel__alloc_stats function Jiri Olsa
2015-06-22 22:36 ` [PATCH 18/27] perf stat: Introduce perf_evsel__read function Jiri Olsa
2015-06-22 22:36 ` [PATCH 19/27] perf stat: Introduce read_counters function Jiri Olsa
2015-06-22 22:36 ` [PATCH 20/27] perf stat: Separate counters reading and processing Jiri Olsa
2015-06-22 22:36 ` [PATCH 21/27] perf stat: Move zero_per_pkg into counter process code Jiri Olsa
2015-06-22 22:36 ` [PATCH 22/27] perf stat: Move perf_stat initialization " Jiri Olsa
2015-06-22 22:36 ` [PATCH 23/27] perf stat: Remove perf_evsel__read_cb function Jiri Olsa
2015-06-22 22:36 ` [PATCH 24/27] perf stat: Rename print_interval to process_interval Jiri Olsa
2015-06-22 22:36 ` [PATCH 25/27] perf stat: Using init_stats instead of memset Jiri Olsa
2015-06-22 22:36 ` [PATCH 26/27] perf stat: Introduce print_counters function Jiri Olsa
2015-06-22 22:36 ` Jiri Olsa [this message]
2015-06-22 23:06 ` [PATCHv4 00/27] perf stat: Introduce --per-thread option Arnaldo Carvalho de Melo
2015-06-23  7:22   ` Jiri Olsa
2015-06-23 14:05     ` Arnaldo Carvalho de Melo
2015-06-23 14:08       ` Adrian Hunter
2015-06-23 15:06         ` Arnaldo Carvalho de Melo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1435012588-9007-28-git-send-email-jolsa@kernel.org \
    --to=jolsa@kernel.org \
    --cc=a.p.zijlstra@chello.nl \
    --cc=acme@kernel.org \
    --cc=adrian.hunter@intel.com \
    --cc=ak@linux.intel.com \
    --cc=dsahern@gmail.com \
    --cc=eranian@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=namhyung@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.