From: Jiri Olsa <jolsa@kernel.org>
To: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: lkml <linux-kernel@vger.kernel.org>,
Adrian Hunter <adrian.hunter@intel.com>,
Andi Kleen <ak@linux.intel.com>, David Ahern <dsahern@gmail.com>,
Ingo Molnar <mingo@kernel.org>,
Namhyung Kim <namhyung@kernel.org>,
Peter Zijlstra <a.p.zijlstra@chello.nl>,
Stephane Eranian <eranian@google.com>
Subject: [PATCH 27/27] perf stat: Introduce --per-thread option
Date: Tue, 23 Jun 2015 00:36:28 +0200 [thread overview]
Message-ID: <1435012588-9007-28-git-send-email-jolsa@kernel.org> (raw)
In-Reply-To: <1435012588-9007-1-git-send-email-jolsa@kernel.org>
Currently the counter values of all tasks given via the -p option PID
arguments get aggregated and printed as single values.
Add the --per-thread option to print values per task.
$ perf stat -e cycles,instructions --per-thread -p 30190,30242
^C
Performance counter stats for process id '30190,30242':
cat-30190 0 cycles
yes-30242 3,842,525,421 cycles
cat-30190 0 instructions
yes-30242 10,370,817,010 instructions
1.143155657 seconds time elapsed
Also works under interval mode:
$ perf stat -e cycles,instructions --per-thread -p 30190,30242 -I 1000
# time comm-pid counts unit events
1.000073435 cat-30190 89,058 cycles
1.000073435 yes-30242 3,360,786,902 cycles (100.00%)
1.000073435 cat-30190 14,066 instructions
1.000073435 yes-30242 9,069,937,462 instructions
2.000204830 cat-30190 0 cycles
2.000204830 yes-30242 3,351,667,626 cycles
2.000204830 cat-30190 0 instructions
2.000204830 yes-30242 9,045,796,885 instructions
^C 2.771286639 cat-30190 0 cycles
2.771286639 yes-30242 2,593,884,166 cycles
2.771286639 cat-30190 0 instructions
2.771286639 yes-30242 7,001,171,191 instructions
It works only with the -t and -p options; otherwise the following
error is printed:
$ perf stat -e cycles --per-thread -I 1000 ls
The --per-thread option is only available when monitoring via -p -t options.
-p, --pid <pid> stat events on existing process id
-t, --tid <tid> stat events on existing thread id
Link: http://lkml.kernel.org/n/tip-0v0ixd9k7o9z1u8hqngm1coe@git.kernel.org
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
tools/perf/Documentation/perf-stat.txt | 4 ++
tools/perf/builtin-stat.c | 68 +++++++++++++++++++++++++++++++++-
tools/perf/util/stat.h | 1 +
3 files changed, 71 insertions(+), 2 deletions(-)
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 04e150d83e7d..47469abdcc1c 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -144,6 +144,10 @@ is a useful mode to detect imbalance between physical cores. To enable this mod
use --per-core in addition to -a. (system-wide). The output includes the
core number and the number of online logical processors on that physical processor.
+--per-thread::
+Aggregate counts per monitored threads, when monitoring threads (-t option)
+or processes (-p option).
+
-D msecs::
--delay msecs::
After starting the program, wait msecs before measuring. This is useful to
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 56dc8881cb05..64e67b27528d 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -231,6 +231,7 @@ process_counter_values(struct perf_evsel *evsel, int cpu, int thread,
count = &zero;
switch (aggr_mode) {
+ case AGGR_THREAD:
case AGGR_CORE:
case AGGR_SOCKET:
case AGGR_NONE:
@@ -602,6 +603,14 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
csv_output ? 0 : -4,
perf_evsel__cpus(evsel)->map[id], csv_sep);
break;
+ case AGGR_THREAD:
+ fprintf(output, "%*s-%*d%s",
+ csv_output ? 0 : 16,
+ thread_map__comm(evsel->threads, id),
+ csv_output ? 0 : -8,
+ thread_map__pid(evsel->threads, id),
+ csv_sep);
+ break;
case AGGR_GLOBAL:
default:
break;
@@ -750,6 +759,40 @@ static void print_aggr(char *prefix)
}
}
+static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
+{
+ int nthreads = thread_map__nr(counter->threads);
+ int ncpus = cpu_map__nr(counter->cpus);
+ int cpu, thread;
+ double uval;
+
+ for (thread = 0; thread < nthreads; thread++) {
+ u64 ena = 0, run = 0, val = 0;
+
+ for (cpu = 0; cpu < ncpus; cpu++) {
+ val += perf_counts(counter->counts, cpu, thread)->val;
+ ena += perf_counts(counter->counts, cpu, thread)->ena;
+ run += perf_counts(counter->counts, cpu, thread)->run;
+ }
+
+ if (prefix)
+ fprintf(output, "%s", prefix);
+
+ uval = val * counter->scale;
+
+ if (nsec_counter(counter))
+ nsec_printout(thread, 0, counter, uval);
+ else
+ abs_printout(thread, 0, counter, uval);
+
+ if (!csv_output)
+ print_noise(counter, 1.0);
+
+ print_running(run, ena);
+ fputc('\n', output);
+ }
+}
+
/*
* Print out the results of a single counter:
* aggregated counts in system-wide mode
@@ -876,6 +919,9 @@ static void print_interval(char *prefix, struct timespec *ts)
case AGGR_NONE:
fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit");
break;
+ case AGGR_THREAD:
+ fprintf(output, "# time comm-pid counts %*s events\n", unit_width, "unit");
+ break;
case AGGR_GLOBAL:
default:
fprintf(output, "# time counts %*s events\n", unit_width, "unit");
@@ -944,6 +990,10 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
case AGGR_SOCKET:
print_aggr(prefix);
break;
+ case AGGR_THREAD:
+ evlist__for_each(evsel_list, counter)
+ print_aggr_thread(counter, prefix);
+ break;
case AGGR_GLOBAL:
evlist__for_each(evsel_list, counter)
print_counter_aggr(counter, prefix);
@@ -1031,6 +1081,7 @@ static int perf_stat_init_aggr_mode(void)
break;
case AGGR_NONE:
case AGGR_GLOBAL:
+ case AGGR_THREAD:
default:
break;
}
@@ -1255,6 +1306,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
"aggregate counts per processor socket", AGGR_SOCKET),
OPT_SET_UINT(0, "per-core", &aggr_mode,
"aggregate counts per physical processor core", AGGR_CORE),
+ OPT_SET_UINT(0, "per-thread", &aggr_mode,
+ "aggregate counts per thread", AGGR_THREAD),
OPT_UINTEGER('D', "delay", &initial_delay,
"ms to wait before starting measurement after program start"),
OPT_END()
@@ -1346,8 +1399,19 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
run_count = 1;
}
- /* no_aggr, cgroup are for system-wide only */
- if ((aggr_mode != AGGR_GLOBAL || nr_cgroups) &&
+ if ((aggr_mode == AGGR_THREAD) && !target__has_task(&target)) {
+ fprintf(stderr, "The --per-thread option is only available "
+ "when monitoring via -p -t options.\n");
+ parse_options_usage(NULL, options, "p", 1);
+ parse_options_usage(NULL, options, "t", 1);
+ goto out;
+ }
+
+ /*
+ * no_aggr, cgroup are for system-wide only
+ * --per-thread is aggregated per thread, we dont mix it with cpu mode
+ */
+ if (((aggr_mode != AGGR_GLOBAL && aggr_mode != AGGR_THREAD) || nr_cgroups) &&
!target__has_cpu(&target)) {
fprintf(stderr, "both cgroup and no-aggregation "
"modes only available in system-wide mode\n");
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 9f05c571befe..1cfbe0a980ac 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -30,6 +30,7 @@ enum aggr_mode {
AGGR_GLOBAL,
AGGR_SOCKET,
AGGR_CORE,
+ AGGR_THREAD,
};
struct perf_counts_values {
--
1.9.3
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
Please read the FAQ at http://www.tux.org/lkml/
next prev parent reply other threads:[~2015-06-22 22:39 UTC|newest]
Thread overview: 49+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-06-22 22:36 [PATCHv4 00/27] perf stat: Introduce --per-thread option Jiri Olsa
2015-06-22 22:36 ` [PATCH 01/27] perf tools: Change thread_map::map into struct Jiri Olsa
2015-06-25 7:58 ` [tip:perf/core] perf thread_map: Don't access the array entries directly tip-bot for Jiri Olsa
2015-06-25 7:58 ` [tip:perf/core] perf thread_map: Change map entries into a struct tip-bot for Jiri Olsa
2015-06-22 22:36 ` [PATCH 02/27] perf tools: Add comm string into struct thread_map Jiri Olsa
2015-06-23 21:01 ` Arnaldo Carvalho de Melo
2015-06-23 21:56 ` Jiri Olsa
2015-06-25 14:39 ` Arnaldo Carvalho de Melo
2015-06-25 16:01 ` Jiri Olsa
2015-06-25 16:13 ` Arnaldo Carvalho de Melo
2015-06-22 22:36 ` [PATCH 03/27] perf tools: Add reference counting for cpu_map object Jiri Olsa
2015-06-23 21:06 ` Arnaldo Carvalho de Melo
2015-06-23 21:54 ` Jiri Olsa
2015-06-26 8:44 ` [tip:perf/urgent] " tip-bot for Jiri Olsa
2015-06-22 22:36 ` [PATCH 04/27] perf tools: Add reference counting for thread_map object Jiri Olsa
2015-06-26 8:44 ` [tip:perf/urgent] " tip-bot for Jiri Olsa
2015-06-22 22:36 ` [PATCH 05/27] perf tools: Propagate cpu maps through the evlist Jiri Olsa
2015-06-26 8:45 ` [tip:perf/urgent] perf evlist: Propagate cpu maps to evsels in an evlist tip-bot for Jiri Olsa
2015-06-22 22:36 ` [PATCH 06/27] perf tools: Propagate thread maps through the evlist Jiri Olsa
2015-06-26 8:45 ` [tip:perf/urgent] perf evlist: " tip-bot for Jiri Olsa
2015-06-22 22:36 ` [PATCH 07/27] perf tools: Make perf_evsel__(nr_)cpus generic Jiri Olsa
2015-06-26 8:45 ` [tip:perf/urgent] " tip-bot for Jiri Olsa
2015-06-22 22:36 ` [PATCH 08/27] perf tests: Add thread_map object tests Jiri Olsa
2015-06-22 22:36 ` [PATCH 09/27] perf stat: Introduce perf_counts function Jiri Olsa
2015-06-23 21:11 ` Arnaldo Carvalho de Melo
2015-06-23 22:03 ` Jiri Olsa
2015-06-22 22:36 ` [PATCH 10/27] perf stat: Use xyarray for cpu evsel counts Jiri Olsa
2015-06-22 22:36 ` [PATCH 11/27] perf stat: Make stats work over the thread dimension Jiri Olsa
2015-06-22 22:36 ` [PATCH 12/27] perf stat: Rename struct perf_counts::cpu member to values Jiri Olsa
2015-06-22 22:36 ` [PATCH 13/27] perf stat: Introduce perf_evlist__reset_stats Jiri Olsa
2015-06-22 22:36 ` [PATCH 14/27] perf stat: Move perf_evsel__(alloc|free|reset)_stat_priv into stat object Jiri Olsa
2015-06-22 22:36 ` [PATCH 15/27] perf stat: Move perf_evsel__(alloc|free)_prev_raw_counts " Jiri Olsa
2015-06-22 22:36 ` [PATCH 16/27] perf stat: Move perf_evlist__(alloc|free|reset)_stats " Jiri Olsa
2015-06-22 22:36 ` [PATCH 17/27] perf stat: Introduce perf_evsel__alloc_stats function Jiri Olsa
2015-06-22 22:36 ` [PATCH 18/27] perf stat: Introduce perf_evsel__read function Jiri Olsa
2015-06-22 22:36 ` [PATCH 19/27] perf stat: Introduce read_counters function Jiri Olsa
2015-06-22 22:36 ` [PATCH 20/27] perf stat: Separate counters reading and processing Jiri Olsa
2015-06-22 22:36 ` [PATCH 21/27] perf stat: Move zero_per_pkg into counter process code Jiri Olsa
2015-06-22 22:36 ` [PATCH 22/27] perf stat: Move perf_stat initialization " Jiri Olsa
2015-06-22 22:36 ` [PATCH 23/27] perf stat: Remove perf_evsel__read_cb function Jiri Olsa
2015-06-22 22:36 ` [PATCH 24/27] perf stat: Rename print_interval to process_interval Jiri Olsa
2015-06-22 22:36 ` [PATCH 25/27] perf stat: Using init_stats instead of memset Jiri Olsa
2015-06-22 22:36 ` [PATCH 26/27] perf stat: Introduce print_counters function Jiri Olsa
2015-06-22 22:36 ` Jiri Olsa [this message]
2015-06-22 23:06 ` [PATCHv4 00/27] perf stat: Introduce --per-thread option Arnaldo Carvalho de Melo
2015-06-23 7:22 ` Jiri Olsa
2015-06-23 14:05 ` Arnaldo Carvalho de Melo
2015-06-23 14:08 ` Adrian Hunter
2015-06-23 15:06 ` Arnaldo Carvalho de Melo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1435012588-9007-28-git-send-email-jolsa@kernel.org \
--to=jolsa@kernel.org \
--cc=a.p.zijlstra@chello.nl \
--cc=acme@kernel.org \
--cc=adrian.hunter@intel.com \
--cc=ak@linux.intel.com \
--cc=dsahern@gmail.com \
--cc=eranian@google.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=namhyung@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.