* [PATCH v3] perf stat: Show percore counts in per CPU output
@ 2020-02-13 7:15 Jin Yao
2020-02-13 13:20 ` Ravi Bangoria
0 siblings, 1 reply; 5+ messages in thread
From: Jin Yao @ 2020-02-13 7:15 UTC (permalink / raw)
To: acme, jolsa, peterz, mingo, alexander.shishkin
Cc: Linux-kernel, ak, kan.liang, yao.jin, Jin Yao
We have supported the event modifier "percore" which sums up the
event counts for all hardware threads in a core and shows the counts
per core.
For example,
# perf stat -e cpu/event=cpu-cycles,percore/ -a -A -- sleep 1
Performance counter stats for 'system wide':
S0-D0-C0 395,072 cpu/event=cpu-cycles,percore/
S0-D0-C1 851,248 cpu/event=cpu-cycles,percore/
S0-D0-C2 954,226 cpu/event=cpu-cycles,percore/
S0-D0-C3 1,233,659 cpu/event=cpu-cycles,percore/
This patch provides a new option "--percore-show-thread". It is
used together with the event modifier "percore" to sum up the event
counts for all hardware threads in a core but shows the counts per
hardware thread.
This is essentially a replacement for the any bit (which is gone in
Icelake). Per core counts are useful for some formulas, e.g. CoreIPC.
The original percore version was inconvenient to post process. This
variant matches the output of the any bit.
With this patch, for example,
# perf stat -e cpu/event=cpu-cycles,percore/ -a -A --percore-show-thread -- sleep 1
Performance counter stats for 'system wide':
CPU0 2,453,061 cpu/event=cpu-cycles,percore/
CPU1 1,823,921 cpu/event=cpu-cycles,percore/
CPU2 1,383,166 cpu/event=cpu-cycles,percore/
CPU3 1,102,652 cpu/event=cpu-cycles,percore/
CPU4 2,453,061 cpu/event=cpu-cycles,percore/
CPU5 1,823,921 cpu/event=cpu-cycles,percore/
CPU6 1,383,166 cpu/event=cpu-cycles,percore/
CPU7 1,102,652 cpu/event=cpu-cycles,percore/
We can see counts are duplicated in CPU pairs
(CPU0/CPU4, CPU1/CPU5, CPU2/CPU6, CPU3/CPU7).
The interval mode also works. For example,
# perf stat -e cpu/event=cpu-cycles,percore/ -a -A --percore-show-thread -I 1000
# time CPU counts unit events
1.000425421 CPU0 925,032 cpu/event=cpu-cycles,percore/
1.000425421 CPU1 430,202 cpu/event=cpu-cycles,percore/
1.000425421 CPU2 436,843 cpu/event=cpu-cycles,percore/
1.000425421 CPU3 1,192,504 cpu/event=cpu-cycles,percore/
1.000425421 CPU4 925,032 cpu/event=cpu-cycles,percore/
1.000425421 CPU5 430,202 cpu/event=cpu-cycles,percore/
1.000425421 CPU6 436,843 cpu/event=cpu-cycles,percore/
1.000425421 CPU7 1,192,504 cpu/event=cpu-cycles,percore/
v3:
---
1. Fix the interval mode output error
2. Use cpu value (not cpu index) in config->aggr_get_id().
3. Refine the code according to Jiri's comments.
v2:
---
Add the explanation in change log. This is essentially a replacement
for the any bit. No code change.
Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
---
tools/perf/Documentation/perf-stat.txt | 9 +++++++
tools/perf/builtin-stat.c | 4 +++
tools/perf/util/stat-display.c | 35 ++++++++++++++++++++++----
tools/perf/util/stat.h | 1 +
4 files changed, 44 insertions(+), 5 deletions(-)
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 9431b8066fb4..86d1fd92f017 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -334,6 +334,15 @@ Configure all used events to run in kernel space.
--all-user::
Configure all used events to run in user space.
+--percore-show-thread::
+The event modifier "percore" has supported to sum up the event counts
+for all hardware threads in a core and show the counts per core.
+
+This option with event modifier "percore" enabled also sums up the event
+counts for all hardware threads in a core but show the sum counts per
+hardware thread. This is essentially a replacement for the any bit and
+convenient for posting process.
+
EXAMPLES
--------
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a098c2ebf4ea..ec053dc1e35c 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -929,6 +929,10 @@ static struct option stat_options[] = {
OPT_BOOLEAN_FLAG(0, "all-user", &stat_config.all_user,
"Configure all used events to run in user space.",
PARSE_OPT_EXCLUSIVE),
+ OPT_BOOLEAN(0, "percore-show-thread", &stat_config.percore_show_thread,
+ "Use with 'percore' event qualifier to show the event "
+ "counts of one hardware thread by sum up total hardware "
+ "threads of same physical core"),
OPT_END()
};
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index bc31fccc0057..7eb3643a97ae 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -110,7 +110,7 @@ static void aggr_printout(struct perf_stat_config *config,
config->csv_sep);
break;
case AGGR_NONE:
- if (evsel->percore) {
+ if (evsel->percore && !config->percore_show_thread) {
fprintf(config->output, "S%d-D%d-C%*d%s",
cpu_map__id_to_socket(id),
cpu_map__id_to_die(id),
@@ -628,7 +628,7 @@ static void aggr_cb(struct perf_stat_config *config,
static void print_counter_aggrdata(struct perf_stat_config *config,
struct evsel *counter, int s,
char *prefix, bool metric_only,
- bool *first)
+ bool *first, int cpu)
{
struct aggr_data ad;
FILE *output = config->output;
@@ -654,7 +654,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
fprintf(output, "%s", prefix);
uval = val * counter->scale;
- printout(config, id, nr, counter, uval, prefix,
+ printout(config, cpu != -1 ? cpu : id, nr, counter, uval, prefix,
run, ena, 1.0, &rt_stat);
if (!metric_only)
fputc('\n', output);
@@ -687,7 +687,7 @@ static void print_aggr(struct perf_stat_config *config,
evlist__for_each_entry(evlist, counter) {
print_counter_aggrdata(config, counter, s,
prefix, metric_only,
- &first);
+ &first, -1);
}
if (metric_only)
fputc('\n', output);
@@ -1146,6 +1146,28 @@ static void print_footer(struct perf_stat_config *config)
"the same PMU. Try reorganizing the group.\n");
}
+static void print_percore_thread(struct perf_stat_config *config,
+ struct evsel *counter, char *prefix)
+{
+ int cpu, s, s2, id;
+ bool first = true;
+
+ for (int i = 0; i < perf_evsel__nr_cpus(counter); i++) {
+ cpu = perf_cpu_map__cpu(evsel__cpus(counter), i);
+ s2 = config->aggr_get_id(config, evsel__cpus(counter), cpu);
+
+ for (s = 0; s < config->aggr_map->nr; s++) {
+ id = config->aggr_map->map[s];
+ if (s2 == id)
+ break;
+ }
+
+ print_counter_aggrdata(config, counter, s,
+ prefix, false,
+ &first, cpu);
+ }
+}
+
static void print_percore(struct perf_stat_config *config,
struct evsel *counter, char *prefix)
{
@@ -1157,13 +1179,16 @@ static void print_percore(struct perf_stat_config *config,
if (!(config->aggr_map || config->aggr_get_id))
return;
+ if (config->percore_show_thread)
+ return print_percore_thread(config, counter, prefix);
+
for (s = 0; s < config->aggr_map->nr; s++) {
if (prefix && metric_only)
fprintf(output, "%s", prefix);
print_counter_aggrdata(config, counter, s,
prefix, metric_only,
- &first);
+ &first, -1);
}
if (metric_only)
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index fb990efa54a8..b4fdfaa7f2c0 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -109,6 +109,7 @@ struct perf_stat_config {
bool walltime_run_table;
bool all_kernel;
bool all_user;
+ bool percore_show_thread;
FILE *output;
unsigned int interval;
unsigned int timeout;
--
2.17.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH v3] perf stat: Show percore counts in per CPU output
2020-02-13 7:15 [PATCH v3] perf stat: Show percore counts in per CPU output Jin Yao
@ 2020-02-13 13:20 ` Ravi Bangoria
2020-02-13 15:10 ` Jin, Yao
0 siblings, 1 reply; 5+ messages in thread
From: Ravi Bangoria @ 2020-02-13 13:20 UTC (permalink / raw)
To: Jin Yao
Cc: acme, jolsa, peterz, mingo, alexander.shishkin, Linux-kernel, ak,
kan.liang, yao.jin, Ravi Bangoria
Hi Jin,
On 2/13/20 12:45 PM, Jin Yao wrote:
> With this patch, for example,
>
> # perf stat -e cpu/event=cpu-cycles,percore/ -a -A --percore-show-thread -- sleep 1
>
> Performance counter stats for 'system wide':
>
> CPU0 2,453,061 cpu/event=cpu-cycles,percore/
> CPU1 1,823,921 cpu/event=cpu-cycles,percore/
> CPU2 1,383,166 cpu/event=cpu-cycles,percore/
> CPU3 1,102,652 cpu/event=cpu-cycles,percore/
> CPU4 2,453,061 cpu/event=cpu-cycles,percore/
> CPU5 1,823,921 cpu/event=cpu-cycles,percore/
> CPU6 1,383,166 cpu/event=cpu-cycles,percore/
> CPU7 1,102,652 cpu/event=cpu-cycles,percore/
>
> We can see counts are duplicated in CPU pairs
> (CPU0/CPU4, CPU1/CPU5, CPU2/CPU6, CPU3/CPU7).
>
I was trying this patch and I am getting bit weird results when any cpu
is offline. Ex,
$ lscpu | grep list
On-line CPU(s) list: 0-4,6,7
Off-line CPU(s) list: 5
$ sudo ./perf stat -e cpu/event=cpu-cycles,percore/ -a -A --percore-show-thread -vv -- sleep 1
...
cpu/event=cpu-cycles,percore/: 0: 23746491 1001189836 1001189836
cpu/event=cpu-cycles,percore/: 1: 19802666 1001291299 1001291299
cpu/event=cpu-cycles,percore/: 2: 24211983 1001394318 1001394318
cpu/event=cpu-cycles,percore/: 3: 54051396 1001516816 1001516816
cpu/event=cpu-cycles,percore/: 4: 6378825 1001064048 1001064048
cpu/event=cpu-cycles,percore/: 5: 21299840 1001166297 1001166297
cpu/event=cpu-cycles,percore/: 6: 13075410 1001274535 1001274535
Performance counter stats for 'system wide':
CPU0 30,125,316 cpu/event=cpu-cycles,percore/
CPU1 19,802,666 cpu/event=cpu-cycles,percore/
CPU2 45,511,823 cpu/event=cpu-cycles,percore/
CPU3 67,126,806 cpu/event=cpu-cycles,percore/
CPU4 30,125,316 cpu/event=cpu-cycles,percore/
CPU7 67,126,806 cpu/event=cpu-cycles,percore/
CPU0 30,125,316 cpu/event=cpu-cycles,percore/
1.001918764 seconds time elapsed
I see proper result without --percore-show-thread:
$ sudo ./perf stat -e cpu/event=cpu-cycles,percore/ -a -A -vv -- sleep 1
...
cpu/event=cpu-cycles,percore/: 0: 11676414 1001190709 1001190709
cpu/event=cpu-cycles,percore/: 1: 39119617 1001291459 1001291459
cpu/event=cpu-cycles,percore/: 2: 41821512 1001391158 1001391158
cpu/event=cpu-cycles,percore/: 3: 46853730 1001492799 1001492799
cpu/event=cpu-cycles,percore/: 4: 14448274 1001095948 1001095948
cpu/event=cpu-cycles,percore/: 5: 42238217 1001191187 1001191187
cpu/event=cpu-cycles,percore/: 6: 33129641 1001292072 1001292072
Performance counter stats for 'system wide':
S0-D0-C0 26,124,688 cpu/event=cpu-cycles,percore/
S0-D0-C1 39,119,617 cpu/event=cpu-cycles,percore/
S0-D0-C2 84,059,729 cpu/event=cpu-cycles,percore/
S0-D0-C3 79,983,371 cpu/event=cpu-cycles,percore/
1.001961563 seconds time elapsed
[...]
> +--percore-show-thread::
> +The event modifier "percore" has supported to sum up the event counts
> +for all hardware threads in a core and show the counts per core.
> +
> +This option with event modifier "percore" enabled also sums up the event
> +counts for all hardware threads in a core but show the sum counts per
> +hardware thread. This is essentially a replacement for the any bit and
> +convenient for posting process.
s/posting process/post processing/ ? :)
Ravi
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH v3] perf stat: Show percore counts in per CPU output
2020-02-13 13:20 ` Ravi Bangoria
@ 2020-02-13 15:10 ` Jin, Yao
2020-02-14 5:16 ` Ravi Bangoria
0 siblings, 1 reply; 5+ messages in thread
From: Jin, Yao @ 2020-02-13 15:10 UTC (permalink / raw)
To: Ravi Bangoria
Cc: acme, jolsa, peterz, mingo, alexander.shishkin, Linux-kernel, ak,
kan.liang, yao.jin
On 2/13/2020 9:20 PM, Ravi Bangoria wrote:
> Hi Jin,
>
> On 2/13/20 12:45 PM, Jin Yao wrote:
>> With this patch, for example,
>>
>> # perf stat -e cpu/event=cpu-cycles,percore/ -a -A
>> --percore-show-thread -- sleep 1
>>
>> Performance counter stats for 'system wide':
>>
>> CPU0 2,453,061 cpu/event=cpu-cycles,percore/
>> CPU1 1,823,921 cpu/event=cpu-cycles,percore/
>> CPU2 1,383,166 cpu/event=cpu-cycles,percore/
>> CPU3 1,102,652 cpu/event=cpu-cycles,percore/
>> CPU4 2,453,061 cpu/event=cpu-cycles,percore/
>> CPU5 1,823,921 cpu/event=cpu-cycles,percore/
>> CPU6 1,383,166 cpu/event=cpu-cycles,percore/
>> CPU7 1,102,652 cpu/event=cpu-cycles,percore/
>>
>> We can see counts are duplicated in CPU pairs
>> (CPU0/CPU4, CPU1/CPU5, CPU2/CPU6, CPU3/CPU7).
>>
>
> I was trying this patch and I am getting bit weird results when any cpu
> is offline. Ex,
>
> $ lscpu | grep list
> On-line CPU(s) list: 0-4,6,7
> Off-line CPU(s) list: 5
>
> $ sudo ./perf stat -e cpu/event=cpu-cycles,percore/ -a -A
> --percore-show-thread -vv -- sleep 1
> ...
> cpu/event=cpu-cycles,percore/: 0: 23746491 1001189836 1001189836
> cpu/event=cpu-cycles,percore/: 1: 19802666 1001291299 1001291299
> cpu/event=cpu-cycles,percore/: 2: 24211983 1001394318 1001394318
> cpu/event=cpu-cycles,percore/: 3: 54051396 1001516816 1001516816
> cpu/event=cpu-cycles,percore/: 4: 6378825 1001064048 1001064048
> cpu/event=cpu-cycles,percore/: 5: 21299840 1001166297 1001166297
> cpu/event=cpu-cycles,percore/: 6: 13075410 1001274535 1001274535
> Performance counter stats for 'system wide':
> CPU0 30,125,316 cpu/event=cpu-cycles,percore/
> CPU1 19,802,666 cpu/event=cpu-cycles,percore/
> CPU2 45,511,823 cpu/event=cpu-cycles,percore/
> CPU3 67,126,806 cpu/event=cpu-cycles,percore/
> CPU4 30,125,316 cpu/event=cpu-cycles,percore/
> CPU7 67,126,806 cpu/event=cpu-cycles,percore/
> CPU0 30,125,316 cpu/event=cpu-cycles,percore/
> 1.001918764 seconds time elapsed
>
> I see proper result without --percore-show-thread:
>
> $ sudo ./perf stat -e cpu/event=cpu-cycles,percore/ -a -A -vv -- sleep 1
> ...
> cpu/event=cpu-cycles,percore/: 0: 11676414 1001190709 1001190709
> cpu/event=cpu-cycles,percore/: 1: 39119617 1001291459 1001291459
> cpu/event=cpu-cycles,percore/: 2: 41821512 1001391158 1001391158
> cpu/event=cpu-cycles,percore/: 3: 46853730 1001492799 1001492799
> cpu/event=cpu-cycles,percore/: 4: 14448274 1001095948 1001095948
> cpu/event=cpu-cycles,percore/: 5: 42238217 1001191187 1001191187
> cpu/event=cpu-cycles,percore/: 6: 33129641 1001292072 1001292072
> Performance counter stats for 'system wide':
> S0-D0-C0 26,124,688 cpu/event=cpu-cycles,percore/
> S0-D0-C1 39,119,617 cpu/event=cpu-cycles,percore/
> S0-D0-C2 84,059,729 cpu/event=cpu-cycles,percore/
> S0-D0-C3 79,983,371 cpu/event=cpu-cycles,percore/
> 1.001961563 seconds time elapsed
>
> [...]
>
Thanks so much for reporting this issue!
It looks I should use the cpu idx in print_percore_thread. I can't use
the cpu value. I have a fix:
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 7eb3643a97ae..d89cb0da90f8 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -1149,13 +1149,11 @@ static void print_footer(struct perf_stat_config
*config)
static void print_percore_thread(struct perf_stat_config *config,
struct evsel *counter, char *prefix)
{
- int cpu, s, s2, id;
+ int s, s2, id;
bool first = true;
for (int i = 0; i < perf_evsel__nr_cpus(counter); i++) {
- cpu = perf_cpu_map__cpu(evsel__cpus(counter), i);
- s2 = config->aggr_get_id(config, evsel__cpus(counter), cpu);
-
+ s2 = config->aggr_get_id(config, evsel__cpus(counter), i);
for (s = 0; s < config->aggr_map->nr; s++) {
id = config->aggr_map->map[s];
if (s2 == id)
@@ -1164,7 +1162,7 @@ static void print_percore_thread(struct
perf_stat_config *config,
print_counter_aggrdata(config, counter, s,
prefix, false,
- &first, cpu);
+ &first, i);
}
}
With this fix, my test log:
root@kbl:~# perf stat -e cpu/event=cpu-cycles,percore/ -a -A
--percore-show-thread -- sleep 1
Performance counter stats for 'system wide':
CPU0 386,355 cpu/event=cpu-cycles,percore/
CPU1 538,325 cpu/event=cpu-cycles,percore/
CPU2 900,263 cpu/event=cpu-cycles,percore/
CPU3 1,871,488 cpu/event=cpu-cycles,percore/
CPU4 386,355 cpu/event=cpu-cycles,percore/
CPU6 900,263 cpu/event=cpu-cycles,percore/
CPU7 1,871,488 cpu/event=cpu-cycles,percore/
1.001476492 seconds time elapsed
Once I online all CPUs, the result is:
root@kbl:~# perf stat -e cpu/event=cpu-cycles,percore/ -a -A
--percore-show-thread -- sleep 1
Performance counter stats for 'system wide':
CPU0 1,371,762 cpu/event=cpu-cycles,percore/
CPU1 827,386 cpu/event=cpu-cycles,percore/
CPU2 309,934 cpu/event=cpu-cycles,percore/
CPU3 5,043,596 cpu/event=cpu-cycles,percore/
CPU4 1,371,762 cpu/event=cpu-cycles,percore/
CPU5 827,386 cpu/event=cpu-cycles,percore/
CPU6 309,934 cpu/event=cpu-cycles,percore/
CPU7 5,043,596 cpu/event=cpu-cycles,percore/
1.001535000 seconds time elapsed
>> +--percore-show-thread::
>> +The event modifier "percore" has supported to sum up the event counts
>> +for all hardware threads in a core and show the counts per core.
>> +
>> +This option with event modifier "percore" enabled also sums up the event
>> +counts for all hardware threads in a core but show the sum counts per
>> +hardware thread. This is essentially a replacement for the any bit and
>> +convenient for posting process.
>
> s/posting process/post processing/ ? :)
>
OK, thanks!
Thanks
Jin Yao
> Ravi
>
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH v3] perf stat: Show percore counts in per CPU output
2020-02-13 15:10 ` Jin, Yao
@ 2020-02-14 5:16 ` Ravi Bangoria
2020-02-14 6:06 ` Jin, Yao
0 siblings, 1 reply; 5+ messages in thread
From: Ravi Bangoria @ 2020-02-14 5:16 UTC (permalink / raw)
To: Jin, Yao
Cc: acme, jolsa, peterz, mingo, alexander.shishkin, Linux-kernel, ak,
kan.liang, yao.jin, Ravi Bangoria
On 2/13/20 8:40 PM, Jin, Yao wrote:
>
>
> On 2/13/2020 9:20 PM, Ravi Bangoria wrote:
>> Hi Jin,
>>
>> On 2/13/20 12:45 PM, Jin Yao wrote:
>>> With this patch, for example,
>>>
>>> # perf stat -e cpu/event=cpu-cycles,percore/ -a -A --percore-show-thread -- sleep 1
>>>
>>> Performance counter stats for 'system wide':
>>>
>>> CPU0 2,453,061 cpu/event=cpu-cycles,percore/
>>> CPU1 1,823,921 cpu/event=cpu-cycles,percore/
>>> CPU2 1,383,166 cpu/event=cpu-cycles,percore/
>>> CPU3 1,102,652 cpu/event=cpu-cycles,percore/
>>> CPU4 2,453,061 cpu/event=cpu-cycles,percore/
>>> CPU5 1,823,921 cpu/event=cpu-cycles,percore/
>>> CPU6 1,383,166 cpu/event=cpu-cycles,percore/
>>> CPU7 1,102,652 cpu/event=cpu-cycles,percore/
>>>
>>> We can see counts are duplicated in CPU pairs
>>> (CPU0/CPU4, CPU1/CPU5, CPU2/CPU6, CPU3/CPU7).
>>>
>>
>> I was trying this patch and I am getting bit weird results when any cpu
>> is offline. Ex,
>>
>> $ lscpu | grep list
>> On-line CPU(s) list: 0-4,6,7
>> Off-line CPU(s) list: 5
>>
>> $ sudo ./perf stat -e cpu/event=cpu-cycles,percore/ -a -A --percore-show-thread -vv -- sleep 1
>> ...
>> cpu/event=cpu-cycles,percore/: 0: 23746491 1001189836 1001189836
>> cpu/event=cpu-cycles,percore/: 1: 19802666 1001291299 1001291299
>> cpu/event=cpu-cycles,percore/: 2: 24211983 1001394318 1001394318
>> cpu/event=cpu-cycles,percore/: 3: 54051396 1001516816 1001516816
>> cpu/event=cpu-cycles,percore/: 4: 6378825 1001064048 1001064048
>> cpu/event=cpu-cycles,percore/: 5: 21299840 1001166297 1001166297
>> cpu/event=cpu-cycles,percore/: 6: 13075410 1001274535 1001274535
>> Performance counter stats for 'system wide':
>> CPU0 30,125,316 cpu/event=cpu-cycles,percore/
>> CPU1 19,802,666 cpu/event=cpu-cycles,percore/
>> CPU2 45,511,823 cpu/event=cpu-cycles,percore/
>> CPU3 67,126,806 cpu/event=cpu-cycles,percore/
>> CPU4 30,125,316 cpu/event=cpu-cycles,percore/
>> CPU7 67,126,806 cpu/event=cpu-cycles,percore/
>> CPU0 30,125,316 cpu/event=cpu-cycles,percore/
>> 1.001918764 seconds time elapsed
>>
>> I see proper result without --percore-show-thread:
>>
>> $ sudo ./perf stat -e cpu/event=cpu-cycles,percore/ -a -A -vv -- sleep 1
>> ...
>> cpu/event=cpu-cycles,percore/: 0: 11676414 1001190709 1001190709
>> cpu/event=cpu-cycles,percore/: 1: 39119617 1001291459 1001291459
>> cpu/event=cpu-cycles,percore/: 2: 41821512 1001391158 1001391158
>> cpu/event=cpu-cycles,percore/: 3: 46853730 1001492799 1001492799
>> cpu/event=cpu-cycles,percore/: 4: 14448274 1001095948 1001095948
>> cpu/event=cpu-cycles,percore/: 5: 42238217 1001191187 1001191187
>> cpu/event=cpu-cycles,percore/: 6: 33129641 1001292072 1001292072
>> Performance counter stats for 'system wide':
>> S0-D0-C0 26,124,688 cpu/event=cpu-cycles,percore/
>> S0-D0-C1 39,119,617 cpu/event=cpu-cycles,percore/
>> S0-D0-C2 84,059,729 cpu/event=cpu-cycles,percore/
>> S0-D0-C3 79,983,371 cpu/event=cpu-cycles,percore/
>> 1.001961563 seconds time elapsed
>>
>> [...]
>>
>
> Thanks so much for reporting this issue!
>
> It looks I should use the cpu idx in print_percore_thread. I can't use the cpu value. I have a fix:
>
> diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
> index 7eb3643a97ae..d89cb0da90f8 100644
> --- a/tools/perf/util/stat-display.c
> +++ b/tools/perf/util/stat-display.c
> @@ -1149,13 +1149,11 @@ static void print_footer(struct perf_stat_config *config)
> static void print_percore_thread(struct perf_stat_config *config,
> struct evsel *counter, char *prefix)
> {
> - int cpu, s, s2, id;
> + int s, s2, id;
> bool first = true;
>
> for (int i = 0; i < perf_evsel__nr_cpus(counter); i++) {
> - cpu = perf_cpu_map__cpu(evsel__cpus(counter), i);
> - s2 = config->aggr_get_id(config, evsel__cpus(counter), cpu);
> -
> + s2 = config->aggr_get_id(config, evsel__cpus(counter), i);
> for (s = 0; s < config->aggr_map->nr; s++) {
> id = config->aggr_map->map[s];
> if (s2 == id)
> @@ -1164,7 +1162,7 @@ static void print_percore_thread(struct perf_stat_config *config,
>
> print_counter_aggrdata(config, counter, s,
> prefix, false,
> - &first, cpu);
> + &first, i);
> }
> }
LGTM.
Tested-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Ravi
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH v3] perf stat: Show percore counts in per CPU output
2020-02-14 5:16 ` Ravi Bangoria
@ 2020-02-14 6:06 ` Jin, Yao
0 siblings, 0 replies; 5+ messages in thread
From: Jin, Yao @ 2020-02-14 6:06 UTC (permalink / raw)
To: Ravi Bangoria
Cc: acme, jolsa, peterz, mingo, alexander.shishkin, Linux-kernel, ak,
kan.liang, yao.jin
On 2/14/2020 1:16 PM, Ravi Bangoria wrote:
>
>
> On 2/13/20 8:40 PM, Jin, Yao wrote:
>>
>>
>> On 2/13/2020 9:20 PM, Ravi Bangoria wrote:
>>> Hi Jin,
>>>
>>> On 2/13/20 12:45 PM, Jin Yao wrote:
>>>> With this patch, for example,
>>>>
>>>> # perf stat -e cpu/event=cpu-cycles,percore/ -a -A
>>>> --percore-show-thread -- sleep 1
>>>>
>>>> Performance counter stats for 'system wide':
>>>>
>>>> CPU0 2,453,061 cpu/event=cpu-cycles,percore/
>>>> CPU1 1,823,921 cpu/event=cpu-cycles,percore/
>>>> CPU2 1,383,166 cpu/event=cpu-cycles,percore/
>>>> CPU3 1,102,652 cpu/event=cpu-cycles,percore/
>>>> CPU4 2,453,061 cpu/event=cpu-cycles,percore/
>>>> CPU5 1,823,921 cpu/event=cpu-cycles,percore/
>>>> CPU6 1,383,166 cpu/event=cpu-cycles,percore/
>>>> CPU7 1,102,652 cpu/event=cpu-cycles,percore/
>>>>
>>>> We can see counts are duplicated in CPU pairs
>>>> (CPU0/CPU4, CPU1/CPU5, CPU2/CPU6, CPU3/CPU7).
>>>>
>>>
>>> I was trying this patch and I am getting bit weird results when any cpu
>>> is offline. Ex,
>>>
>>> $ lscpu | grep list
>>> On-line CPU(s) list: 0-4,6,7
>>> Off-line CPU(s) list: 5
>>>
>>> $ sudo ./perf stat -e cpu/event=cpu-cycles,percore/ -a -A
>>> --percore-show-thread -vv -- sleep 1
>>> ...
>>> cpu/event=cpu-cycles,percore/: 0: 23746491 1001189836 1001189836
>>> cpu/event=cpu-cycles,percore/: 1: 19802666 1001291299 1001291299
>>> cpu/event=cpu-cycles,percore/: 2: 24211983 1001394318 1001394318
>>> cpu/event=cpu-cycles,percore/: 3: 54051396 1001516816 1001516816
>>> cpu/event=cpu-cycles,percore/: 4: 6378825 1001064048 1001064048
>>> cpu/event=cpu-cycles,percore/: 5: 21299840 1001166297 1001166297
>>> cpu/event=cpu-cycles,percore/: 6: 13075410 1001274535 1001274535
>>> Performance counter stats for 'system wide':
>>> CPU0 30,125,316 cpu/event=cpu-cycles,percore/
>>> CPU1 19,802,666 cpu/event=cpu-cycles,percore/
>>> CPU2 45,511,823 cpu/event=cpu-cycles,percore/
>>> CPU3 67,126,806 cpu/event=cpu-cycles,percore/
>>> CPU4 30,125,316 cpu/event=cpu-cycles,percore/
>>> CPU7 67,126,806 cpu/event=cpu-cycles,percore/
>>> CPU0 30,125,316 cpu/event=cpu-cycles,percore/
>>> 1.001918764 seconds time elapsed
>>>
>>> I see proper result without --percore-show-thread:
>>>
>>> $ sudo ./perf stat -e cpu/event=cpu-cycles,percore/ -a -A -vv --
>>> sleep 1
>>> ...
>>> cpu/event=cpu-cycles,percore/: 0: 11676414 1001190709 1001190709
>>> cpu/event=cpu-cycles,percore/: 1: 39119617 1001291459 1001291459
>>> cpu/event=cpu-cycles,percore/: 2: 41821512 1001391158 1001391158
>>> cpu/event=cpu-cycles,percore/: 3: 46853730 1001492799 1001492799
>>> cpu/event=cpu-cycles,percore/: 4: 14448274 1001095948 1001095948
>>> cpu/event=cpu-cycles,percore/: 5: 42238217 1001191187 1001191187
>>> cpu/event=cpu-cycles,percore/: 6: 33129641 1001292072 1001292072
>>> Performance counter stats for 'system wide':
>>> S0-D0-C0 26,124,688 cpu/event=cpu-cycles,percore/
>>> S0-D0-C1 39,119,617 cpu/event=cpu-cycles,percore/
>>> S0-D0-C2 84,059,729 cpu/event=cpu-cycles,percore/
>>> S0-D0-C3 79,983,371 cpu/event=cpu-cycles,percore/
>>> 1.001961563 seconds time elapsed
>>>
>>> [...]
>>>
>>
>> Thanks so much for reporting this issue!
>>
>> It looks I should use the cpu idx in print_percore_thread. I can't use
>> the cpu value. I have a fix:
>>
>> diff --git a/tools/perf/util/stat-display.c
>> b/tools/perf/util/stat-display.c
>> index 7eb3643a97ae..d89cb0da90f8 100644
>> --- a/tools/perf/util/stat-display.c
>> +++ b/tools/perf/util/stat-display.c
>> @@ -1149,13 +1149,11 @@ static void print_footer(struct
>> perf_stat_config *config)
>> static void print_percore_thread(struct perf_stat_config *config,
>> struct evsel *counter, char *prefix)
>> {
>> - int cpu, s, s2, id;
>> + int s, s2, id;
>> bool first = true;
>>
>> for (int i = 0; i < perf_evsel__nr_cpus(counter); i++) {
>> - cpu = perf_cpu_map__cpu(evsel__cpus(counter), i);
>> - s2 = config->aggr_get_id(config, evsel__cpus(counter),
>> cpu);
>> -
>> + s2 = config->aggr_get_id(config, evsel__cpus(counter),
>> i);
>> for (s = 0; s < config->aggr_map->nr; s++) {
>> id = config->aggr_map->map[s];
>> if (s2 == id)
>> @@ -1164,7 +1162,7 @@ static void print_percore_thread(struct
>> perf_stat_config *config,
>>
>> print_counter_aggrdata(config, counter, s,
>> prefix, false,
>> - &first, cpu);
>> + &first, i);
>> }
>> }
>
> LGTM.
>
> Tested-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
>
> Ravi
>
Thanks for testing this fix! I will post v4.
Thanks
Jin Yao
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2020-02-14 6:06 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-13 7:15 [PATCH v3] perf stat: Show percore counts in per CPU output Jin Yao
2020-02-13 13:20 ` Ravi Bangoria
2020-02-13 15:10 ` Jin, Yao
2020-02-14 5:16 ` Ravi Bangoria
2020-02-14 6:06 ` Jin, Yao
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).