linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3] perf stat: Show percore counts in per CPU output
@ 2020-02-13  7:15 Jin Yao
  2020-02-13 13:20 ` Ravi Bangoria
  0 siblings, 1 reply; 5+ messages in thread
From: Jin Yao @ 2020-02-13  7:15 UTC (permalink / raw)
  To: acme, jolsa, peterz, mingo, alexander.shishkin
  Cc: Linux-kernel, ak, kan.liang, yao.jin, Jin Yao

We have supported the event modifier "percore" which sums up the
event counts for all hardware threads in a core and shows the counts
per core.

For example,

 # perf stat -e cpu/event=cpu-cycles,percore/ -a -A -- sleep 1

  Performance counter stats for 'system wide':

 S0-D0-C0                395,072      cpu/event=cpu-cycles,percore/
 S0-D0-C1                851,248      cpu/event=cpu-cycles,percore/
 S0-D0-C2                954,226      cpu/event=cpu-cycles,percore/
 S0-D0-C3              1,233,659      cpu/event=cpu-cycles,percore/

This patch provides a new option "--percore-show-thread". It is
used together with the event modifier "percore" to sum up the event
counts for all hardware threads in a core but show the counts per
hardware thread.

This is essentially a replacement for the any bit (which is gone in
Icelake). Per core counts are useful for some formulas, e.g. CoreIPC.
The original percore version was inconvenient to post process. This
variant matches the output of the any bit.

With this patch, for example,

 # perf stat -e cpu/event=cpu-cycles,percore/ -a -A --percore-show-thread  -- sleep 1

  Performance counter stats for 'system wide':

 CPU0               2,453,061      cpu/event=cpu-cycles,percore/
 CPU1               1,823,921      cpu/event=cpu-cycles,percore/
 CPU2               1,383,166      cpu/event=cpu-cycles,percore/
 CPU3               1,102,652      cpu/event=cpu-cycles,percore/
 CPU4               2,453,061      cpu/event=cpu-cycles,percore/
 CPU5               1,823,921      cpu/event=cpu-cycles,percore/
 CPU6               1,383,166      cpu/event=cpu-cycles,percore/
 CPU7               1,102,652      cpu/event=cpu-cycles,percore/

We can see counts are duplicated in CPU pairs
(CPU0/CPU4, CPU1/CPU5, CPU2/CPU6, CPU3/CPU7).

The interval mode also works. For example,

 # perf stat -e cpu/event=cpu-cycles,percore/ -a -A --percore-show-thread  -I 1000
 #           time CPU                    counts unit events
      1.000425421 CPU0                 925,032      cpu/event=cpu-cycles,percore/
      1.000425421 CPU1                 430,202      cpu/event=cpu-cycles,percore/
      1.000425421 CPU2                 436,843      cpu/event=cpu-cycles,percore/
      1.000425421 CPU3               1,192,504      cpu/event=cpu-cycles,percore/
      1.000425421 CPU4                 925,032      cpu/event=cpu-cycles,percore/
      1.000425421 CPU5                 430,202      cpu/event=cpu-cycles,percore/
      1.000425421 CPU6                 436,843      cpu/event=cpu-cycles,percore/
      1.000425421 CPU7               1,192,504      cpu/event=cpu-cycles,percore/

 v3:
 ---
 1. Fix the interval mode output error
 2. Use cpu value (not cpu index) in config->aggr_get_id().
 3. Refine the code according to Jiri's comments.

 v2:
 ---
 Add the explanation in change log. This is essentially a replacement
 for the any bit. No code change.

Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
---
 tools/perf/Documentation/perf-stat.txt |  9 +++++++
 tools/perf/builtin-stat.c              |  4 +++
 tools/perf/util/stat-display.c         | 35 ++++++++++++++++++++++----
 tools/perf/util/stat.h                 |  1 +
 4 files changed, 44 insertions(+), 5 deletions(-)

diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 9431b8066fb4..86d1fd92f017 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -334,6 +334,15 @@ Configure all used events to run in kernel space.
 --all-user::
 Configure all used events to run in user space.
 
+--percore-show-thread::
+The event modifier "percore" has supported to sum up the event counts
+for all hardware threads in a core and show the counts per core.
+
+This option with event modifier "percore" enabled also sums up the event
+counts for all hardware threads in a core but show the sum counts per
+hardware thread. This is essentially a replacement for the any bit and
+convenient for posting process.
+
 EXAMPLES
 --------
 
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a098c2ebf4ea..ec053dc1e35c 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -929,6 +929,10 @@ static struct option stat_options[] = {
 	OPT_BOOLEAN_FLAG(0, "all-user", &stat_config.all_user,
 			 "Configure all used events to run in user space.",
 			 PARSE_OPT_EXCLUSIVE),
+	OPT_BOOLEAN(0, "percore-show-thread", &stat_config.percore_show_thread,
+		    "Use with 'percore' event qualifier to show the event "
+		    "counts of one hardware thread by sum up total hardware "
+		    "threads of same physical core"),
 	OPT_END()
 };
 
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index bc31fccc0057..7eb3643a97ae 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -110,7 +110,7 @@ static void aggr_printout(struct perf_stat_config *config,
 			config->csv_sep);
 			break;
 	case AGGR_NONE:
-		if (evsel->percore) {
+		if (evsel->percore && !config->percore_show_thread) {
 			fprintf(config->output, "S%d-D%d-C%*d%s",
 				cpu_map__id_to_socket(id),
 				cpu_map__id_to_die(id),
@@ -628,7 +628,7 @@ static void aggr_cb(struct perf_stat_config *config,
 static void print_counter_aggrdata(struct perf_stat_config *config,
 				   struct evsel *counter, int s,
 				   char *prefix, bool metric_only,
-				   bool *first)
+				   bool *first, int cpu)
 {
 	struct aggr_data ad;
 	FILE *output = config->output;
@@ -654,7 +654,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
 		fprintf(output, "%s", prefix);
 
 	uval = val * counter->scale;
-	printout(config, id, nr, counter, uval, prefix,
+	printout(config, cpu != -1 ? cpu : id, nr, counter, uval, prefix,
 		 run, ena, 1.0, &rt_stat);
 	if (!metric_only)
 		fputc('\n', output);
@@ -687,7 +687,7 @@ static void print_aggr(struct perf_stat_config *config,
 		evlist__for_each_entry(evlist, counter) {
 			print_counter_aggrdata(config, counter, s,
 					       prefix, metric_only,
-					       &first);
+					       &first, -1);
 		}
 		if (metric_only)
 			fputc('\n', output);
@@ -1146,6 +1146,28 @@ static void print_footer(struct perf_stat_config *config)
 			"the same PMU. Try reorganizing the group.\n");
 }
 
+static void print_percore_thread(struct perf_stat_config *config,
+				 struct evsel *counter, char *prefix)
+{
+	int cpu, s, s2, id;
+	bool first = true;
+
+	for (int i = 0; i < perf_evsel__nr_cpus(counter); i++) {
+		cpu = perf_cpu_map__cpu(evsel__cpus(counter), i);
+		s2 = config->aggr_get_id(config, evsel__cpus(counter), cpu);
+
+		for (s = 0; s < config->aggr_map->nr; s++) {
+			id = config->aggr_map->map[s];
+			if (s2 == id)
+				break;
+		}
+
+		print_counter_aggrdata(config, counter, s,
+				       prefix, false,
+				       &first, cpu);
+	}
+}
+
 static void print_percore(struct perf_stat_config *config,
 			  struct evsel *counter, char *prefix)
 {
@@ -1157,13 +1179,16 @@ static void print_percore(struct perf_stat_config *config,
 	if (!(config->aggr_map || config->aggr_get_id))
 		return;
 
+	if (config->percore_show_thread)
+		return print_percore_thread(config, counter, prefix);
+
 	for (s = 0; s < config->aggr_map->nr; s++) {
 		if (prefix && metric_only)
 			fprintf(output, "%s", prefix);
 
 		print_counter_aggrdata(config, counter, s,
 				       prefix, metric_only,
-				       &first);
+				       &first, -1);
 	}
 
 	if (metric_only)
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index fb990efa54a8..b4fdfaa7f2c0 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -109,6 +109,7 @@ struct perf_stat_config {
 	bool			 walltime_run_table;
 	bool			 all_kernel;
 	bool			 all_user;
+	bool			 percore_show_thread;
 	FILE			*output;
 	unsigned int		 interval;
 	unsigned int		 timeout;
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH v3] perf stat: Show percore counts in per CPU output
  2020-02-13  7:15 [PATCH v3] perf stat: Show percore counts in per CPU output Jin Yao
@ 2020-02-13 13:20 ` Ravi Bangoria
  2020-02-13 15:10   ` Jin, Yao
  0 siblings, 1 reply; 5+ messages in thread
From: Ravi Bangoria @ 2020-02-13 13:20 UTC (permalink / raw)
  To: Jin Yao
  Cc: acme, jolsa, peterz, mingo, alexander.shishkin, Linux-kernel, ak,
	kan.liang, yao.jin, Ravi Bangoria

Hi Jin,

On 2/13/20 12:45 PM, Jin Yao wrote:
> With this patch, for example,
> 
>   # perf stat -e cpu/event=cpu-cycles,percore/ -a -A --percore-show-thread  -- sleep 1
> 
>    Performance counter stats for 'system wide':
> 
>   CPU0               2,453,061      cpu/event=cpu-cycles,percore/
>   CPU1               1,823,921      cpu/event=cpu-cycles,percore/
>   CPU2               1,383,166      cpu/event=cpu-cycles,percore/
>   CPU3               1,102,652      cpu/event=cpu-cycles,percore/
>   CPU4               2,453,061      cpu/event=cpu-cycles,percore/
>   CPU5               1,823,921      cpu/event=cpu-cycles,percore/
>   CPU6               1,383,166      cpu/event=cpu-cycles,percore/
>   CPU7               1,102,652      cpu/event=cpu-cycles,percore/
> 
> We can see counts are duplicated in CPU pairs
> (CPU0/CPU4, CPU1/CPU5, CPU2/CPU6, CPU3/CPU7).
> 

I was trying this patch and I am getting bit weird results when any cpu
is offline. Ex,

   $ lscpu | grep list
   On-line CPU(s) list:             0-4,6,7
   Off-line CPU(s) list:            5

   $ sudo ./perf stat -e cpu/event=cpu-cycles,percore/ -a -A --percore-show-thread -vv -- sleep 1
     ...
   cpu/event=cpu-cycles,percore/: 0: 23746491 1001189836 1001189836
   cpu/event=cpu-cycles,percore/: 1: 19802666 1001291299 1001291299
   cpu/event=cpu-cycles,percore/: 2: 24211983 1001394318 1001394318
   cpu/event=cpu-cycles,percore/: 3: 54051396 1001516816 1001516816
   cpu/event=cpu-cycles,percore/: 4: 6378825 1001064048 1001064048
   cpu/event=cpu-cycles,percore/: 5: 21299840 1001166297 1001166297
   cpu/event=cpu-cycles,percore/: 6: 13075410 1001274535 1001274535
   
    Performance counter stats for 'system wide':
   
   CPU0              30,125,316      cpu/event=cpu-cycles,percore/
   CPU1              19,802,666      cpu/event=cpu-cycles,percore/
   CPU2              45,511,823      cpu/event=cpu-cycles,percore/
   CPU3              67,126,806      cpu/event=cpu-cycles,percore/
   CPU4              30,125,316      cpu/event=cpu-cycles,percore/
   CPU7              67,126,806      cpu/event=cpu-cycles,percore/
   CPU0              30,125,316      cpu/event=cpu-cycles,percore/
   
          1.001918764 seconds time elapsed

I see proper result without --percore-show-thread:

   $ sudo ./perf stat -e cpu/event=cpu-cycles,percore/ -a -A -vv -- sleep 1
     ...
   cpu/event=cpu-cycles,percore/: 0: 11676414 1001190709 1001190709
   cpu/event=cpu-cycles,percore/: 1: 39119617 1001291459 1001291459
   cpu/event=cpu-cycles,percore/: 2: 41821512 1001391158 1001391158
   cpu/event=cpu-cycles,percore/: 3: 46853730 1001492799 1001492799
   cpu/event=cpu-cycles,percore/: 4: 14448274 1001095948 1001095948
   cpu/event=cpu-cycles,percore/: 5: 42238217 1001191187 1001191187
   cpu/event=cpu-cycles,percore/: 6: 33129641 1001292072 1001292072
   
    Performance counter stats for 'system wide':
   
   S0-D0-C0             26,124,688      cpu/event=cpu-cycles,percore/
   S0-D0-C1             39,119,617      cpu/event=cpu-cycles,percore/
   S0-D0-C2             84,059,729      cpu/event=cpu-cycles,percore/
   S0-D0-C3             79,983,371      cpu/event=cpu-cycles,percore/
   
          1.001961563 seconds time elapsed

[...]

> +--percore-show-thread::
> +The event modifier "percore" has supported to sum up the event counts
> +for all hardware threads in a core and show the counts per core.
> +
> +This option with event modifier "percore" enabled also sums up the event
> +counts for all hardware threads in a core but show the sum counts per
> +hardware thread. This is essentially a replacement for the any bit and
> +convenient for posting process.

s/posting process/post processing/ ? :)

Ravi


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v3] perf stat: Show percore counts in per CPU output
  2020-02-13 13:20 ` Ravi Bangoria
@ 2020-02-13 15:10   ` Jin, Yao
  2020-02-14  5:16     ` Ravi Bangoria
  0 siblings, 1 reply; 5+ messages in thread
From: Jin, Yao @ 2020-02-13 15:10 UTC (permalink / raw)
  To: Ravi Bangoria
  Cc: acme, jolsa, peterz, mingo, alexander.shishkin, Linux-kernel, ak,
	kan.liang, yao.jin



On 2/13/2020 9:20 PM, Ravi Bangoria wrote:
> Hi Jin,
> 
> On 2/13/20 12:45 PM, Jin Yao wrote:
>> With this patch, for example,
>>
>>   # perf stat -e cpu/event=cpu-cycles,percore/ -a -A 
>> --percore-show-thread  -- sleep 1
>>
>>    Performance counter stats for 'system wide':
>>
>>   CPU0               2,453,061      cpu/event=cpu-cycles,percore/
>>   CPU1               1,823,921      cpu/event=cpu-cycles,percore/
>>   CPU2               1,383,166      cpu/event=cpu-cycles,percore/
>>   CPU3               1,102,652      cpu/event=cpu-cycles,percore/
>>   CPU4               2,453,061      cpu/event=cpu-cycles,percore/
>>   CPU5               1,823,921      cpu/event=cpu-cycles,percore/
>>   CPU6               1,383,166      cpu/event=cpu-cycles,percore/
>>   CPU7               1,102,652      cpu/event=cpu-cycles,percore/
>>
>> We can see counts are duplicated in CPU pairs
>> (CPU0/CPU4, CPU1/CPU5, CPU2/CPU6, CPU3/CPU7).
>>
> 
> I was trying this patch and I am getting bit weird results when any cpu
> is offline. Ex,
> 
>    $ lscpu | grep list
>    On-line CPU(s) list:             0-4,6,7
>    Off-line CPU(s) list:            5
> 
>    $ sudo ./perf stat -e cpu/event=cpu-cycles,percore/ -a -A 
> --percore-show-thread -vv -- sleep 1
>      ...
>    cpu/event=cpu-cycles,percore/: 0: 23746491 1001189836 1001189836
>    cpu/event=cpu-cycles,percore/: 1: 19802666 1001291299 1001291299
>    cpu/event=cpu-cycles,percore/: 2: 24211983 1001394318 1001394318
>    cpu/event=cpu-cycles,percore/: 3: 54051396 1001516816 1001516816
>    cpu/event=cpu-cycles,percore/: 4: 6378825 1001064048 1001064048
>    cpu/event=cpu-cycles,percore/: 5: 21299840 1001166297 1001166297
>    cpu/event=cpu-cycles,percore/: 6: 13075410 1001274535 1001274535
>     Performance counter stats for 'system wide':
>    CPU0              30,125,316      cpu/event=cpu-cycles,percore/
>    CPU1              19,802,666      cpu/event=cpu-cycles,percore/
>    CPU2              45,511,823      cpu/event=cpu-cycles,percore/
>    CPU3              67,126,806      cpu/event=cpu-cycles,percore/
>    CPU4              30,125,316      cpu/event=cpu-cycles,percore/
>    CPU7              67,126,806      cpu/event=cpu-cycles,percore/
>    CPU0              30,125,316      cpu/event=cpu-cycles,percore/
>           1.001918764 seconds time elapsed
> 
> I see proper result without --percore-show-thread:
> 
>    $ sudo ./perf stat -e cpu/event=cpu-cycles,percore/ -a -A -vv -- sleep 1
>      ...
>    cpu/event=cpu-cycles,percore/: 0: 11676414 1001190709 1001190709
>    cpu/event=cpu-cycles,percore/: 1: 39119617 1001291459 1001291459
>    cpu/event=cpu-cycles,percore/: 2: 41821512 1001391158 1001391158
>    cpu/event=cpu-cycles,percore/: 3: 46853730 1001492799 1001492799
>    cpu/event=cpu-cycles,percore/: 4: 14448274 1001095948 1001095948
>    cpu/event=cpu-cycles,percore/: 5: 42238217 1001191187 1001191187
>    cpu/event=cpu-cycles,percore/: 6: 33129641 1001292072 1001292072
>     Performance counter stats for 'system wide':
>    S0-D0-C0             26,124,688      cpu/event=cpu-cycles,percore/
>    S0-D0-C1             39,119,617      cpu/event=cpu-cycles,percore/
>    S0-D0-C2             84,059,729      cpu/event=cpu-cycles,percore/
>    S0-D0-C3             79,983,371      cpu/event=cpu-cycles,percore/
>           1.001961563 seconds time elapsed
> 
> [...]
> 

Thanks so much for reporting this issue!

It looks I should use the cpu idx in print_percore_thread. I can't use 
the cpu value. I have a fix:

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 7eb3643a97ae..d89cb0da90f8 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -1149,13 +1149,11 @@ static void print_footer(struct perf_stat_config 
*config)
  static void print_percore_thread(struct perf_stat_config *config,
                                  struct evsel *counter, char *prefix)
  {
-       int cpu, s, s2, id;
+       int s, s2, id;
         bool first = true;

         for (int i = 0; i < perf_evsel__nr_cpus(counter); i++) {
-               cpu = perf_cpu_map__cpu(evsel__cpus(counter), i);
-               s2 = config->aggr_get_id(config, evsel__cpus(counter), cpu);
-
+               s2 = config->aggr_get_id(config, evsel__cpus(counter), i);
                 for (s = 0; s < config->aggr_map->nr; s++) {
                         id = config->aggr_map->map[s];
                         if (s2 == id)
@@ -1164,7 +1162,7 @@ static void print_percore_thread(struct 
perf_stat_config *config,

                 print_counter_aggrdata(config, counter, s,
                                        prefix, false,
-                                      &first, cpu);
+                                      &first, i);
         }
  }

With this fix, my test log:

root@kbl:~# perf stat -e cpu/event=cpu-cycles,percore/ -a -A 
--percore-show-thread -- sleep 1

  Performance counter stats for 'system wide':

CPU0                 386,355      cpu/event=cpu-cycles,percore/
CPU1                 538,325      cpu/event=cpu-cycles,percore/
CPU2                 900,263      cpu/event=cpu-cycles,percore/
CPU3               1,871,488      cpu/event=cpu-cycles,percore/
CPU4                 386,355      cpu/event=cpu-cycles,percore/
CPU6                 900,263      cpu/event=cpu-cycles,percore/
CPU7               1,871,488      cpu/event=cpu-cycles,percore/

        1.001476492 seconds time elapsed

Once I online all CPUs, the result is:

root@kbl:~# perf stat -e cpu/event=cpu-cycles,percore/ -a -A 
--percore-show-thread -- sleep 1

  Performance counter stats for 'system wide':

CPU0               1,371,762      cpu/event=cpu-cycles,percore/
CPU1                 827,386      cpu/event=cpu-cycles,percore/
CPU2                 309,934      cpu/event=cpu-cycles,percore/
CPU3               5,043,596      cpu/event=cpu-cycles,percore/
CPU4               1,371,762      cpu/event=cpu-cycles,percore/
CPU5                 827,386      cpu/event=cpu-cycles,percore/
CPU6                 309,934      cpu/event=cpu-cycles,percore/
CPU7               5,043,596      cpu/event=cpu-cycles,percore/

        1.001535000 seconds time elapsed

>> +--percore-show-thread::
>> +The event modifier "percore" has supported to sum up the event counts
>> +for all hardware threads in a core and show the counts per core.
>> +
>> +This option with event modifier "percore" enabled also sums up the event
>> +counts for all hardware threads in a core but show the sum counts per
>> +hardware thread. This is essentially a replacement for the any bit and
>> +convenient for posting process.
> 
> s/posting process/post processing/ ? :)
> 

OK, thanks!

Thanks
Jin Yao

> Ravi
> 

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH v3] perf stat: Show percore counts in per CPU output
  2020-02-13 15:10   ` Jin, Yao
@ 2020-02-14  5:16     ` Ravi Bangoria
  2020-02-14  6:06       ` Jin, Yao
  0 siblings, 1 reply; 5+ messages in thread
From: Ravi Bangoria @ 2020-02-14  5:16 UTC (permalink / raw)
  To: Jin, Yao
  Cc: acme, jolsa, peterz, mingo, alexander.shishkin, Linux-kernel, ak,
	kan.liang, yao.jin, Ravi Bangoria



On 2/13/20 8:40 PM, Jin, Yao wrote:
> 
> 
> On 2/13/2020 9:20 PM, Ravi Bangoria wrote:
>> Hi Jin,
>>
>> On 2/13/20 12:45 PM, Jin Yao wrote:
>>> With this patch, for example,
>>>
>>>   # perf stat -e cpu/event=cpu-cycles,percore/ -a -A --percore-show-thread  -- sleep 1
>>>
>>>    Performance counter stats for 'system wide':
>>>
>>>   CPU0               2,453,061      cpu/event=cpu-cycles,percore/
>>>   CPU1               1,823,921      cpu/event=cpu-cycles,percore/
>>>   CPU2               1,383,166      cpu/event=cpu-cycles,percore/
>>>   CPU3               1,102,652      cpu/event=cpu-cycles,percore/
>>>   CPU4               2,453,061      cpu/event=cpu-cycles,percore/
>>>   CPU5               1,823,921      cpu/event=cpu-cycles,percore/
>>>   CPU6               1,383,166      cpu/event=cpu-cycles,percore/
>>>   CPU7               1,102,652      cpu/event=cpu-cycles,percore/
>>>
>>> We can see counts are duplicated in CPU pairs
>>> (CPU0/CPU4, CPU1/CPU5, CPU2/CPU6, CPU3/CPU7).
>>>
>>
>> I was trying this patch and I am getting bit weird results when any cpu
>> is offline. Ex,
>>
>>    $ lscpu | grep list
>>    On-line CPU(s) list:             0-4,6,7
>>    Off-line CPU(s) list:            5
>>
>>    $ sudo ./perf stat -e cpu/event=cpu-cycles,percore/ -a -A --percore-show-thread -vv -- sleep 1
>>      ...
>>    cpu/event=cpu-cycles,percore/: 0: 23746491 1001189836 1001189836
>>    cpu/event=cpu-cycles,percore/: 1: 19802666 1001291299 1001291299
>>    cpu/event=cpu-cycles,percore/: 2: 24211983 1001394318 1001394318
>>    cpu/event=cpu-cycles,percore/: 3: 54051396 1001516816 1001516816
>>    cpu/event=cpu-cycles,percore/: 4: 6378825 1001064048 1001064048
>>    cpu/event=cpu-cycles,percore/: 5: 21299840 1001166297 1001166297
>>    cpu/event=cpu-cycles,percore/: 6: 13075410 1001274535 1001274535
>>     Performance counter stats for 'system wide':
>>    CPU0              30,125,316      cpu/event=cpu-cycles,percore/
>>    CPU1              19,802,666      cpu/event=cpu-cycles,percore/
>>    CPU2              45,511,823      cpu/event=cpu-cycles,percore/
>>    CPU3              67,126,806      cpu/event=cpu-cycles,percore/
>>    CPU4              30,125,316      cpu/event=cpu-cycles,percore/
>>    CPU7              67,126,806      cpu/event=cpu-cycles,percore/
>>    CPU0              30,125,316      cpu/event=cpu-cycles,percore/
>>           1.001918764 seconds time elapsed
>>
>> I see proper result without --percore-show-thread:
>>
>>    $ sudo ./perf stat -e cpu/event=cpu-cycles,percore/ -a -A -vv -- sleep 1
>>      ...
>>    cpu/event=cpu-cycles,percore/: 0: 11676414 1001190709 1001190709
>>    cpu/event=cpu-cycles,percore/: 1: 39119617 1001291459 1001291459
>>    cpu/event=cpu-cycles,percore/: 2: 41821512 1001391158 1001391158
>>    cpu/event=cpu-cycles,percore/: 3: 46853730 1001492799 1001492799
>>    cpu/event=cpu-cycles,percore/: 4: 14448274 1001095948 1001095948
>>    cpu/event=cpu-cycles,percore/: 5: 42238217 1001191187 1001191187
>>    cpu/event=cpu-cycles,percore/: 6: 33129641 1001292072 1001292072
>>     Performance counter stats for 'system wide':
>>    S0-D0-C0             26,124,688      cpu/event=cpu-cycles,percore/
>>    S0-D0-C1             39,119,617      cpu/event=cpu-cycles,percore/
>>    S0-D0-C2             84,059,729      cpu/event=cpu-cycles,percore/
>>    S0-D0-C3             79,983,371      cpu/event=cpu-cycles,percore/
>>           1.001961563 seconds time elapsed
>>
>> [...]
>>
> 
> Thanks so much for reporting this issue!
> 
> It looks I should use the cpu idx in print_percore_thread. I can't use the cpu value. I have a fix:
> 
> diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
> index 7eb3643a97ae..d89cb0da90f8 100644
> --- a/tools/perf/util/stat-display.c
> +++ b/tools/perf/util/stat-display.c
> @@ -1149,13 +1149,11 @@ static void print_footer(struct perf_stat_config *config)
>   static void print_percore_thread(struct perf_stat_config *config,
>                                   struct evsel *counter, char *prefix)
>   {
> -       int cpu, s, s2, id;
> +       int s, s2, id;
>          bool first = true;
> 
>          for (int i = 0; i < perf_evsel__nr_cpus(counter); i++) {
> -               cpu = perf_cpu_map__cpu(evsel__cpus(counter), i);
> -               s2 = config->aggr_get_id(config, evsel__cpus(counter), cpu);
> -
> +               s2 = config->aggr_get_id(config, evsel__cpus(counter), i);
>                  for (s = 0; s < config->aggr_map->nr; s++) {
>                          id = config->aggr_map->map[s];
>                          if (s2 == id)
> @@ -1164,7 +1162,7 @@ static void print_percore_thread(struct perf_stat_config *config,
> 
>                  print_counter_aggrdata(config, counter, s,
>                                         prefix, false,
> -                                      &first, cpu);
> +                                      &first, i);
>          }
>   }

LGTM.

Tested-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>

Ravi


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v3] perf stat: Show percore counts in per CPU output
  2020-02-14  5:16     ` Ravi Bangoria
@ 2020-02-14  6:06       ` Jin, Yao
  0 siblings, 0 replies; 5+ messages in thread
From: Jin, Yao @ 2020-02-14  6:06 UTC (permalink / raw)
  To: Ravi Bangoria
  Cc: acme, jolsa, peterz, mingo, alexander.shishkin, Linux-kernel, ak,
	kan.liang, yao.jin



On 2/14/2020 1:16 PM, Ravi Bangoria wrote:
> 
> 
> On 2/13/20 8:40 PM, Jin, Yao wrote:
>>
>>
>> On 2/13/2020 9:20 PM, Ravi Bangoria wrote:
>>> Hi Jin,
>>>
>>> On 2/13/20 12:45 PM, Jin Yao wrote:
>>>> With this patch, for example,
>>>>
>>>>   # perf stat -e cpu/event=cpu-cycles,percore/ -a -A 
>>>> --percore-show-thread  -- sleep 1
>>>>
>>>>    Performance counter stats for 'system wide':
>>>>
>>>>   CPU0               2,453,061      cpu/event=cpu-cycles,percore/
>>>>   CPU1               1,823,921      cpu/event=cpu-cycles,percore/
>>>>   CPU2               1,383,166      cpu/event=cpu-cycles,percore/
>>>>   CPU3               1,102,652      cpu/event=cpu-cycles,percore/
>>>>   CPU4               2,453,061      cpu/event=cpu-cycles,percore/
>>>>   CPU5               1,823,921      cpu/event=cpu-cycles,percore/
>>>>   CPU6               1,383,166      cpu/event=cpu-cycles,percore/
>>>>   CPU7               1,102,652      cpu/event=cpu-cycles,percore/
>>>>
>>>> We can see counts are duplicated in CPU pairs
>>>> (CPU0/CPU4, CPU1/CPU5, CPU2/CPU6, CPU3/CPU7).
>>>>
>>>
>>> I was trying this patch and I am getting bit weird results when any cpu
>>> is offline. Ex,
>>>
>>>    $ lscpu | grep list
>>>    On-line CPU(s) list:             0-4,6,7
>>>    Off-line CPU(s) list:            5
>>>
>>>    $ sudo ./perf stat -e cpu/event=cpu-cycles,percore/ -a -A 
>>> --percore-show-thread -vv -- sleep 1
>>>      ...
>>>    cpu/event=cpu-cycles,percore/: 0: 23746491 1001189836 1001189836
>>>    cpu/event=cpu-cycles,percore/: 1: 19802666 1001291299 1001291299
>>>    cpu/event=cpu-cycles,percore/: 2: 24211983 1001394318 1001394318
>>>    cpu/event=cpu-cycles,percore/: 3: 54051396 1001516816 1001516816
>>>    cpu/event=cpu-cycles,percore/: 4: 6378825 1001064048 1001064048
>>>    cpu/event=cpu-cycles,percore/: 5: 21299840 1001166297 1001166297
>>>    cpu/event=cpu-cycles,percore/: 6: 13075410 1001274535 1001274535
>>>     Performance counter stats for 'system wide':
>>>    CPU0              30,125,316      cpu/event=cpu-cycles,percore/
>>>    CPU1              19,802,666      cpu/event=cpu-cycles,percore/
>>>    CPU2              45,511,823      cpu/event=cpu-cycles,percore/
>>>    CPU3              67,126,806      cpu/event=cpu-cycles,percore/
>>>    CPU4              30,125,316      cpu/event=cpu-cycles,percore/
>>>    CPU7              67,126,806      cpu/event=cpu-cycles,percore/
>>>    CPU0              30,125,316      cpu/event=cpu-cycles,percore/
>>>           1.001918764 seconds time elapsed
>>>
>>> I see proper result without --percore-show-thread:
>>>
>>>    $ sudo ./perf stat -e cpu/event=cpu-cycles,percore/ -a -A -vv -- 
>>> sleep 1
>>>      ...
>>>    cpu/event=cpu-cycles,percore/: 0: 11676414 1001190709 1001190709
>>>    cpu/event=cpu-cycles,percore/: 1: 39119617 1001291459 1001291459
>>>    cpu/event=cpu-cycles,percore/: 2: 41821512 1001391158 1001391158
>>>    cpu/event=cpu-cycles,percore/: 3: 46853730 1001492799 1001492799
>>>    cpu/event=cpu-cycles,percore/: 4: 14448274 1001095948 1001095948
>>>    cpu/event=cpu-cycles,percore/: 5: 42238217 1001191187 1001191187
>>>    cpu/event=cpu-cycles,percore/: 6: 33129641 1001292072 1001292072
>>>     Performance counter stats for 'system wide':
>>>    S0-D0-C0             26,124,688      cpu/event=cpu-cycles,percore/
>>>    S0-D0-C1             39,119,617      cpu/event=cpu-cycles,percore/
>>>    S0-D0-C2             84,059,729      cpu/event=cpu-cycles,percore/
>>>    S0-D0-C3             79,983,371      cpu/event=cpu-cycles,percore/
>>>           1.001961563 seconds time elapsed
>>>
>>> [...]
>>>
>>
>> Thanks so much for reporting this issue!
>>
>> It looks I should use the cpu idx in print_percore_thread. I can't use 
>> the cpu value. I have a fix:
>>
>> diff --git a/tools/perf/util/stat-display.c 
>> b/tools/perf/util/stat-display.c
>> index 7eb3643a97ae..d89cb0da90f8 100644
>> --- a/tools/perf/util/stat-display.c
>> +++ b/tools/perf/util/stat-display.c
>> @@ -1149,13 +1149,11 @@ static void print_footer(struct 
>> perf_stat_config *config)
>>   static void print_percore_thread(struct perf_stat_config *config,
>>                                   struct evsel *counter, char *prefix)
>>   {
>> -       int cpu, s, s2, id;
>> +       int s, s2, id;
>>          bool first = true;
>>
>>          for (int i = 0; i < perf_evsel__nr_cpus(counter); i++) {
>> -               cpu = perf_cpu_map__cpu(evsel__cpus(counter), i);
>> -               s2 = config->aggr_get_id(config, evsel__cpus(counter), 
>> cpu);
>> -
>> +               s2 = config->aggr_get_id(config, evsel__cpus(counter), 
>> i);
>>                  for (s = 0; s < config->aggr_map->nr; s++) {
>>                          id = config->aggr_map->map[s];
>>                          if (s2 == id)
>> @@ -1164,7 +1162,7 @@ static void print_percore_thread(struct 
>> perf_stat_config *config,
>>
>>                  print_counter_aggrdata(config, counter, s,
>>                                         prefix, false,
>> -                                      &first, cpu);
>> +                                      &first, i);
>>          }
>>   }
> 
> LGTM.
> 
> Tested-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
> 
> Ravi
> 

Thanks for testing this fix! I will post v4.

Thanks
Jin Yao

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2020-02-14  6:06 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-13  7:15 [PATCH v3] perf stat: Show percore counts in per CPU output Jin Yao
2020-02-13 13:20 ` Ravi Bangoria
2020-02-13 15:10   ` Jin, Yao
2020-02-14  5:16     ` Ravi Bangoria
2020-02-14  6:06       ` Jin, Yao

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).