linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] perf stat: Show percore counts in per CPU output
@ 2020-02-06  1:56 Jin Yao
  2020-02-10 13:28 ` Jiri Olsa
  0 siblings, 1 reply; 7+ messages in thread
From: Jin Yao @ 2020-02-06  1:56 UTC (permalink / raw)
  To: acme, jolsa, peterz, mingo, alexander.shishkin
  Cc: Linux-kernel, ak, kan.liang, yao.jin, Jin Yao

The event modifier "percore" is already supported; it sums up the
event counts for all hardware threads in a core and shows the counts
per core.

For example,

 # perf stat -e cpu/event=cpu-cycles,percore/ -a -A -- sleep 1

  Performance counter stats for 'system wide':

 S0-D0-C0                395,072      cpu/event=cpu-cycles,percore/
 S0-D0-C1                851,248      cpu/event=cpu-cycles,percore/
 S0-D0-C2                954,226      cpu/event=cpu-cycles,percore/
 S0-D0-C3              1,233,659      cpu/event=cpu-cycles,percore/

This patch provides a new option "--percore-show-thread". It is
used together with the event modifier "percore" to sum up the event
counts for all hardware threads in a core, but to show the counts per
hardware thread.

For example,

 # perf stat -e cpu/event=cpu-cycles,percore/ -a -A --percore-show-thread  -- sleep 1

  Performance counter stats for 'system wide':

 CPU0               2,453,061      cpu/event=cpu-cycles,percore/
 CPU1               1,823,921      cpu/event=cpu-cycles,percore/
 CPU2               1,383,166      cpu/event=cpu-cycles,percore/
 CPU3               1,102,652      cpu/event=cpu-cycles,percore/
 CPU4               2,453,061      cpu/event=cpu-cycles,percore/
 CPU5               1,823,921      cpu/event=cpu-cycles,percore/
 CPU6               1,383,166      cpu/event=cpu-cycles,percore/
 CPU7               1,102,652      cpu/event=cpu-cycles,percore/

We can see that the counts are duplicated within each pair of sibling
CPUs (CPU0/CPU4, CPU1/CPU5, CPU2/CPU6, CPU3/CPU7).

This new option may be useful for some script processing.

Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
---
 tools/perf/Documentation/perf-stat.txt |  7 ++++
 tools/perf/builtin-stat.c              |  4 ++
 tools/perf/util/stat-display.c         | 57 ++++++++++++++++++++++----
 tools/perf/util/stat.h                 |  1 +
 4 files changed, 60 insertions(+), 9 deletions(-)

diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 9431b8066fb4..f6033b3d0971 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -334,6 +334,13 @@ Configure all used events to run in kernel space.
 --all-user::
 Configure all used events to run in user space.
 
+--percore-show-thread::
+The event modifier "percore" sums up the event counts for all hardware
+threads in a core and shows the counts per core.
+
+With this option, the event modifier "percore" still sums up the event
+counts for all hardware threads in a core but shows the counts per thread.
+
 EXAMPLES
 --------
 
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a098c2ebf4ea..ec053dc1e35c 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -929,6 +929,10 @@ static struct option stat_options[] = {
 	OPT_BOOLEAN_FLAG(0, "all-user", &stat_config.all_user,
 			 "Configure all used events to run in user space.",
 			 PARSE_OPT_EXCLUSIVE),
+	OPT_BOOLEAN(0, "percore-show-thread", &stat_config.percore_show_thread,
+		    "Use with 'percore' event qualifier to show the event "
+		    "counts of one hardware thread by sum up total hardware "
+		    "threads of same physical core"),
 	OPT_END()
 };
 
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index bc31fccc0057..ca603e59dfe1 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -110,7 +110,7 @@ static void aggr_printout(struct perf_stat_config *config,
 			config->csv_sep);
 			break;
 	case AGGR_NONE:
-		if (evsel->percore) {
+		if (evsel->percore && !config->percore_show_thread) {
 			fprintf(config->output, "S%d-D%d-C%*d%s",
 				cpu_map__id_to_socket(id),
 				cpu_map__id_to_die(id),
@@ -628,7 +628,7 @@ static void aggr_cb(struct perf_stat_config *config,
 static void print_counter_aggrdata(struct perf_stat_config *config,
 				   struct evsel *counter, int s,
 				   char *prefix, bool metric_only,
-				   bool *first)
+				   bool *first, int cpu)
 {
 	struct aggr_data ad;
 	FILE *output = config->output;
@@ -654,8 +654,15 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
 		fprintf(output, "%s", prefix);
 
 	uval = val * counter->scale;
-	printout(config, id, nr, counter, uval, prefix,
-		 run, ena, 1.0, &rt_stat);
+
+	if (cpu == -1) {
+		printout(config, id, nr, counter, uval, prefix,
+			 run, ena, 1.0, &rt_stat);
+	} else {
+		printout(config, cpu, nr, counter, uval, prefix,
+			 run, ena, 1.0, &rt_stat);
+	}
+
 	if (!metric_only)
 		fputc('\n', output);
 }
@@ -687,7 +694,7 @@ static void print_aggr(struct perf_stat_config *config,
 		evlist__for_each_entry(evlist, counter) {
 			print_counter_aggrdata(config, counter, s,
 					       prefix, metric_only,
-					       &first);
+					       &first, -1);
 		}
 		if (metric_only)
 			fputc('\n', output);
@@ -1163,13 +1170,38 @@ static void print_percore(struct perf_stat_config *config,
 
 		print_counter_aggrdata(config, counter, s,
 				       prefix, metric_only,
-				       &first);
+				       &first, -1);
 	}
 
 	if (metric_only)
 		fputc('\n', output);
 }
 
+static void print_percore_thread(struct perf_stat_config *config,
+				 struct evsel *counter, char *prefix)
+{
+	int cpu, s, s2, id;
+	bool first = true;
+	FILE *output = config->output;
+
+	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
+		s2 = config->aggr_get_id(config, evsel__cpus(counter), cpu);
+
+		for (s = 0; s < config->aggr_map->nr; s++) {
+			id = config->aggr_map->map[s];
+			if (s2 == id)
+				break;
+		}
+
+		if (prefix)
+			fprintf(output, "%s", prefix);
+
+		print_counter_aggrdata(config, counter, s,
+				       prefix, false,
+				       &first, cpu);
+	}
+}
+
 void
 perf_evlist__print_counters(struct evlist *evlist,
 			    struct perf_stat_config *config,
@@ -1222,9 +1254,16 @@ perf_evlist__print_counters(struct evlist *evlist,
 			print_no_aggr_metric(config, evlist, prefix);
 		else {
 			evlist__for_each_entry(evlist, counter) {
-				if (counter->percore)
-					print_percore(config, counter, prefix);
-				else
+				if (counter->percore) {
+					if (config->percore_show_thread) {
+						print_percore_thread(config,
+								     counter,
+								     prefix);
+					} else {
+						print_percore(config, counter,
+							      prefix);
+					}
+				} else
 					print_counter(config, counter, prefix);
 			}
 		}
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index fb990efa54a8..b4fdfaa7f2c0 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -109,6 +109,7 @@ struct perf_stat_config {
 	bool			 walltime_run_table;
 	bool			 all_kernel;
 	bool			 all_user;
+	bool			 percore_show_thread;
 	FILE			*output;
 	unsigned int		 interval;
 	unsigned int		 timeout;
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] perf stat: Show percore counts in per CPU output
  2020-02-06  1:56 [PATCH] perf stat: Show percore counts in per CPU output Jin Yao
@ 2020-02-10 13:28 ` Jiri Olsa
  2020-02-10 13:46   ` Jin, Yao
  0 siblings, 1 reply; 7+ messages in thread
From: Jiri Olsa @ 2020-02-10 13:28 UTC (permalink / raw)
  To: Jin Yao
  Cc: acme, jolsa, peterz, mingo, alexander.shishkin, Linux-kernel, ak,
	kan.liang, yao.jin

On Thu, Feb 06, 2020 at 09:56:13AM +0800, Jin Yao wrote:
> We have supported the event modifier "percore" which sums up the
> event counts for all hardware threads in a core and show the counts
> per core.
> 
> For example,
> 
>  # perf stat -e cpu/event=cpu-cycles,percore/ -a -A -- sleep 1
> 
>   Performance counter stats for 'system wide':
> 
>  S0-D0-C0                395,072      cpu/event=cpu-cycles,percore/
>  S0-D0-C1                851,248      cpu/event=cpu-cycles,percore/
>  S0-D0-C2                954,226      cpu/event=cpu-cycles,percore/
>  S0-D0-C3              1,233,659      cpu/event=cpu-cycles,percore/
> 
> This patch provides a new option "--percore-show-thread". It is
> used with event modifier "percore" together to sum up the event counts
> for all hardware threads in a core but show the counts per hardware
> thread.
> 
> For example,
> 
>  # perf stat -e cpu/event=cpu-cycles,percore/ -a -A --percore-show-thread  -- sleep 1
> 
>   Performance counter stats for 'system wide':
> 
>  CPU0               2,453,061      cpu/event=cpu-cycles,percore/
>  CPU1               1,823,921      cpu/event=cpu-cycles,percore/
>  CPU2               1,383,166      cpu/event=cpu-cycles,percore/
>  CPU3               1,102,652      cpu/event=cpu-cycles,percore/
>  CPU4               2,453,061      cpu/event=cpu-cycles,percore/
>  CPU5               1,823,921      cpu/event=cpu-cycles,percore/
>  CPU6               1,383,166      cpu/event=cpu-cycles,percore/
>  CPU7               1,102,652      cpu/event=cpu-cycles,percore/

I don't understand how this is different from the -A output:

  # ./perf stat -e cpu/event=cpu-cycles/ -A  
  ^C
   Performance counter stats for 'system wide':

  CPU0              56,847,497      cpu/event=cpu-cycles/                                       
  CPU1              75,274,384      cpu/event=cpu-cycles/                                       
  CPU2              63,866,342      cpu/event=cpu-cycles/                                       
  CPU3              89,559,693      cpu/event=cpu-cycles/                                       
  CPU4              74,761,132      cpu/event=cpu-cycles/                                       
  CPU5              76,320,191      cpu/event=cpu-cycles/                                       
  CPU6              55,100,175      cpu/event=cpu-cycles/                                       
  CPU7              48,472,895      cpu/event=cpu-cycles/                                       

       1.074800857 seconds time elapsed

also the interval output is mangled:

  # ./perf stat -e cpu/event=cpu-cycles,percore/ -a -A --percore-show-thread  -I 1000
  #           time CPU                    counts unit events
     1.000177375      1.000177375 CPU0             138,483,540      cpu/event=cpu-cycles,percore/                                   
     1.000177375      1.000177375 CPU1             143,159,477      cpu/event=cpu-cycles,percore/                                   
     1.000177375      1.000177375 CPU2             177,554,642      cpu/event=cpu-cycles,percore/                                   
     1.000177375      1.000177375 CPU3             150,974,512      cpu/event=cpu-cycles,percore/                                   
     1.000177375      1.000177375 CPU4             138,483,540      cpu/event=cpu-cycles,percore/                                   
     1.000177375      1.000177375 CPU5             143,159,477      cpu/event=cpu-cycles,percore/                                   
     1.000177375      1.000177375 CPU6             177,554,642      cpu/event=cpu-cycles,percore/                                   

jirka


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] perf stat: Show percore counts in per CPU output
  2020-02-10 13:28 ` Jiri Olsa
@ 2020-02-10 13:46   ` Jin, Yao
  2020-02-10 14:01     ` Jiri Olsa
  0 siblings, 1 reply; 7+ messages in thread
From: Jin, Yao @ 2020-02-10 13:46 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: acme, jolsa, peterz, mingo, alexander.shishkin, Linux-kernel, ak,
	kan.liang, yao.jin



On 2/10/2020 9:28 PM, Jiri Olsa wrote:
> On Thu, Feb 06, 2020 at 09:56:13AM +0800, Jin Yao wrote:
>> We have supported the event modifier "percore" which sums up the
>> event counts for all hardware threads in a core and show the counts
>> per core.
>>
>> For example,
>>
>>   # perf stat -e cpu/event=cpu-cycles,percore/ -a -A -- sleep 1
>>
>>    Performance counter stats for 'system wide':
>>
>>   S0-D0-C0                395,072      cpu/event=cpu-cycles,percore/
>>   S0-D0-C1                851,248      cpu/event=cpu-cycles,percore/
>>   S0-D0-C2                954,226      cpu/event=cpu-cycles,percore/
>>   S0-D0-C3              1,233,659      cpu/event=cpu-cycles,percore/
>>
>> This patch provides a new option "--percore-show-thread". It is
>> used with event modifier "percore" together to sum up the event counts
>> for all hardware threads in a core but show the counts per hardware
>> thread.
>>
>> For example,
>>
>>   # perf stat -e cpu/event=cpu-cycles,percore/ -a -A --percore-show-thread  -- sleep 1
>>
>>    Performance counter stats for 'system wide':
>>
>>   CPU0               2,453,061      cpu/event=cpu-cycles,percore/
>>   CPU1               1,823,921      cpu/event=cpu-cycles,percore/
>>   CPU2               1,383,166      cpu/event=cpu-cycles,percore/
>>   CPU3               1,102,652      cpu/event=cpu-cycles,percore/
>>   CPU4               2,453,061      cpu/event=cpu-cycles,percore/
>>   CPU5               1,823,921      cpu/event=cpu-cycles,percore/
>>   CPU6               1,383,166      cpu/event=cpu-cycles,percore/
>>   CPU7               1,102,652      cpu/event=cpu-cycles,percore/
> 
> I don't understand how is this different from -A output:
> 
>    # ./perf stat -e cpu/event=cpu-cycles/ -A
>    ^C
>     Performance counter stats for 'system wide':
> 
>    CPU0              56,847,497      cpu/event=cpu-cycles/
>    CPU1              75,274,384      cpu/event=cpu-cycles/
>    CPU2              63,866,342      cpu/event=cpu-cycles/
>    CPU3              89,559,693      cpu/event=cpu-cycles/
>    CPU4              74,761,132      cpu/event=cpu-cycles/
>    CPU5              76,320,191      cpu/event=cpu-cycles/
>    CPU6              55,100,175      cpu/event=cpu-cycles/
>    CPU7              48,472,895      cpu/event=cpu-cycles/
> 
>         1.074800857 seconds time elapsed
> 

The results are different.

With --percore-show-thread, CPU0 and CPU4 have the same counts (CPU0 and 
CPU4 are siblings, e.g. 2,453,061 in my example). The value is sum of 
CPU0 + CPU4.

Without --percore-show-thread, CPU0 and CPU4 have their own counts.

> also the interval output is mangled:
> 
>    # ./perf stat -e cpu/event=cpu-cycles,percore/ -a -A --percore-show-thread  -I 1000
>    #           time CPU                    counts unit events
>       1.000177375      1.000177375 CPU0             138,483,540      cpu/event=cpu-cycles,percore/
>       1.000177375      1.000177375 CPU1             143,159,477      cpu/event=cpu-cycles,percore/
>       1.000177375      1.000177375 CPU2             177,554,642      cpu/event=cpu-cycles,percore/
>       1.000177375      1.000177375 CPU3             150,974,512      cpu/event=cpu-cycles,percore/
>       1.000177375      1.000177375 CPU4             138,483,540      cpu/event=cpu-cycles,percore/
>       1.000177375      1.000177375 CPU5             143,159,477      cpu/event=cpu-cycles,percore/
>       1.000177375      1.000177375 CPU6             177,554,642      cpu/event=cpu-cycles,percore/
> 
> jirka
> 

Sorry, why is the interval output mangled? It's expected that CPU0 and
CPU4 have the same counts.

Thanks
Jin Yao


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] perf stat: Show percore counts in per CPU output
  2020-02-10 13:46   ` Jin, Yao
@ 2020-02-10 14:01     ` Jiri Olsa
  2020-02-10 17:01       ` Andi Kleen
  0 siblings, 1 reply; 7+ messages in thread
From: Jiri Olsa @ 2020-02-10 14:01 UTC (permalink / raw)
  To: Jin, Yao
  Cc: acme, jolsa, peterz, mingo, alexander.shishkin, Linux-kernel, ak,
	kan.liang, yao.jin

On Mon, Feb 10, 2020 at 09:46:46PM +0800, Jin, Yao wrote:
> 
> 
> On 2/10/2020 9:28 PM, Jiri Olsa wrote:
> > On Thu, Feb 06, 2020 at 09:56:13AM +0800, Jin Yao wrote:
> > > We have supported the event modifier "percore" which sums up the
> > > event counts for all hardware threads in a core and show the counts
> > > per core.
> > > 
> > > For example,
> > > 
> > >   # perf stat -e cpu/event=cpu-cycles,percore/ -a -A -- sleep 1
> > > 
> > >    Performance counter stats for 'system wide':
> > > 
> > >   S0-D0-C0                395,072      cpu/event=cpu-cycles,percore/
> > >   S0-D0-C1                851,248      cpu/event=cpu-cycles,percore/
> > >   S0-D0-C2                954,226      cpu/event=cpu-cycles,percore/
> > >   S0-D0-C3              1,233,659      cpu/event=cpu-cycles,percore/
> > > 
> > > This patch provides a new option "--percore-show-thread". It is
> > > used with event modifier "percore" together to sum up the event counts
> > > for all hardware threads in a core but show the counts per hardware
> > > thread.
> > > 
> > > For example,
> > > 
> > >   # perf stat -e cpu/event=cpu-cycles,percore/ -a -A --percore-show-thread  -- sleep 1
> > > 
> > >    Performance counter stats for 'system wide':
> > > 
> > >   CPU0               2,453,061      cpu/event=cpu-cycles,percore/
> > >   CPU1               1,823,921      cpu/event=cpu-cycles,percore/
> > >   CPU2               1,383,166      cpu/event=cpu-cycles,percore/
> > >   CPU3               1,102,652      cpu/event=cpu-cycles,percore/
> > >   CPU4               2,453,061      cpu/event=cpu-cycles,percore/
> > >   CPU5               1,823,921      cpu/event=cpu-cycles,percore/
> > >   CPU6               1,383,166      cpu/event=cpu-cycles,percore/
> > >   CPU7               1,102,652      cpu/event=cpu-cycles,percore/
> > 
> > I don't understand how is this different from -A output:
> > 
> >    # ./perf stat -e cpu/event=cpu-cycles/ -A
> >    ^C
> >     Performance counter stats for 'system wide':
> > 
> >    CPU0              56,847,497      cpu/event=cpu-cycles/
> >    CPU1              75,274,384      cpu/event=cpu-cycles/
> >    CPU2              63,866,342      cpu/event=cpu-cycles/
> >    CPU3              89,559,693      cpu/event=cpu-cycles/
> >    CPU4              74,761,132      cpu/event=cpu-cycles/
> >    CPU5              76,320,191      cpu/event=cpu-cycles/
> >    CPU6              55,100,175      cpu/event=cpu-cycles/
> >    CPU7              48,472,895      cpu/event=cpu-cycles/
> > 
> >         1.074800857 seconds time elapsed
> > 
> 
> The results are different.
> 
> With --percore-show-thread, CPU0 and CPU4 have the same counts (CPU0 and
> CPU4 are siblings, e.g. 2,453,061 in my example). The value is sum of CPU0 +
> CPU4.

so it shows percore stats but displays all the cpus? what is this good for?
to see which cpus are in core? if that's the case then I think we could
somehow display the cpu numbers for core in --per-core output, like:

S0-D0-C0(0,4)                395,072      cpu/event=cpu-cycles,percore/
S0-D0-C1(1,5)                851,248      cpu/event=cpu-cycles,percore/
S0-D0-C2(2,6)                954,226      cpu/event=cpu-cycles,percore/
S0-D0-C3(3,7)              1,233,659      cpu/event=cpu-cycles,percore/


> 
> Without --percore-show-thread, CPU0 and CPU4 have their own counts.
> 
> > also the interval output is mangled:
> > 
> >    # ./perf stat -e cpu/event=cpu-cycles,percore/ -a -A --percore-show-thread  -I 1000
> >    #           time CPU                    counts unit events
> >       1.000177375      1.000177375 CPU0             138,483,540      cpu/event=cpu-cycles,percore/
> >       1.000177375      1.000177375 CPU1             143,159,477      cpu/event=cpu-cycles,percore/
> >       1.000177375      1.000177375 CPU2             177,554,642      cpu/event=cpu-cycles,percore/
> >       1.000177375      1.000177375 CPU3             150,974,512      cpu/event=cpu-cycles,percore/
> >       1.000177375      1.000177375 CPU4             138,483,540      cpu/event=cpu-cycles,percore/
> >       1.000177375      1.000177375 CPU5             143,159,477      cpu/event=cpu-cycles,percore/
> >       1.000177375      1.000177375 CPU6             177,554,642      cpu/event=cpu-cycles,percore/
> > 
> > jirka
> > 
> 
> Sorry, why the interval output is mangled? It's expected that CPU0 and CPU4
> have the same counts.

there are 2 timestamp columns and the header line does
not align with the data

jirka


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] perf stat: Show percore counts in per CPU output
  2020-02-10 14:01     ` Jiri Olsa
@ 2020-02-10 17:01       ` Andi Kleen
  2020-02-10 21:04         ` Jiri Olsa
  0 siblings, 1 reply; 7+ messages in thread
From: Andi Kleen @ 2020-02-10 17:01 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Jin, Yao, acme, jolsa, peterz, mingo, alexander.shishkin,
	Linux-kernel, kan.liang, yao.jin

> > With --percore-show-thread, CPU0 and CPU4 have the same counts (CPU0 and
> > CPU4 are siblings, e.g. 2,453,061 in my example). The value is sum of CPU0 +
> > CPU4.
> 
> so it shows percore stats but displays all the cpus? what is this good for?

This is essentially a replacement for the any bit (which is gone in Icelake).
Per core counts are useful for some formulas, e.g. CoreIPC

The original percore version was inconvenient to post process. This
variant matches the output of the any bit.

-Andi

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] perf stat: Show percore counts in per CPU output
  2020-02-10 17:01       ` Andi Kleen
@ 2020-02-10 21:04         ` Jiri Olsa
  2020-02-11  1:48           ` Jin, Yao
  0 siblings, 1 reply; 7+ messages in thread
From: Jiri Olsa @ 2020-02-10 21:04 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Jin, Yao, acme, jolsa, peterz, mingo, alexander.shishkin,
	Linux-kernel, kan.liang, yao.jin

On Mon, Feb 10, 2020 at 09:01:59AM -0800, Andi Kleen wrote:
> > > With --percore-show-thread, CPU0 and CPU4 have the same counts (CPU0 and
> > > CPU4 are siblings, e.g. 2,453,061 in my example). The value is sum of CPU0 +
> > > CPU4.
> > 
> > so it shows percore stats but displays all the cpus? what is this good for?
> 
> This is essentially a replacement for the any bit (which is gone in Icelake).
> Per core counts are useful for some formulas, e.g. CoreIPC
> 
> The original percore version was inconvenient to post process. This
> variant matches the output of the any bit.

I see, please put this into the changelog/doc

thanks,
jirka


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] perf stat: Show percore counts in per CPU output
  2020-02-10 21:04         ` Jiri Olsa
@ 2020-02-11  1:48           ` Jin, Yao
  0 siblings, 0 replies; 7+ messages in thread
From: Jin, Yao @ 2020-02-11  1:48 UTC (permalink / raw)
  To: Jiri Olsa, Andi Kleen
  Cc: acme, jolsa, peterz, mingo, alexander.shishkin, Linux-kernel,
	kan.liang, yao.jin



On 2/11/2020 5:04 AM, Jiri Olsa wrote:
> On Mon, Feb 10, 2020 at 09:01:59AM -0800, Andi Kleen wrote:
>>>> With --percore-show-thread, CPU0 and CPU4 have the same counts (CPU0 and
>>>> CPU4 are siblings, e.g. 2,453,061 in my example). The value is sum of CPU0 +
>>>> CPU4.
>>>
>>> so it shows percore stats but displays all the cpus? what is this good for?
>>
>> This is essentially a replacement for the any bit (which is gone in Icelake).
>> Per core counts are useful for some formulas, e.g. CoreIPC
>>
>> The original percore version was inconvenient to post process. This
>> variant matches the output of the any bit.
> 
> I see, please put this to the changelog/doc
> 
> thanks,
> jirka
> 

Thanks Jiri, thanks Andi!

I will put the explanation in v2.

Thanks
Jin Yao

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2020-02-11  1:49 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-06  1:56 [PATCH] perf stat: Show percore counts in per CPU output Jin Yao
2020-02-10 13:28 ` Jiri Olsa
2020-02-10 13:46   ` Jin, Yao
2020-02-10 14:01     ` Jiri Olsa
2020-02-10 17:01       ` Andi Kleen
2020-02-10 21:04         ` Jiri Olsa
2020-02-11  1:48           ` Jin, Yao

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).