linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] perf stat: Merge uncore events by default for hybrid platform
@ 2021-06-16  6:30 Jin Yao
  2021-07-06  2:32 ` Jin, Yao
  0 siblings, 1 reply; 4+ messages in thread
From: Jin Yao @ 2021-06-16  6:30 UTC (permalink / raw)
  To: acme, jolsa, peterz, mingo, alexander.shishkin
  Cc: Linux-kernel, ak, kan.liang, yao.jin, Jin Yao

On hybrid platforms, by default perf stat aggregates and reports the event
counts per PMU. For example,

  # perf stat -e cycles -a true

   Performance counter stats for 'system wide':

           1,400,445      cpu_core/cycles/
             680,881      cpu_atom/cycles/

         0.001770773 seconds time elapsed

For uncore events, however, that is not a suitable method: uncore has
nothing to do with hybrid. So for uncore events, we aggregate the event
counts from all PMUs and report them without the PMU name.

Before:

  # perf stat -e arb/event=0x81,umask=0x1/,arb/event=0x84,umask=0x1/ -a true

   Performance counter stats for 'system wide':

               2,058      uncore_arb_0/event=0x81,umask=0x1/
               2,028      uncore_arb_1/event=0x81,umask=0x1/
                   0      uncore_arb_0/event=0x84,umask=0x1/
                   0      uncore_arb_1/event=0x84,umask=0x1/

         0.000614498 seconds time elapsed

After:

  # perf stat -e arb/event=0x81,umask=0x1/,arb/event=0x84,umask=0x1/ -a true

   Performance counter stats for 'system wide':

               3,996      arb/event=0x81,umask=0x1/
                   0      arb/event=0x84,umask=0x1/

         0.000630046 seconds time elapsed

Of course, '--no-merge' still works for uncore events.

  # perf stat -e arb/event=0x81,umask=0x1/,arb/event=0x84,umask=0x1/ --no-merge true

   Performance counter stats for 'system wide':

               1,952      uncore_arb_0/event=0x81,umask=0x1/
               1,921      uncore_arb_1/event=0x81,umask=0x1/
                   0      uncore_arb_0/event=0x84,umask=0x1/
                   0      uncore_arb_1/event=0x84,umask=0x1/

         0.000575536 seconds time elapsed

Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
---
 tools/perf/builtin-stat.c      |  3 ---
 tools/perf/util/stat-display.c | 29 +++++++++++++++++++++++++----
 2 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index f9f74a514315..b67a44982b61 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -2442,9 +2442,6 @@ int cmd_stat(int argc, const char **argv)
 
 	evlist__check_cpu_maps(evsel_list);
 
-	if (perf_pmu__has_hybrid())
-		stat_config.no_merge = true;
-
 	/*
 	 * Initialize thread_map with comm names,
 	 * so we could print it out on output.
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index b759dfd633b4..c6070f4684ca 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -595,6 +595,19 @@ static void collect_all_aliases(struct perf_stat_config *config, struct evsel *c
 	}
 }
 
+static bool is_uncore(struct evsel *evsel)
+{
+	struct perf_pmu *pmu;
+
+	if (evsel->pmu_name) {
+		pmu = perf_pmu__find(evsel->pmu_name);
+		if (pmu)
+			return pmu->is_uncore;
+	}
+
+	return false;
+}
+
 static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
 			    void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data,
 				       bool first),
@@ -603,10 +616,18 @@ static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
 	if (counter->merged_stat)
 		return false;
 	cb(config, counter, data, true);
-	if (config->no_merge)
-		uniquify_event_name(counter);
-	else if (counter->auto_merge_stats)
-		collect_all_aliases(config, counter, cb, data);
+	if (perf_pmu__has_hybrid()) {
+		if (config->no_merge || !is_uncore(counter))
+			uniquify_event_name(counter);
+		else if (counter->auto_merge_stats)
+			collect_all_aliases(config, counter, cb, data);
+	} else {
+		if (config->no_merge)
+			uniquify_event_name(counter);
+		else if (counter->auto_merge_stats)
+			collect_all_aliases(config, counter, cb, data);
+	}
+
 	return true;
 }
 
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] perf stat: Merge uncore events by default for hybrid platform
  2021-06-16  6:30 [PATCH] perf stat: Merge uncore events by default for hybrid platform Jin Yao
@ 2021-07-06  2:32 ` Jin, Yao
  2021-07-06 19:51   ` Jiri Olsa
  0 siblings, 1 reply; 4+ messages in thread
From: Jin, Yao @ 2021-07-06  2:32 UTC (permalink / raw)
  To: acme, jolsa, peterz, mingo, alexander.shishkin
  Cc: Linux-kernel, ak, kan.liang, yao.jin

Hi,

On 6/16/2021 2:30 PM, Jin Yao wrote:
> On hybrid platform, by default stat aggregates and reports the event counts
> per pmu. For example,
> 
>    # perf stat -e cycles -a true
> 
>     Performance counter stats for 'system wide':
> 
>             1,400,445      cpu_core/cycles/
>               680,881      cpu_atom/cycles/
> 
>           0.001770773 seconds time elapsed
> 
> While for uncore events, that's not a suitable method. Uncore has nothing
> to do with hybrid. So for uncore events, we aggregate event counts from all
> PMUs and report the counts without PMUs.
> 
> Before:
> 
>    # perf stat -e arb/event=0x81,umask=0x1/,arb/event=0x84,umask=0x1/ -a true
> 
>     Performance counter stats for 'system wide':
> 
>                 2,058      uncore_arb_0/event=0x81,umask=0x1/
>                 2,028      uncore_arb_1/event=0x81,umask=0x1/
>                     0      uncore_arb_0/event=0x84,umask=0x1/
>                     0      uncore_arb_1/event=0x84,umask=0x1/
> 
>           0.000614498 seconds time elapsed
> 
> After:
> 
>    # perf stat -e arb/event=0x81,umask=0x1/,arb/event=0x84,umask=0x1/ -a true
> 
>     Performance counter stats for 'system wide':
> 
>                 3,996      arb/event=0x81,umask=0x1/
>                     0      arb/event=0x84,umask=0x1/
> 
>           0.000630046 seconds time elapsed
> 
> Of course, we also keep the '--no-merge' still works for uncore events.
> 
>    # perf stat -e arb/event=0x81,umask=0x1/,arb/event=0x84,umask=0x1/ --no-merge true
> 
>     Performance counter stats for 'system wide':
> 
>                 1,952      uncore_arb_0/event=0x81,umask=0x1/
>                 1,921      uncore_arb_1/event=0x81,umask=0x1/
>                     0      uncore_arb_0/event=0x84,umask=0x1/
>                     0      uncore_arb_1/event=0x84,umask=0x1/
> 
>           0.000575536 seconds time elapsed
> 
> Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
> ---
>   tools/perf/builtin-stat.c      |  3 ---
>   tools/perf/util/stat-display.c | 29 +++++++++++++++++++++++++----
>   2 files changed, 25 insertions(+), 7 deletions(-)
> 
> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> index f9f74a514315..b67a44982b61 100644
> --- a/tools/perf/builtin-stat.c
> +++ b/tools/perf/builtin-stat.c
> @@ -2442,9 +2442,6 @@ int cmd_stat(int argc, const char **argv)
>   
>   	evlist__check_cpu_maps(evsel_list);
>   
> -	if (perf_pmu__has_hybrid())
> -		stat_config.no_merge = true;
> -
>   	/*
>   	 * Initialize thread_map with comm names,
>   	 * so we could print it out on output.
> diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
> index b759dfd633b4..c6070f4684ca 100644
> --- a/tools/perf/util/stat-display.c
> +++ b/tools/perf/util/stat-display.c
> @@ -595,6 +595,19 @@ static void collect_all_aliases(struct perf_stat_config *config, struct evsel *c
>   	}
>   }
>   
> +static bool is_uncore(struct evsel *evsel)
> +{
> +	struct perf_pmu *pmu;
> +
> +	if (evsel->pmu_name) {
> +		pmu = perf_pmu__find(evsel->pmu_name);
> +		if (pmu)
> +			return pmu->is_uncore;
> +	}
> +
> +	return false;
> +}
> +
>   static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
>   			    void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data,
>   				       bool first),
> @@ -603,10 +616,18 @@ static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
>   	if (counter->merged_stat)
>   		return false;
>   	cb(config, counter, data, true);
> -	if (config->no_merge)
> -		uniquify_event_name(counter);
> -	else if (counter->auto_merge_stats)
> -		collect_all_aliases(config, counter, cb, data);
> +	if (perf_pmu__has_hybrid()) {
> +		if (config->no_merge || !is_uncore(counter))
> +			uniquify_event_name(counter);
> +		else if (counter->auto_merge_stats)
> +			collect_all_aliases(config, counter, cb, data);
> +	} else {
> +		if (config->no_merge)
> +			uniquify_event_name(counter);
> +		else if (counter->auto_merge_stats)
> +			collect_all_aliases(config, counter, cb, data);
> +	}
> +
>   	return true;
>   }
>   
> 

Any comments on this patch? :)

Thanks
Jin Yao

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] perf stat: Merge uncore events by default for hybrid platform
  2021-07-06  2:32 ` Jin, Yao
@ 2021-07-06 19:51   ` Jiri Olsa
  2021-07-07  5:44     ` Jin, Yao
  0 siblings, 1 reply; 4+ messages in thread
From: Jiri Olsa @ 2021-07-06 19:51 UTC (permalink / raw)
  To: Jin, Yao
  Cc: acme, jolsa, peterz, mingo, alexander.shishkin, Linux-kernel, ak,
	kan.liang, yao.jin

On Tue, Jul 06, 2021 at 10:32:57AM +0800, Jin, Yao wrote:
> Hi,
> 
> On 6/16/2021 2:30 PM, Jin Yao wrote:
> > On hybrid platform, by default stat aggregates and reports the event counts
> > per pmu. For example,
> > 
> >    # perf stat -e cycles -a true
> > 
> >     Performance counter stats for 'system wide':
> > 
> >             1,400,445      cpu_core/cycles/
> >               680,881      cpu_atom/cycles/
> > 
> >           0.001770773 seconds time elapsed
> > 
> > While for uncore events, that's not a suitable method. Uncore has nothing
> > to do with hybrid. So for uncore events, we aggregate event counts from all
> > PMUs and report the counts without PMUs.
> > 
> > Before:
> > 
> >    # perf stat -e arb/event=0x81,umask=0x1/,arb/event=0x84,umask=0x1/ -a true
> > 
> >     Performance counter stats for 'system wide':
> > 
> >                 2,058      uncore_arb_0/event=0x81,umask=0x1/
> >                 2,028      uncore_arb_1/event=0x81,umask=0x1/
> >                     0      uncore_arb_0/event=0x84,umask=0x1/
> >                     0      uncore_arb_1/event=0x84,umask=0x1/
> > 
> >           0.000614498 seconds time elapsed
> > 
> > After:
> > 
> >    # perf stat -e arb/event=0x81,umask=0x1/,arb/event=0x84,umask=0x1/ -a true
> > 
> >     Performance counter stats for 'system wide':
> > 
> >                 3,996      arb/event=0x81,umask=0x1/
> >                     0      arb/event=0x84,umask=0x1/
> > 
> >           0.000630046 seconds time elapsed
> > 
> > Of course, we also keep the '--no-merge' still works for uncore events.
> > 
> >    # perf stat -e arb/event=0x81,umask=0x1/,arb/event=0x84,umask=0x1/ --no-merge true
> > 
> >     Performance counter stats for 'system wide':
> > 
> >                 1,952      uncore_arb_0/event=0x81,umask=0x1/
> >                 1,921      uncore_arb_1/event=0x81,umask=0x1/
> >                     0      uncore_arb_0/event=0x84,umask=0x1/
> >                     0      uncore_arb_1/event=0x84,umask=0x1/
> > 
> >           0.000575536 seconds time elapsed
> > 
> > Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
> > ---
> >   tools/perf/builtin-stat.c      |  3 ---
> >   tools/perf/util/stat-display.c | 29 +++++++++++++++++++++++++----
> >   2 files changed, 25 insertions(+), 7 deletions(-)
> > 
> > diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> > index f9f74a514315..b67a44982b61 100644
> > --- a/tools/perf/builtin-stat.c
> > +++ b/tools/perf/builtin-stat.c
> > @@ -2442,9 +2442,6 @@ int cmd_stat(int argc, const char **argv)
> >   	evlist__check_cpu_maps(evsel_list);
> > -	if (perf_pmu__has_hybrid())
> > -		stat_config.no_merge = true;
> > -
> >   	/*
> >   	 * Initialize thread_map with comm names,
> >   	 * so we could print it out on output.
> > diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
> > index b759dfd633b4..c6070f4684ca 100644
> > --- a/tools/perf/util/stat-display.c
> > +++ b/tools/perf/util/stat-display.c
> > @@ -595,6 +595,19 @@ static void collect_all_aliases(struct perf_stat_config *config, struct evsel *c
> >   	}
> >   }
> > +static bool is_uncore(struct evsel *evsel)
> > +{
> > +	struct perf_pmu *pmu;
> > +
> > +	if (evsel->pmu_name) {
> > +		pmu = perf_pmu__find(evsel->pmu_name);

evsel__find_pmu might be one line shorter? ;-)


> > +		if (pmu)
> > +			return pmu->is_uncore;
> > +	}
> > +
> > +	return false;
> > +}
> > +
> >   static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
> >   			    void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data,
> >   				       bool first),
> > @@ -603,10 +616,18 @@ static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
> >   	if (counter->merged_stat)
> >   		return false;
> >   	cb(config, counter, data, true);
> > -	if (config->no_merge)
> > -		uniquify_event_name(counter);
> > -	else if (counter->auto_merge_stats)
> > -		collect_all_aliases(config, counter, cb, data);
> > +	if (perf_pmu__has_hybrid()) {
> > +		if (config->no_merge || !is_uncore(counter))

hum, this is all the same except for the !is_uncore condition, right?

could we just add 'config->no_merge || hybrid_uniquify(counter)'

that would cover both perf_pmu__has_hybrid and !is_uncore conditions?

jirka

> > +			uniquify_event_name(counter);
> > +		else if (counter->auto_merge_stats)
> > +			collect_all_aliases(config, counter, cb, data);
> > +	} else {
> > +		if (config->no_merge)
> > +			uniquify_event_name(counter);
> > +		else if (counter->auto_merge_stats)
> > +			collect_all_aliases(config, counter, cb, data);
> > +	}
> > +
> >   	return true;
> >   }
> > 
> 
> Any comments for this patch? :)
> 
> Thanks
> Jin Yao
> 


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] perf stat: Merge uncore events by default for hybrid platform
  2021-07-06 19:51   ` Jiri Olsa
@ 2021-07-07  5:44     ` Jin, Yao
  0 siblings, 0 replies; 4+ messages in thread
From: Jin, Yao @ 2021-07-07  5:44 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: acme, jolsa, peterz, mingo, alexander.shishkin, Linux-kernel, ak,
	kan.liang, yao.jin

Hi Jiri,

On 7/7/2021 3:51 AM, Jiri Olsa wrote:
> On Tue, Jul 06, 2021 at 10:32:57AM +0800, Jin, Yao wrote:
>> Hi,
>>
>> On 6/16/2021 2:30 PM, Jin Yao wrote:
>>> On hybrid platform, by default stat aggregates and reports the event counts
>>> per pmu. For example,
>>>
>>>     # perf stat -e cycles -a true
>>>
>>>      Performance counter stats for 'system wide':
>>>
>>>              1,400,445      cpu_core/cycles/
>>>                680,881      cpu_atom/cycles/
>>>
>>>            0.001770773 seconds time elapsed
>>>
>>> While for uncore events, that's not a suitable method. Uncore has nothing
>>> to do with hybrid. So for uncore events, we aggregate event counts from all
>>> PMUs and report the counts without PMUs.
>>>
>>> Before:
>>>
>>>     # perf stat -e arb/event=0x81,umask=0x1/,arb/event=0x84,umask=0x1/ -a true
>>>
>>>      Performance counter stats for 'system wide':
>>>
>>>                  2,058      uncore_arb_0/event=0x81,umask=0x1/
>>>                  2,028      uncore_arb_1/event=0x81,umask=0x1/
>>>                      0      uncore_arb_0/event=0x84,umask=0x1/
>>>                      0      uncore_arb_1/event=0x84,umask=0x1/
>>>
>>>            0.000614498 seconds time elapsed
>>>
>>> After:
>>>
>>>     # perf stat -e arb/event=0x81,umask=0x1/,arb/event=0x84,umask=0x1/ -a true
>>>
>>>      Performance counter stats for 'system wide':
>>>
>>>                  3,996      arb/event=0x81,umask=0x1/
>>>                      0      arb/event=0x84,umask=0x1/
>>>
>>>            0.000630046 seconds time elapsed
>>>
>>> Of course, we also keep the '--no-merge' still works for uncore events.
>>>
>>>     # perf stat -e arb/event=0x81,umask=0x1/,arb/event=0x84,umask=0x1/ --no-merge true
>>>
>>>      Performance counter stats for 'system wide':
>>>
>>>                  1,952      uncore_arb_0/event=0x81,umask=0x1/
>>>                  1,921      uncore_arb_1/event=0x81,umask=0x1/
>>>                      0      uncore_arb_0/event=0x84,umask=0x1/
>>>                      0      uncore_arb_1/event=0x84,umask=0x1/
>>>
>>>            0.000575536 seconds time elapsed
>>>
>>> Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
>>> ---
>>>    tools/perf/builtin-stat.c      |  3 ---
>>>    tools/perf/util/stat-display.c | 29 +++++++++++++++++++++++++----
>>>    2 files changed, 25 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
>>> index f9f74a514315..b67a44982b61 100644
>>> --- a/tools/perf/builtin-stat.c
>>> +++ b/tools/perf/builtin-stat.c
>>> @@ -2442,9 +2442,6 @@ int cmd_stat(int argc, const char **argv)
>>>    	evlist__check_cpu_maps(evsel_list);
>>> -	if (perf_pmu__has_hybrid())
>>> -		stat_config.no_merge = true;
>>> -
>>>    	/*
>>>    	 * Initialize thread_map with comm names,
>>>    	 * so we could print it out on output.
>>> diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
>>> index b759dfd633b4..c6070f4684ca 100644
>>> --- a/tools/perf/util/stat-display.c
>>> +++ b/tools/perf/util/stat-display.c
>>> @@ -595,6 +595,19 @@ static void collect_all_aliases(struct perf_stat_config *config, struct evsel *c
>>>    	}
>>>    }
>>> +static bool is_uncore(struct evsel *evsel)
>>> +{
>>> +	struct perf_pmu *pmu;
>>> +
>>> +	if (evsel->pmu_name) {
>>> +		pmu = perf_pmu__find(evsel->pmu_name);
> 
> evsel__find_pmu might be one line shorter? ;-)
> 

Yes, this is a better method, thanks!

> 
>>> +		if (pmu)
>>> +			return pmu->is_uncore;
>>> +	}
>>> +
>>> +	return false;
>>> +}
>>> +
>>>    static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
>>>    			    void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data,
>>>    				       bool first),
>>> @@ -603,10 +616,18 @@ static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
>>>    	if (counter->merged_stat)
>>>    		return false;
>>>    	cb(config, counter, data, true);
>>> -	if (config->no_merge)
>>> -		uniquify_event_name(counter);
>>> -	else if (counter->auto_merge_stats)
>>> -		collect_all_aliases(config, counter, cb, data);
>>> +	if (perf_pmu__has_hybrid()) {
>>> +		if (config->no_merge || !is_uncore(counter))
> 
> hum, this is all the same except for the !is_uncore condition, right?
> 
> could we just add 'config->no_merge || hybrid_uniquify(count)'
> 
> that would cover both perf_pmu__has_hybrid and !is_uncore conditions?
> 

Yes, I will create a new function 'hybrid_uniquify' to check whether the event name should be uniquified on hybrid platforms.

Thanks
Jin Yao

> jirka
> 
>>> +			uniquify_event_name(counter);
>>> +		else if (counter->auto_merge_stats)
>>> +			collect_all_aliases(config, counter, cb, data);
>>> +	} else {
>>> +		if (config->no_merge)
>>> +			uniquify_event_name(counter);
>>> +		else if (counter->auto_merge_stats)
>>> +			collect_all_aliases(config, counter, cb, data);
>>> +	}
>>> +
>>>    	return true;
>>>    }
>>>
>>
>> Any comments for this patch? :)
>>
>> Thanks
>> Jin Yao
>>
> 

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-07-07  5:44 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-16  6:30 [PATCH] perf stat: Merge uncore events by default for hybrid platform Jin Yao
2021-07-06  2:32 ` Jin, Yao
2021-07-06 19:51   ` Jiri Olsa
2021-07-07  5:44     ` Jin, Yao

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).