All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] perf evsel: Fix missing exclude_{host,guest} setting
@ 2021-10-16  5:12 Namhyung Kim
  2021-10-17 19:05 ` Jiri Olsa
  0 siblings, 1 reply; 6+ messages in thread
From: Namhyung Kim @ 2021-10-16  5:12 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Jiri Olsa
  Cc: Ingo Molnar, Peter Zijlstra, LKML, Andi Kleen, Ian Rogers,
	Stephane Eranian

The current logic for the perf missing feature has a bug that it can
wrongly clear some modifiers like G or H.  Actually some PMUs don't
support any filtering or exclusion while others do.  But we check it
as a global feature.

For example, the cycles event can have 'G' modifier to enable it only
in the guest mode on x86.  When you don't run any VMs it'll return 0.

  # perf stat -a -e cycles:G sleep 1

    Performance counter stats for 'system wide':

                    0      cycles:G

          1.000721670 seconds time elapsed

But when it's used with other pmu events that don't support G modifier,
it'll be reset and return non-zero values.

  # perf stat -a -e cycles:G,msr/tsc/ sleep 1

    Performance counter stats for 'system wide':

          538,029,960      cycles:G
       16,924,010,738      msr/tsc/

          1.001815327 seconds time elapsed

This is because of the missing feature detection logic being global.
Add a hashmap to set pmu-specific exclude_host/guest features.

Reported-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/util/evsel.c | 47 +++++++++++++++++++++++++++++++++++------
 tools/perf/util/evsel.h |  6 ++++++
 2 files changed, 47 insertions(+), 6 deletions(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index dbfeceb2546c..437a28e769fe 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1434,6 +1434,10 @@ void evsel__delete(struct evsel *evsel)
 {
 	evsel__exit(evsel);
 	free(evsel);
+
+	/* just free it for the first evsel */
+	hashmap__free(perf_missing_features.pmu);
+	perf_missing_features.pmu = NULL;
 }
 
 void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
@@ -1791,6 +1795,23 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
 	return 0;
 }
 
+#define PMU_HASH_BITS  4
+
+static size_t pmu_hash(const void *key, void *ctx __maybe_unused)
+{
+	const struct evsel *evsel = key;
+
+	return hash_bits(evsel->core.attr.type, PMU_HASH_BITS);
+}
+
+static bool pmu_equal(const void *key1, const void *key2, void *ctx __maybe_unused)
+{
+	const struct evsel *a = key1;
+	const struct evsel *b = key2;
+
+	return a->core.attr.type == b->core.attr.type;
+}
+
 static void evsel__disable_missing_features(struct evsel *evsel)
 {
 	if (perf_missing_features.weight_struct) {
@@ -1807,8 +1828,14 @@ static void evsel__disable_missing_features(struct evsel *evsel)
 		evsel->open_flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC;
 	if (perf_missing_features.mmap2)
 		evsel->core.attr.mmap2 = 0;
-	if (perf_missing_features.exclude_guest)
-		evsel->core.attr.exclude_guest = evsel->core.attr.exclude_host = 0;
+	if (perf_missing_features.exclude_guest) {
+		void *pmu;
+
+		if (hashmap__find(perf_missing_features.pmu, evsel, &pmu)) {
+			evsel->core.attr.exclude_guest = 0;
+			evsel->core.attr.exclude_host = 0;
+		}
+	}
 	if (perf_missing_features.lbr_flags)
 		evsel->core.attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS |
 				     PERF_SAMPLE_BRANCH_NO_CYCLES);
@@ -1840,6 +1867,9 @@ int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
 
 bool evsel__detect_missing_features(struct evsel *evsel)
 {
+	if (perf_missing_features.pmu == NULL)
+		perf_missing_features.pmu = hashmap__new(pmu_hash, pmu_equal, NULL);
+
 	/*
 	 * Must probe features in the order they were added to the
 	 * perf_event_attr interface.
@@ -1900,10 +1930,15 @@ bool evsel__detect_missing_features(struct evsel *evsel)
 		perf_missing_features.mmap2 = true;
 		pr_debug2_peo("switching off mmap2\n");
 		return true;
-	} else if (!perf_missing_features.exclude_guest &&
-		   (evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host)) {
-		perf_missing_features.exclude_guest = true;
-		pr_debug2_peo("switching off exclude_guest, exclude_host\n");
+	} else if ((evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) &&
+		   !hashmap__find(perf_missing_features.pmu, evsel, NULL)) {
+		struct perf_missing_pmu_features pmu_features = { true };
+		hashmap__add(perf_missing_features.pmu, evsel, &pmu_features);
+
+		if (!perf_missing_features.exclude_guest) {
+			perf_missing_features.exclude_guest = true;
+			pr_debug2_peo("switching off exclude_guest, exclude_host\n");
+		}
 		return true;
 	} else if (!perf_missing_features.sample_id_all) {
 		perf_missing_features.sample_id_all = true;
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 1f7edfa8568a..8dd11c8e022d 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -172,6 +172,12 @@ struct perf_missing_features {
 	bool data_page_size;
 	bool code_page_size;
 	bool weight_struct;
+
+	struct hashmap *pmu;
+};
+
+struct perf_missing_pmu_features {
+	bool exclude_guest;
 };
 
 extern struct perf_missing_features perf_missing_features;
-- 
2.33.0.1079.g6e70778dc9-goog


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] perf evsel: Fix missing exclude_{host,guest} setting
  2021-10-16  5:12 [PATCH] perf evsel: Fix missing exclude_{host,guest} setting Namhyung Kim
@ 2021-10-17 19:05 ` Jiri Olsa
  2021-10-17 21:03   ` Arnaldo Carvalho de Melo
  2021-10-18  4:44   ` Namhyung Kim
  0 siblings, 2 replies; 6+ messages in thread
From: Jiri Olsa @ 2021-10-17 19:05 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Arnaldo Carvalho de Melo, Ingo Molnar, Peter Zijlstra, LKML,
	Andi Kleen, Ian Rogers, Stephane Eranian

On Fri, Oct 15, 2021 at 10:12:53PM -0700, Namhyung Kim wrote:
> The current logic for the perf missing feature has a bug that it can
> wrongly clear some modifiers like G or H.  Actually some PMUs don't
> support any filtering or exclusion while others do.  But we check it
> as a global feature.
> 
> For example, the cycles event can have 'G' modifier to enable it only
> in the guest mode on x86.  When you don't run any VMs it'll return 0.
> 
>   # perf stat -a -e cycles:G sleep 1
> 
>     Performance counter stats for 'system wide':
> 
>                     0      cycles:G
> 
>           1.000721670 seconds time elapsed
> 
> But when it's used with other pmu events that don't support G modifier,
> it'll be reset and return non-zero values.
> 
>   # perf stat -a -e cycles:G,msr/tsc/ sleep 1
> 
>     Performance counter stats for 'system wide':
> 
>           538,029,960      cycles:G
>        16,924,010,738      msr/tsc/
> 
>           1.001815327 seconds time elapsed
> 
> This is because of the missing feature detection logic being global.
> Add a hashmap to set pmu-specific exclude_host/guest features.
> 
> Reported-by: Stephane Eranian <eranian@google.com>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> ---
>  tools/perf/util/evsel.c | 47 +++++++++++++++++++++++++++++++++++------
>  tools/perf/util/evsel.h |  6 ++++++
>  2 files changed, 47 insertions(+), 6 deletions(-)
> 
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index dbfeceb2546c..437a28e769fe 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -1434,6 +1434,10 @@ void evsel__delete(struct evsel *evsel)
>  {
>  	evsel__exit(evsel);
>  	free(evsel);
> +
> +	/* just free it for the first evsel */
> +	hashmap__free(perf_missing_features.pmu);
> +	perf_missing_features.pmu = NULL;
>  }
>  
>  void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
> @@ -1791,6 +1795,23 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
>  	return 0;
>  }
>  
> +#define PMU_HASH_BITS  4
> +
> +static size_t pmu_hash(const void *key, void *ctx __maybe_unused)
> +{
> +	const struct evsel *evsel = key;
> +
> +	return hash_bits(evsel->core.attr.type, PMU_HASH_BITS);
> +}
> +
> +static bool pmu_equal(const void *key1, const void *key2, void *ctx __maybe_unused)
> +{
> +	const struct evsel *a = key1;
> +	const struct evsel *b = key2;
> +
> +	return a->core.attr.type == b->core.attr.type;
> +}
> +
>  static void evsel__disable_missing_features(struct evsel *evsel)
>  {
>  	if (perf_missing_features.weight_struct) {
> @@ -1807,8 +1828,14 @@ static void evsel__disable_missing_features(struct evsel *evsel)
>  		evsel->open_flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC;
>  	if (perf_missing_features.mmap2)
>  		evsel->core.attr.mmap2 = 0;
> -	if (perf_missing_features.exclude_guest)
> -		evsel->core.attr.exclude_guest = evsel->core.attr.exclude_host = 0;
> +	if (perf_missing_features.exclude_guest) {
> +		void *pmu;

could you just pass NULL in here instead of NULL?

> +
> +		if (hashmap__find(perf_missing_features.pmu, evsel, &pmu)) {
> +			evsel->core.attr.exclude_guest = 0;
> +			evsel->core.attr.exclude_host = 0;
> +		}
> +	}
>  	if (perf_missing_features.lbr_flags)
>  		evsel->core.attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS |
>  				     PERF_SAMPLE_BRANCH_NO_CYCLES);
> @@ -1840,6 +1867,9 @@ int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
>  
>  bool evsel__detect_missing_features(struct evsel *evsel)
>  {
> +	if (perf_missing_features.pmu == NULL)
> +		perf_missing_features.pmu = hashmap__new(pmu_hash, pmu_equal, NULL);
> +
>  	/*
>  	 * Must probe features in the order they were added to the
>  	 * perf_event_attr interface.
> @@ -1900,10 +1930,15 @@ bool evsel__detect_missing_features(struct evsel *evsel)
>  		perf_missing_features.mmap2 = true;
>  		pr_debug2_peo("switching off mmap2\n");
>  		return true;
> -	} else if (!perf_missing_features.exclude_guest &&
> -		   (evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host)) {
> -		perf_missing_features.exclude_guest = true;
> -		pr_debug2_peo("switching off exclude_guest, exclude_host\n");
> +	} else if ((evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) &&
> +		   !hashmap__find(perf_missing_features.pmu, evsel, NULL)) {
> +		struct perf_missing_pmu_features pmu_features = { true };

missing new line after declaration

> +		hashmap__add(perf_missing_features.pmu, evsel, &pmu_features);
> +
> +		if (!perf_missing_features.exclude_guest) {
> +			perf_missing_features.exclude_guest = true;
> +			pr_debug2_peo("switching off exclude_guest, exclude_host\n");
> +		}
>  		return true;
>  	} else if (!perf_missing_features.sample_id_all) {
>  		perf_missing_features.sample_id_all = true;
> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> index 1f7edfa8568a..8dd11c8e022d 100644
> --- a/tools/perf/util/evsel.h
> +++ b/tools/perf/util/evsel.h
> @@ -172,6 +172,12 @@ struct perf_missing_features {
>  	bool data_page_size;
>  	bool code_page_size;
>  	bool weight_struct;
> +
> +	struct hashmap *pmu;
> +};
> +
> +struct perf_missing_pmu_features {
> +	bool exclude_guest;
>  };

hum, is this really needed? I think you could just pass '1' as value,
because you care only if the item is hashed, right?

in any case the value is the current stack address of the
  struct perf_missing_pmu_features pmu_features = { true };

so it might as well be just '1' ... I was confused at the beggining
and looked for the reason of this struct ;-)

we do that already in util/stat.c

jirka

>  
>  extern struct perf_missing_features perf_missing_features;
> -- 
> 2.33.0.1079.g6e70778dc9-goog
> 


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] perf evsel: Fix missing exclude_{host,guest} setting
  2021-10-17 19:05 ` Jiri Olsa
@ 2021-10-17 21:03   ` Arnaldo Carvalho de Melo
  2021-10-17 21:06     ` Jiri Olsa
  2021-10-18  4:44   ` Namhyung Kim
  1 sibling, 1 reply; 6+ messages in thread
From: Arnaldo Carvalho de Melo @ 2021-10-17 21:03 UTC (permalink / raw)
  To: Jiri Olsa, Namhyung Kim
  Cc: Arnaldo Carvalho de Melo, Ingo Molnar, Peter Zijlstra, LKML,
	Andi Kleen, Ian Rogers, Stephane Eranian



On October 17, 2021 4:05:46 PM GMT-03:00, Jiri Olsa <jolsa@redhat.com> wrote:
>On Fri, Oct 15, 2021 at 10:12:53PM -0700, Namhyung Kim wrote:
>> The current logic for the perf missing feature has a bug that it can
>> wrongly clear some modifiers like G or H.  Actually some PMUs don't
>> support any filtering or exclusion while others do.  But we check it
>> as a global feature.
>> 
>> For example, the cycles event can have 'G' modifier to enable it only
>> in the guest mode on x86.  When you don't run any VMs it'll return 0.
>> 
>>   # perf stat -a -e cycles:G sleep 1
>> 
>>     Performance counter stats for 'system wide':
>> 
>>                     0      cycles:G
>> 
>>           1.000721670 seconds time elapsed
>> 
>> But when it's used with other pmu events that don't support G modifier,
>> it'll be reset and return non-zero values.
>> 
>>   # perf stat -a -e cycles:G,msr/tsc/ sleep 1
>> 
>>     Performance counter stats for 'system wide':
>> 
>>           538,029,960      cycles:G
>>        16,924,010,738      msr/tsc/
>> 
>>           1.001815327 seconds time elapsed
>> 
>> This is because of the missing feature detection logic being global.
>> Add a hashmap to set pmu-specific exclude_host/guest features.
>> 
>> Reported-by: Stephane Eranian <eranian@google.com>
>> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
>> ---
>>  tools/perf/util/evsel.c | 47 +++++++++++++++++++++++++++++++++++------
>>  tools/perf/util/evsel.h |  6 ++++++
>>  2 files changed, 47 insertions(+), 6 deletions(-)
>> 
>> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
>> index dbfeceb2546c..437a28e769fe 100644
>> --- a/tools/perf/util/evsel.c
>> +++ b/tools/perf/util/evsel.c
>> @@ -1434,6 +1434,10 @@ void evsel__delete(struct evsel *evsel)
>>  {
>>  	evsel__exit(evsel);
>>  	free(evsel);
>> +
>> +	/* just free it for the first evsel */
>> +	hashmap__free(perf_missing_features.pmu);
>> +	perf_missing_features.pmu = NULL;
>>  }
>>  
>>  void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
>> @@ -1791,6 +1795,23 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
>>  	return 0;
>>  }
>>  
>> +#define PMU_HASH_BITS  4
>> +
>> +static size_t pmu_hash(const void *key, void *ctx __maybe_unused)
>> +{
>> +	const struct evsel *evsel = key;
>> +
>> +	return hash_bits(evsel->core.attr.type, PMU_HASH_BITS);
>> +}
>> +
>> +static bool pmu_equal(const void *key1, const void *key2, void *ctx __maybe_unused)
>> +{
>> +	const struct evsel *a = key1;
>> +	const struct evsel *b = key2;
>> +
>> +	return a->core.attr.type == b->core.attr.type;
>> +}
>> +
>>  static void evsel__disable_missing_features(struct evsel *evsel)
>>  {
>>  	if (perf_missing_features.weight_struct) {
>> @@ -1807,8 +1828,14 @@ static void evsel__disable_missing_features(struct evsel *evsel)
>>  		evsel->open_flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC;
>>  	if (perf_missing_features.mmap2)
>>  		evsel->core.attr.mmap2 = 0;
>> -	if (perf_missing_features.exclude_guest)
>> -		evsel->core.attr.exclude_guest = evsel->core.attr.exclude_host = 0;
>> +	if (perf_missing_features.exclude_guest) {
>> +		void *pmu;
>
>could you just pass NULL in here instead of NULL?

ENOPARSE 

>
>> +
>> +		if (hashmap__find(perf_missing_features.pmu, evsel, &pmu)) {
>> +			evsel->core.attr.exclude_guest = 0;
>> +			evsel->core.attr.exclude_host = 0;
>> +		}
>> +	}
>>  	if (perf_missing_features.lbr_flags)
>>  		evsel->core.attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS |
>>  				     PERF_SAMPLE_BRANCH_NO_CYCLES);
>> @@ -1840,6 +1867,9 @@ int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
>>  
>>  bool evsel__detect_missing_features(struct evsel *evsel)
>>  {
>> +	if (perf_missing_features.pmu == NULL)
>> +		perf_missing_features.pmu = hashmap__new(pmu_hash, pmu_equal, NULL);
>> +
>>  	/*
>>  	 * Must probe features in the order they were added to the
>>  	 * perf_event_attr interface.
>> @@ -1900,10 +1930,15 @@ bool evsel__detect_missing_features(struct evsel *evsel)
>>  		perf_missing_features.mmap2 = true;
>>  		pr_debug2_peo("switching off mmap2\n");
>>  		return true;
>> -	} else if (!perf_missing_features.exclude_guest &&
>> -		   (evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host)) {
>> -		perf_missing_features.exclude_guest = true;
>> -		pr_debug2_peo("switching off exclude_guest, exclude_host\n");
>> +	} else if ((evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) &&
>> +		   !hashmap__find(perf_missing_features.pmu, evsel, NULL)) {
>> +		struct perf_missing_pmu_features pmu_features = { true };
>
>missing new line after declaration
>
>> +		hashmap__add(perf_missing_features.pmu, evsel, &pmu_features);
>> +
>> +		if (!perf_missing_features.exclude_guest) {
>> +			perf_missing_features.exclude_guest = true;
>> +			pr_debug2_peo("switching off exclude_guest, exclude_host\n");
>> +		}
>>  		return true;
>>  	} else if (!perf_missing_features.sample_id_all) {
>>  		perf_missing_features.sample_id_all = true;
>> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
>> index 1f7edfa8568a..8dd11c8e022d 100644
>> --- a/tools/perf/util/evsel.h
>> +++ b/tools/perf/util/evsel.h
>> @@ -172,6 +172,12 @@ struct perf_missing_features {
>>  	bool data_page_size;
>>  	bool code_page_size;
>>  	bool weight_struct;
>> +
>> +	struct hashmap *pmu;
>> +};
>> +
>> +struct perf_missing_pmu_features {
>> +	bool exclude_guest;
>>  };
>
>hum, is this really needed? I think you could just pass '1' as value,
>because you care only if the item is hashed, right?
>
>in any case the value is the current stack address of the
>  struct perf_missing_pmu_features pmu_features = { true };
>
>so it might as well be just '1' ... I was confused at the beggining
>and looked for the reason of this struct ;-)
>
>we do that already in util/stat.c
>
>jirka
>
>>  
>>  extern struct perf_missing_features perf_missing_features;
>> -- 
>> 2.33.0.1079.g6e70778dc9-goog
>> 
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] perf evsel: Fix missing exclude_{host,guest} setting
  2021-10-17 21:03   ` Arnaldo Carvalho de Melo
@ 2021-10-17 21:06     ` Jiri Olsa
  2021-10-18  4:53       ` Namhyung Kim
  0 siblings, 1 reply; 6+ messages in thread
From: Jiri Olsa @ 2021-10-17 21:06 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Namhyung Kim, Arnaldo Carvalho de Melo, Ingo Molnar,
	Peter Zijlstra, LKML, Andi Kleen, Ian Rogers, Stephane Eranian

On Sun, Oct 17, 2021 at 06:03:21PM -0300, Arnaldo Carvalho de Melo wrote:
> 
> 
> On October 17, 2021 4:05:46 PM GMT-03:00, Jiri Olsa <jolsa@redhat.com> wrote:
> >On Fri, Oct 15, 2021 at 10:12:53PM -0700, Namhyung Kim wrote:
> >> The current logic for the perf missing feature has a bug that it can
> >> wrongly clear some modifiers like G or H.  Actually some PMUs don't
> >> support any filtering or exclusion while others do.  But we check it
> >> as a global feature.
> >> 
> >> For example, the cycles event can have 'G' modifier to enable it only
> >> in the guest mode on x86.  When you don't run any VMs it'll return 0.
> >> 
> >>   # perf stat -a -e cycles:G sleep 1
> >> 
> >>     Performance counter stats for 'system wide':
> >> 
> >>                     0      cycles:G
> >> 
> >>           1.000721670 seconds time elapsed
> >> 
> >> But when it's used with other pmu events that don't support G modifier,
> >> it'll be reset and return non-zero values.
> >> 
> >>   # perf stat -a -e cycles:G,msr/tsc/ sleep 1
> >> 
> >>     Performance counter stats for 'system wide':
> >> 
> >>           538,029,960      cycles:G
> >>        16,924,010,738      msr/tsc/
> >> 
> >>           1.001815327 seconds time elapsed
> >> 
> >> This is because of the missing feature detection logic being global.
> >> Add a hashmap to set pmu-specific exclude_host/guest features.
> >> 
> >> Reported-by: Stephane Eranian <eranian@google.com>
> >> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> >> ---
> >>  tools/perf/util/evsel.c | 47 +++++++++++++++++++++++++++++++++++------
> >>  tools/perf/util/evsel.h |  6 ++++++
> >>  2 files changed, 47 insertions(+), 6 deletions(-)
> >> 
> >> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> >> index dbfeceb2546c..437a28e769fe 100644
> >> --- a/tools/perf/util/evsel.c
> >> +++ b/tools/perf/util/evsel.c
> >> @@ -1434,6 +1434,10 @@ void evsel__delete(struct evsel *evsel)
> >>  {
> >>  	evsel__exit(evsel);
> >>  	free(evsel);
> >> +
> >> +	/* just free it for the first evsel */
> >> +	hashmap__free(perf_missing_features.pmu);
> >> +	perf_missing_features.pmu = NULL;
> >>  }
> >>  
> >>  void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
> >> @@ -1791,6 +1795,23 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
> >>  	return 0;
> >>  }
> >>  
> >> +#define PMU_HASH_BITS  4
> >> +
> >> +static size_t pmu_hash(const void *key, void *ctx __maybe_unused)
> >> +{
> >> +	const struct evsel *evsel = key;
> >> +
> >> +	return hash_bits(evsel->core.attr.type, PMU_HASH_BITS);
> >> +}
> >> +
> >> +static bool pmu_equal(const void *key1, const void *key2, void *ctx __maybe_unused)
> >> +{
> >> +	const struct evsel *a = key1;
> >> +	const struct evsel *b = key2;
> >> +
> >> +	return a->core.attr.type == b->core.attr.type;
> >> +}
> >> +
> >>  static void evsel__disable_missing_features(struct evsel *evsel)
> >>  {
> >>  	if (perf_missing_features.weight_struct) {
> >> @@ -1807,8 +1828,14 @@ static void evsel__disable_missing_features(struct evsel *evsel)
> >>  		evsel->open_flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC;
> >>  	if (perf_missing_features.mmap2)
> >>  		evsel->core.attr.mmap2 = 0;
> >> -	if (perf_missing_features.exclude_guest)
> >> -		evsel->core.attr.exclude_guest = evsel->core.attr.exclude_host = 0;
> >> +	if (perf_missing_features.exclude_guest) {
> >> +		void *pmu;
> >
> >could you just pass NULL in here instead of NULL?
> 
> ENOPARSE 

ugh right... 'instead of &pmu' ;-)

thanks,
jirka

> 
> >
> >> +
> >> +		if (hashmap__find(perf_missing_features.pmu, evsel, &pmu)) {
> >> +			evsel->core.attr.exclude_guest = 0;
> >> +			evsel->core.attr.exclude_host = 0;
> >> +		}
> >> +	}
> >>  	if (perf_missing_features.lbr_flags)
> >>  		evsel->core.attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS |
> >>  				     PERF_SAMPLE_BRANCH_NO_CYCLES);
> >> @@ -1840,6 +1867,9 @@ int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
> >>  
> >>  bool evsel__detect_missing_features(struct evsel *evsel)
> >>  {
> >> +	if (perf_missing_features.pmu == NULL)
> >> +		perf_missing_features.pmu = hashmap__new(pmu_hash, pmu_equal, NULL);
> >> +
> >>  	/*
> >>  	 * Must probe features in the order they were added to the
> >>  	 * perf_event_attr interface.
> >> @@ -1900,10 +1930,15 @@ bool evsel__detect_missing_features(struct evsel *evsel)
> >>  		perf_missing_features.mmap2 = true;
> >>  		pr_debug2_peo("switching off mmap2\n");
> >>  		return true;
> >> -	} else if (!perf_missing_features.exclude_guest &&
> >> -		   (evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host)) {
> >> -		perf_missing_features.exclude_guest = true;
> >> -		pr_debug2_peo("switching off exclude_guest, exclude_host\n");
> >> +	} else if ((evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) &&
> >> +		   !hashmap__find(perf_missing_features.pmu, evsel, NULL)) {
> >> +		struct perf_missing_pmu_features pmu_features = { true };
> >
> >missing new line after declaration
> >
> >> +		hashmap__add(perf_missing_features.pmu, evsel, &pmu_features);
> >> +
> >> +		if (!perf_missing_features.exclude_guest) {
> >> +			perf_missing_features.exclude_guest = true;
> >> +			pr_debug2_peo("switching off exclude_guest, exclude_host\n");
> >> +		}
> >>  		return true;
> >>  	} else if (!perf_missing_features.sample_id_all) {
> >>  		perf_missing_features.sample_id_all = true;
> >> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> >> index 1f7edfa8568a..8dd11c8e022d 100644
> >> --- a/tools/perf/util/evsel.h
> >> +++ b/tools/perf/util/evsel.h
> >> @@ -172,6 +172,12 @@ struct perf_missing_features {
> >>  	bool data_page_size;
> >>  	bool code_page_size;
> >>  	bool weight_struct;
> >> +
> >> +	struct hashmap *pmu;
> >> +};
> >> +
> >> +struct perf_missing_pmu_features {
> >> +	bool exclude_guest;
> >>  };
> >
> >hum, is this really needed? I think you could just pass '1' as value,
> >because you care only if the item is hashed, right?
> >
> >in any case the value is the current stack address of the
> >  struct perf_missing_pmu_features pmu_features = { true };
> >
> >so it might as well be just '1' ... I was confused at the beggining
> >and looked for the reason of this struct ;-)
> >
> >we do that already in util/stat.c
> >
> >jirka
> >
> >>  
> >>  extern struct perf_missing_features perf_missing_features;
> >> -- 
> >> 2.33.0.1079.g6e70778dc9-goog
> >> 
> >
> 


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] perf evsel: Fix missing exclude_{host,guest} setting
  2021-10-17 19:05 ` Jiri Olsa
  2021-10-17 21:03   ` Arnaldo Carvalho de Melo
@ 2021-10-18  4:44   ` Namhyung Kim
  1 sibling, 0 replies; 6+ messages in thread
From: Namhyung Kim @ 2021-10-18  4:44 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Arnaldo Carvalho de Melo, Ingo Molnar, Peter Zijlstra, LKML,
	Andi Kleen, Ian Rogers, Stephane Eranian

Hi Jiri,

On Sun, Oct 17, 2021 at 12:06 PM Jiri Olsa <jolsa@redhat.com> wrote:
>
> On Fri, Oct 15, 2021 at 10:12:53PM -0700, Namhyung Kim wrote:
> > The current logic for the perf missing feature has a bug that it can
> > wrongly clear some modifiers like G or H.  Actually some PMUs don't
> > support any filtering or exclusion while others do.  But we check it
> > as a global feature.
> >
> > For example, the cycles event can have 'G' modifier to enable it only
> > in the guest mode on x86.  When you don't run any VMs it'll return 0.
> >
> >   # perf stat -a -e cycles:G sleep 1
> >
> >     Performance counter stats for 'system wide':
> >
> >                     0      cycles:G
> >
> >           1.000721670 seconds time elapsed
> >
> > But when it's used with other pmu events that don't support G modifier,
> > it'll be reset and return non-zero values.
> >
> >   # perf stat -a -e cycles:G,msr/tsc/ sleep 1
> >
> >     Performance counter stats for 'system wide':
> >
> >           538,029,960      cycles:G
> >        16,924,010,738      msr/tsc/
> >
> >           1.001815327 seconds time elapsed
> >
> > This is because of the missing feature detection logic being global.
> > Add a hashmap to set pmu-specific exclude_host/guest features.
> >
> > Reported-by: Stephane Eranian <eranian@google.com>
> > Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> > ---
> >  tools/perf/util/evsel.c | 47 +++++++++++++++++++++++++++++++++++------
> >  tools/perf/util/evsel.h |  6 ++++++
> >  2 files changed, 47 insertions(+), 6 deletions(-)
> >
> > diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> > index dbfeceb2546c..437a28e769fe 100644
> > --- a/tools/perf/util/evsel.c
> > +++ b/tools/perf/util/evsel.c
> > @@ -1434,6 +1434,10 @@ void evsel__delete(struct evsel *evsel)
> >  {
> >       evsel__exit(evsel);
> >       free(evsel);
> > +
> > +     /* just free it for the first evsel */
> > +     hashmap__free(perf_missing_features.pmu);
> > +     perf_missing_features.pmu = NULL;
> >  }
> >
> >  void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
> > @@ -1791,6 +1795,23 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
> >       return 0;
> >  }
> >
> > +#define PMU_HASH_BITS  4
> > +
> > +static size_t pmu_hash(const void *key, void *ctx __maybe_unused)
> > +{
> > +     const struct evsel *evsel = key;
> > +
> > +     return hash_bits(evsel->core.attr.type, PMU_HASH_BITS);
> > +}
> > +
> > +static bool pmu_equal(const void *key1, const void *key2, void *ctx __maybe_unused)
> > +{
> > +     const struct evsel *a = key1;
> > +     const struct evsel *b = key2;
> > +
> > +     return a->core.attr.type == b->core.attr.type;
> > +}
> > +
> >  static void evsel__disable_missing_features(struct evsel *evsel)
> >  {
> >       if (perf_missing_features.weight_struct) {
> > @@ -1807,8 +1828,14 @@ static void evsel__disable_missing_features(struct evsel *evsel)
> >               evsel->open_flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC;
> >       if (perf_missing_features.mmap2)
> >               evsel->core.attr.mmap2 = 0;
> > -     if (perf_missing_features.exclude_guest)
> > -             evsel->core.attr.exclude_guest = evsel->core.attr.exclude_host = 0;
> > +     if (perf_missing_features.exclude_guest) {
> > +             void *pmu;
>
> could you just pass NULL in here instead of NULL?
>
> > +
> > +             if (hashmap__find(perf_missing_features.pmu, evsel, &pmu)) {
> > +                     evsel->core.attr.exclude_guest = 0;
> > +                     evsel->core.attr.exclude_host = 0;
> > +             }
> > +     }
> >       if (perf_missing_features.lbr_flags)
> >               evsel->core.attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS |
> >                                    PERF_SAMPLE_BRANCH_NO_CYCLES);
> > @@ -1840,6 +1867,9 @@ int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
> >
> >  bool evsel__detect_missing_features(struct evsel *evsel)
> >  {
> > +     if (perf_missing_features.pmu == NULL)
> > +             perf_missing_features.pmu = hashmap__new(pmu_hash, pmu_equal, NULL);
> > +
> >       /*
> >        * Must probe features in the order they were added to the
> >        * perf_event_attr interface.
> > @@ -1900,10 +1930,15 @@ bool evsel__detect_missing_features(struct evsel *evsel)
> >               perf_missing_features.mmap2 = true;
> >               pr_debug2_peo("switching off mmap2\n");
> >               return true;
> > -     } else if (!perf_missing_features.exclude_guest &&
> > -                (evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host)) {
> > -             perf_missing_features.exclude_guest = true;
> > -             pr_debug2_peo("switching off exclude_guest, exclude_host\n");
> > +     } else if ((evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) &&
> > +                !hashmap__find(perf_missing_features.pmu, evsel, NULL)) {
> > +             struct perf_missing_pmu_features pmu_features = { true };
>
> missing new line after declaration

ok

>
> > +             hashmap__add(perf_missing_features.pmu, evsel, &pmu_features);
> > +
> > +             if (!perf_missing_features.exclude_guest) {
> > +                     perf_missing_features.exclude_guest = true;
> > +                     pr_debug2_peo("switching off exclude_guest, exclude_host\n");
> > +             }
> >               return true;
> >       } else if (!perf_missing_features.sample_id_all) {
> >               perf_missing_features.sample_id_all = true;
> > diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> > index 1f7edfa8568a..8dd11c8e022d 100644
> > --- a/tools/perf/util/evsel.h
> > +++ b/tools/perf/util/evsel.h
> > @@ -172,6 +172,12 @@ struct perf_missing_features {
> >       bool data_page_size;
> >       bool code_page_size;
> >       bool weight_struct;
> > +
> > +     struct hashmap *pmu;
> > +};
> > +
> > +struct perf_missing_pmu_features {
> > +     bool exclude_guest;
> >  };
>
> hum, is this really needed? I think you could just pass '1' as value,
> because you care only if the item is hashed, right?

It's not really needed, but I thought it might be useful to extend
if we want to add another pmu-specific feature check later.
But at this point I don't have a specific feature to add though.

>
> in any case the value is the current stack address of the
>   struct perf_missing_pmu_features pmu_features = { true };
>
> so it might as well be just '1' ... I was confused at the beggining
> and looked for the reason of this struct ;-)

Right, as I said it's not need to be a struct and we can pass a
scalar value instead if you want.

Thanks,
Namhyung

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] perf evsel: Fix missing exclude_{host,guest} setting
  2021-10-17 21:06     ` Jiri Olsa
@ 2021-10-18  4:53       ` Namhyung Kim
  0 siblings, 0 replies; 6+ messages in thread
From: Namhyung Kim @ 2021-10-18  4:53 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Arnaldo Carvalho de Melo, Arnaldo Carvalho de Melo, Ingo Molnar,
	Peter Zijlstra, LKML, Andi Kleen, Ian Rogers, Stephane Eranian

On Sun, Oct 17, 2021 at 2:07 PM Jiri Olsa <jolsa@redhat.com> wrote:
>
> On Sun, Oct 17, 2021 at 06:03:21PM -0300, Arnaldo Carvalho de Melo wrote:
> >
> >
> > On October 17, 2021 4:05:46 PM GMT-03:00, Jiri Olsa <jolsa@redhat.com> wrote:
> > >On Fri, Oct 15, 2021 at 10:12:53PM -0700, Namhyung Kim wrote:
> > >> The current logic for the perf missing feature has a bug that it can
> > >> wrongly clear some modifiers like G or H.  Actually some PMUs don't
> > >> support any filtering or exclusion while others do.  But we check it
> > >> as a global feature.
> > >>
> > >> For example, the cycles event can have 'G' modifier to enable it only
> > >> in the guest mode on x86.  When you don't run any VMs it'll return 0.
> > >>
> > >>   # perf stat -a -e cycles:G sleep 1
> > >>
> > >>     Performance counter stats for 'system wide':
> > >>
> > >>                     0      cycles:G
> > >>
> > >>           1.000721670 seconds time elapsed
> > >>
> > >> But when it's used with other pmu events that don't support G modifier,
> > >> it'll be reset and return non-zero values.
> > >>
> > >>   # perf stat -a -e cycles:G,msr/tsc/ sleep 1
> > >>
> > >>     Performance counter stats for 'system wide':
> > >>
> > >>           538,029,960      cycles:G
> > >>        16,924,010,738      msr/tsc/
> > >>
> > >>           1.001815327 seconds time elapsed
> > >>
> > >> This is because of the missing feature detection logic being global.
> > >> Add a hashmap to set pmu-specific exclude_host/guest features.
> > >>
> > >> Reported-by: Stephane Eranian <eranian@google.com>
> > >> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> > >> ---
> > >>  tools/perf/util/evsel.c | 47 +++++++++++++++++++++++++++++++++++------
> > >>  tools/perf/util/evsel.h |  6 ++++++
> > >>  2 files changed, 47 insertions(+), 6 deletions(-)
> > >>
> > >> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> > >> index dbfeceb2546c..437a28e769fe 100644
> > >> --- a/tools/perf/util/evsel.c
> > >> +++ b/tools/perf/util/evsel.c
> > >> @@ -1434,6 +1434,10 @@ void evsel__delete(struct evsel *evsel)
> > >>  {
> > >>    evsel__exit(evsel);
> > >>    free(evsel);
> > >> +
> > >> +  /* just free it for the first evsel */
> > >> +  hashmap__free(perf_missing_features.pmu);
> > >> +  perf_missing_features.pmu = NULL;
> > >>  }
> > >>
> > >>  void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
> > >> @@ -1791,6 +1795,23 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
> > >>    return 0;
> > >>  }
> > >>
> > >> +#define PMU_HASH_BITS  4
> > >> +
> > >> +static size_t pmu_hash(const void *key, void *ctx __maybe_unused)
> > >> +{
> > >> +  const struct evsel *evsel = key;
> > >> +
> > >> +  return hash_bits(evsel->core.attr.type, PMU_HASH_BITS);
> > >> +}
> > >> +
> > >> +static bool pmu_equal(const void *key1, const void *key2, void *ctx __maybe_unused)
> > >> +{
> > >> +  const struct evsel *a = key1;
> > >> +  const struct evsel *b = key2;
> > >> +
> > >> +  return a->core.attr.type == b->core.attr.type;
> > >> +}
> > >> +
> > >>  static void evsel__disable_missing_features(struct evsel *evsel)
> > >>  {
> > >>    if (perf_missing_features.weight_struct) {
> > >> @@ -1807,8 +1828,14 @@ static void evsel__disable_missing_features(struct evsel *evsel)
> > >>            evsel->open_flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC;
> > >>    if (perf_missing_features.mmap2)
> > >>            evsel->core.attr.mmap2 = 0;
> > >> -  if (perf_missing_features.exclude_guest)
> > >> -          evsel->core.attr.exclude_guest = evsel->core.attr.exclude_host = 0;
> > >> +  if (perf_missing_features.exclude_guest) {
> > >> +          void *pmu;
> > >
> > >could you just pass NULL in here instead of NULL?
> >
> > ENOPARSE
>
> ugh right... 'instead of &pmu' ;-)

Yes, we can pass NULL for the current code.
Passing the pointer is just for future change.
Maybe I thought too much.. :)

Anyway, I think it should pass the struct itself,
not the pointer to the local variable.
I'm not sure if it's possible to cast a struct literal
to a pointer, will check and update in v2.

Thanks,
Namhyung

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2021-10-18  4:53 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-16  5:12 [PATCH] perf evsel: Fix missing exclude_{host,guest} setting Namhyung Kim
2021-10-17 19:05 ` Jiri Olsa
2021-10-17 21:03   ` Arnaldo Carvalho de Melo
2021-10-17 21:06     ` Jiri Olsa
2021-10-18  4:53       ` Namhyung Kim
2021-10-18  4:44   ` Namhyung Kim

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.