linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] perf tool AMD: Use non-precise cycles as default event on certain Zen2 processors
@ 2023-11-07  8:33 Ravi Bangoria
  2023-11-07  8:33 ` [PATCH 2/2] perf header: Additional note on AMD IBS for max_precise pmu cap Ravi Bangoria
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Ravi Bangoria @ 2023-11-07  8:33 UTC (permalink / raw)
  To: acme, namhyung
  Cc: ravi.bangoria, kim.phillips, peterz, mingo, mark.rutland,
	alexander.shishkin, jolsa, irogers, adrian.hunter, kan.liang,
	changbin.du, yangjihong1, zwisler, wangming01, chenhuacai,
	kprateek.nayak, linux-perf-users, linux-kernel, sandipan.das,
	ananth.narayan, santosh.shukla

By default, Perf uses precise cycles event when no explicit event is
specified by user. Precise cycles event is forwarded to ibs_op// pmu
on AMD. However, IBS has hw issue on certain Zen2 processors where
it might raise NMI without sample_valid bit set, which causes Unknown
NMI warnings. (Erratum #1215: IBS (Instruction Based Sampling) Counter
Valid Value May be Incorrect After Exit From Core C6 (CC6) State.) So,
use non-precise cycles as default event on affected processors.

This does not prevent user to use explicit precise cycles event or
ibs_op// pmu directly.

Suggested-by: Arnaldo Carvalho de Melo <acme@kernel.org>
Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
---
 tools/perf/arch/x86/util/evlist.c | 34 +++++++++++++++++++++++++++++++
 tools/perf/builtin-record.c       |  2 +-
 tools/perf/builtin-top.c          |  2 +-
 tools/perf/util/evlist.c          | 12 ++++++++++-
 tools/perf/util/evlist.h          |  2 ++
 5 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
index b1ce0c52d88d..f4478179c91b 100644
--- a/tools/perf/arch/x86/util/evlist.c
+++ b/tools/perf/arch/x86/util/evlist.c
@@ -5,6 +5,8 @@
 #include "util/evlist.h"
 #include "util/parse-events.h"
 #include "util/event.h"
+#include "util/env.h"
+#include "linux/string.h"
 #include "topdown.h"
 #include "evsel.h"
 
@@ -92,3 +94,35 @@ int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
 	/* Default ordering by insertion index. */
 	return lhs->core.idx - rhs->core.idx;
 }
+
+/*
+ * Precise cycles event is forwarded to ibs_op// pmu on AMD. However, IBS
+ * has hw issue on certain Zen2 processors where it might raise NMI without
+ * sample_valid bit set, which causes Unknown NMI warnings. So default to
+ * non-precise cycles event on affected processors.
+ */
+const char *arch_evlist__default_cycles_event(bool can_profile_kernel)
+{
+	struct perf_env env = { .total_mem = 0, };
+	unsigned int family, model, stepping;
+	bool is_amd;
+	int ret;
+
+	perf_env__cpuid(&env);
+	is_amd = env.cpuid && strstarts(env.cpuid, "AuthenticAMD");
+	if (!is_amd)
+		goto out;
+
+	ret = sscanf(env.cpuid, "%*[^,],%u,%u,%u", &family, &model, &stepping);
+	if (ret == 3 && family == 0x17 && (
+	    (model >= 0x30 && model <= 0x3f) ||
+	    (model >= 0x60 && model <= 0x7f) ||
+	    (model >= 0x90 && model <= 0x9f))) {
+		perf_env__exit(&env);
+		return can_profile_kernel ? "cycles" : "cycles:u";
+	}
+
+out:
+	perf_env__exit(&env);
+	return can_profile_kernel ? "cycles:P" : "cycles:Pu";
+}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index dcf288a4fb9a..e58d8ac8a77b 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -4150,7 +4150,7 @@ int cmd_record(int argc, const char **argv)
 	if (rec->evlist->core.nr_entries == 0) {
 		bool can_profile_kernel = perf_event_paranoid_check(1);
 
-		err = parse_event(rec->evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu");
+		err = parse_event(rec->evlist, evlist__default_cycles_event(can_profile_kernel));
 		if (err)
 			goto out;
 	}
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index ea8c7eca5eee..21368421eddd 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1666,7 +1666,7 @@ int cmd_top(int argc, const char **argv)
 
 	if (!top.evlist->core.nr_entries) {
 		bool can_profile_kernel = perf_event_paranoid_check(1);
-		int err = parse_event(top.evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu");
+		int err = parse_event(top.evlist, evlist__default_cycles_event(can_profile_kernel));
 
 		if (err)
 			goto out_delete_evlist;
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index e36da58522ef..406ed851cafc 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -90,6 +90,16 @@ struct evlist *evlist__new(void)
 	return evlist;
 }
 
+const char * __weak arch_evlist__default_cycles_event(bool can_profile_kernel)
+{
+	return can_profile_kernel ? "cycles:P" : "cycles:Pu";
+}
+
+const char *evlist__default_cycles_event(bool can_profile_kernel)
+{
+	return arch_evlist__default_cycles_event(can_profile_kernel);
+}
+
 struct evlist *evlist__new_default(void)
 {
 	struct evlist *evlist = evlist__new();
@@ -100,7 +110,7 @@ struct evlist *evlist__new_default(void)
 		return NULL;
 
 	can_profile_kernel = perf_event_paranoid_check(1);
-	err = parse_event(evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu");
+	err = parse_event(evlist, evlist__default_cycles_event(can_profile_kernel));
 	if (err) {
 		evlist__delete(evlist);
 		evlist = NULL;
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 98e7ddb2bd30..7267b4fb1981 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -91,6 +91,8 @@ struct evsel_str_handler {
 
 struct evlist *evlist__new(void);
 struct evlist *evlist__new_default(void);
+const char *arch_evlist__default_cycles_event(bool can_profile_kernel);
+const char *evlist__default_cycles_event(bool can_profile_kernel);
 struct evlist *evlist__new_dummy(void);
 void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
 		  struct perf_thread_map *threads);
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 2/2] perf header: Additional note on AMD IBS for max_precise pmu cap
  2023-11-07  8:33 [PATCH 1/2] perf tool AMD: Use non-precise cycles as default event on certain Zen2 processors Ravi Bangoria
@ 2023-11-07  8:33 ` Ravi Bangoria
  2023-11-09 19:56   ` Arnaldo Carvalho de Melo
  2023-11-07 18:22 ` [PATCH 1/2] perf tool AMD: Use non-precise cycles as default event on certain Zen2 processors Ian Rogers
  2023-11-09 21:53 ` Namhyung Kim
  2 siblings, 1 reply; 11+ messages in thread
From: Ravi Bangoria @ 2023-11-07  8:33 UTC (permalink / raw)
  To: acme, namhyung
  Cc: ravi.bangoria, kim.phillips, peterz, mingo, mark.rutland,
	alexander.shishkin, jolsa, irogers, adrian.hunter, kan.liang,
	changbin.du, yangjihong1, zwisler, wangming01, chenhuacai,
	kprateek.nayak, linux-perf-users, linux-kernel, sandipan.das,
	ananth.narayan, santosh.shukla

From: Arnaldo Carvalho de Melo <acme@kernel.org>

x86 core pmu exposes supported maximum precision level via max_precise
pmu capability. Although, AMD core pmu does not support precise mode,
certain core pmu events with precise_ip > 0 are allowed and forwarded
to IBS OP pmu. Display a note about this in perf report header and
document the details in the perf-list man page.

Signed-off-by: Arnaldo Carvalho de Melo <acme@kernel.org>
Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
---
 tools/perf/Documentation/perf-list.txt | 12 +++++++-----
 tools/perf/util/env.c                  | 18 ++++++++++++++++++
 tools/perf/util/env.h                  |  2 ++
 tools/perf/util/header.c               |  8 ++++++++
 4 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index d5f78e125efe..1b90575ee3c8 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -81,11 +81,13 @@ For Intel systems precise event sampling is implemented with PEBS
 which supports up to precise-level 2, and precise level 3 for
 some special cases
 
-On AMD systems it is implemented using IBS (up to precise-level 2).
-The precise modifier works with event types 0x76 (cpu-cycles, CPU
-clocks not halted) and 0xC1 (micro-ops retired). Both events map to
-IBS execution sampling (IBS op) with the IBS Op Counter Control bit
-(IbsOpCntCtl) set respectively (see the
+On AMD systems it is implemented using IBS OP (up to precise-level 2).
+Unlike Intel PEBS which provides levels of precision, AMD core pmu is
+inherently non-precise and IBS is inherently precise. (i.e. ibs_op//,
+ibs_op//p, ibs_op//pp and ibs_op//ppp are all same). The precise modifier
+works with event types 0x76 (cpu-cycles, CPU clocks not halted) and 0xC1
+(micro-ops retired). Both events map to IBS execution sampling (IBS op)
+with the IBS Op Counter Control bit (IbsOpCntCtl) set respectively (see the
 Core Complex (CCX) -> Processor x86 Core -> Instruction Based Sampling (IBS)
 section of the [AMD Processor Programming Reference (PPR)] relevant to the
 family, model and stepping of the processor being used).
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index 44140b7f596a..cbc18b22ace5 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -531,6 +531,24 @@ int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu)
 	return cpu.cpu >= 0 && cpu.cpu < env->nr_numa_map ? env->numa_map[cpu.cpu] : -1;
 }
 
+bool perf_env__has_pmu_mapping(struct perf_env *env, const char *pmu_name)
+{
+	char *pmu_mapping = env->pmu_mappings, *colon;
+
+	for (int i = 0; i < env->nr_pmu_mappings; ++i) {
+		if (strtoul(pmu_mapping, &colon, 0) == ULONG_MAX || *colon != ':')
+			goto out_error;
+
+		pmu_mapping = colon + 1;
+		if (strcmp(pmu_mapping, pmu_name) == 0)
+			return true;
+
+		pmu_mapping += strlen(pmu_mapping) + 1;
+	}
+out_error:
+	return false;
+}
+
 char *perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name,
 			     const char *cap)
 {
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 4566c51f2fd9..56aea562c61b 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -174,4 +174,6 @@ struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id);
 int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu);
 char *perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name,
 			     const char *cap);
+
+bool perf_env__has_pmu_mapping(struct perf_env *env, const char *pmu_name);
 #endif /* __PERF_ENV_H */
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index e86b9439ffee..3cc288d14002 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -2145,6 +2145,14 @@ static void print_pmu_caps(struct feat_fd *ff, FILE *fp)
 		__print_pmu_caps(fp, pmu_caps->nr_caps, pmu_caps->caps,
 				 pmu_caps->pmu_name);
 	}
+
+	if (strcmp(perf_env__arch(&ff->ph->env), "x86") == 0 &&
+	    perf_env__has_pmu_mapping(&ff->ph->env, "ibs_op")) {
+		char *max_precise = perf_env__find_pmu_cap(&ff->ph->env, "cpu", "max_precise");
+
+		if (max_precise != NULL && atoi(max_precise) == 0)
+			fprintf(fp, "# AMD systems uses ibs_op// PMU for some precise events, e.g.: cycles:p, see the 'perf list' man page for further details.\n");
+	}
 }
 
 static void print_pmu_mappings(struct feat_fd *ff, FILE *fp)
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] perf tool AMD: Use non-precise cycles as default event on certain Zen2 processors
  2023-11-07  8:33 [PATCH 1/2] perf tool AMD: Use non-precise cycles as default event on certain Zen2 processors Ravi Bangoria
  2023-11-07  8:33 ` [PATCH 2/2] perf header: Additional note on AMD IBS for max_precise pmu cap Ravi Bangoria
@ 2023-11-07 18:22 ` Ian Rogers
  2023-11-10  9:37   ` Ravi Bangoria
  2023-11-09 21:53 ` Namhyung Kim
  2 siblings, 1 reply; 11+ messages in thread
From: Ian Rogers @ 2023-11-07 18:22 UTC (permalink / raw)
  To: Ravi Bangoria
  Cc: acme, namhyung, kim.phillips, peterz, mingo, mark.rutland,
	alexander.shishkin, jolsa, adrian.hunter, kan.liang, changbin.du,
	yangjihong1, zwisler, wangming01, chenhuacai, kprateek.nayak,
	linux-perf-users, linux-kernel, sandipan.das, ananth.narayan,
	santosh.shukla

On Tue, Nov 7, 2023 at 12:34 AM Ravi Bangoria <ravi.bangoria@amd.com> wrote:
>
> By default, Perf uses precise cycles event when no explicit event is
> specified by user. Precise cycles event is forwarded to ibs_op// pmu
> on AMD. However, IBS has hw issue on certain Zen2 processors where
> it might raise NMI without sample_valid bit set, which causes Unknown
> NMI warnings. (Erratum #1215: IBS (Instruction Based Sampling) Counter
> Valid Value May be Incorrect After Exit From Core C6 (CC6) State.) So,
> use non-precise cycles as default event on affected processors.
>
> This does not prevent user to use explicit precise cycles event or
> ibs_op// pmu directly.
>
> Suggested-by: Arnaldo Carvalho de Melo <acme@kernel.org>
> Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>

Thanks Ravi,

We read max_precise from sysfs:
https://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/util/pmu.c?h=perf-tools-next#n984

But it appears not to be used:
https://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/util/evsel.c?h=perf-tools-next#n1323

I think:
```
if (evsel->precise_max)
    attr->precise_ip = 3;
```
should be:
```
if (evsel->precise_max)
    attr->precise_ip = evsel->pmu->max_precise;
```

Presumably this is misreporting as some non-zero value on the buggy
systems - if it doesn't we can declare victory here. If not we can
modify max_precise's calculation on perf's struct pmu for cpu for AMD
so that we just hardwire it to zero in arch code.

I think I prefer this approach as it is fixing max_precise in a more
generic way. If someone were to specify "cycles:P" on the command line
then it wouldn't trigger the bug, similarly for perf script and other
places. Wdyt?

Thanks,
Ian

> ---
>  tools/perf/arch/x86/util/evlist.c | 34 +++++++++++++++++++++++++++++++
>  tools/perf/builtin-record.c       |  2 +-
>  tools/perf/builtin-top.c          |  2 +-
>  tools/perf/util/evlist.c          | 12 ++++++++++-
>  tools/perf/util/evlist.h          |  2 ++
>  5 files changed, 49 insertions(+), 3 deletions(-)
>
> diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
> index b1ce0c52d88d..f4478179c91b 100644
> --- a/tools/perf/arch/x86/util/evlist.c
> +++ b/tools/perf/arch/x86/util/evlist.c
> @@ -5,6 +5,8 @@
>  #include "util/evlist.h"
>  #include "util/parse-events.h"
>  #include "util/event.h"
> +#include "util/env.h"
> +#include "linux/string.h"
>  #include "topdown.h"
>  #include "evsel.h"
>
> @@ -92,3 +94,35 @@ int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
>         /* Default ordering by insertion index. */
>         return lhs->core.idx - rhs->core.idx;
>  }
> +
> +/*
> + * Precise cycles event is forwarded to ibs_op// pmu on AMD. However, IBS
> + * has hw issue on certain Zen2 processors where it might raise NMI without
> + * sample_valid bit set, which causes Unknown NMI warnings. So default to
> + * non-precise cycles event on affected processors.
> + */
> +const char *arch_evlist__default_cycles_event(bool can_profile_kernel)
> +{
> +       struct perf_env env = { .total_mem = 0, };
> +       unsigned int family, model, stepping;
> +       bool is_amd;
> +       int ret;
> +
> +       perf_env__cpuid(&env);
> +       is_amd = env.cpuid && strstarts(env.cpuid, "AuthenticAMD");
> +       if (!is_amd)
> +               goto out;
> +
> +       ret = sscanf(env.cpuid, "%*[^,],%u,%u,%u", &family, &model, &stepping);
> +       if (ret == 3 && family == 0x17 && (
> +           (model >= 0x30 && model <= 0x3f) ||
> +           (model >= 0x60 && model <= 0x7f) ||
> +           (model >= 0x90 && model <= 0x9f))) {
> +               perf_env__exit(&env);
> +               return can_profile_kernel ? "cycles" : "cycles:u";
> +       }
> +
> +out:
> +       perf_env__exit(&env);
> +       return can_profile_kernel ? "cycles:P" : "cycles:Pu";
> +}
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index dcf288a4fb9a..e58d8ac8a77b 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -4150,7 +4150,7 @@ int cmd_record(int argc, const char **argv)
>         if (rec->evlist->core.nr_entries == 0) {
>                 bool can_profile_kernel = perf_event_paranoid_check(1);
>
> -               err = parse_event(rec->evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu");
> +               err = parse_event(rec->evlist, evlist__default_cycles_event(can_profile_kernel));
>                 if (err)
>                         goto out;
>         }
> diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
> index ea8c7eca5eee..21368421eddd 100644
> --- a/tools/perf/builtin-top.c
> +++ b/tools/perf/builtin-top.c
> @@ -1666,7 +1666,7 @@ int cmd_top(int argc, const char **argv)
>
>         if (!top.evlist->core.nr_entries) {
>                 bool can_profile_kernel = perf_event_paranoid_check(1);
> -               int err = parse_event(top.evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu");
> +               int err = parse_event(top.evlist, evlist__default_cycles_event(can_profile_kernel));
>
>                 if (err)
>                         goto out_delete_evlist;
> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
> index e36da58522ef..406ed851cafc 100644
> --- a/tools/perf/util/evlist.c
> +++ b/tools/perf/util/evlist.c
> @@ -90,6 +90,16 @@ struct evlist *evlist__new(void)
>         return evlist;
>  }
>
> +const char * __weak arch_evlist__default_cycles_event(bool can_profile_kernel)
> +{
> +       return can_profile_kernel ? "cycles:P" : "cycles:Pu";
> +}
> +
> +const char *evlist__default_cycles_event(bool can_profile_kernel)
> +{
> +       return arch_evlist__default_cycles_event(can_profile_kernel);
> +}
> +
>  struct evlist *evlist__new_default(void)
>  {
>         struct evlist *evlist = evlist__new();
> @@ -100,7 +110,7 @@ struct evlist *evlist__new_default(void)
>                 return NULL;
>
>         can_profile_kernel = perf_event_paranoid_check(1);
> -       err = parse_event(evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu");
> +       err = parse_event(evlist, evlist__default_cycles_event(can_profile_kernel));
>         if (err) {
>                 evlist__delete(evlist);
>                 evlist = NULL;
> diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
> index 98e7ddb2bd30..7267b4fb1981 100644
> --- a/tools/perf/util/evlist.h
> +++ b/tools/perf/util/evlist.h
> @@ -91,6 +91,8 @@ struct evsel_str_handler {
>
>  struct evlist *evlist__new(void);
>  struct evlist *evlist__new_default(void);
> +const char *arch_evlist__default_cycles_event(bool can_profile_kernel);
> +const char *evlist__default_cycles_event(bool can_profile_kernel);
>  struct evlist *evlist__new_dummy(void);
>  void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
>                   struct perf_thread_map *threads);
> --
> 2.41.0
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] perf header: Additional note on AMD IBS for max_precise pmu cap
  2023-11-07  8:33 ` [PATCH 2/2] perf header: Additional note on AMD IBS for max_precise pmu cap Ravi Bangoria
@ 2023-11-09 19:56   ` Arnaldo Carvalho de Melo
  0 siblings, 0 replies; 11+ messages in thread
From: Arnaldo Carvalho de Melo @ 2023-11-09 19:56 UTC (permalink / raw)
  To: Ravi Bangoria
  Cc: namhyung, kim.phillips, peterz, mingo, mark.rutland,
	alexander.shishkin, jolsa, irogers, adrian.hunter, kan.liang,
	changbin.du, yangjihong1, zwisler, wangming01, chenhuacai,
	kprateek.nayak, linux-perf-users, linux-kernel, sandipan.das,
	ananth.narayan, santosh.shukla

Em Tue, Nov 07, 2023 at 02:03:31PM +0530, Ravi Bangoria escreveu:
> From: Arnaldo Carvalho de Melo <acme@kernel.org>

Applied this one, waiting for some more time to address Ian comments,

- Arnaldo
 
> x86 core pmu exposes supported maximum precision level via max_precise
> pmu capability. Although, AMD core pmu does not support precise mode,
> certain core pmu events with precise_ip > 0 are allowed and forwarded
> to IBS OP pmu. Display a note about this in perf report header and
> document the details in the perf-list man page.
> 
> Signed-off-by: Arnaldo Carvalho de Melo <acme@kernel.org>
> Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
> ---
>  tools/perf/Documentation/perf-list.txt | 12 +++++++-----
>  tools/perf/util/env.c                  | 18 ++++++++++++++++++
>  tools/perf/util/env.h                  |  2 ++
>  tools/perf/util/header.c               |  8 ++++++++
>  4 files changed, 35 insertions(+), 5 deletions(-)
> 
> diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
> index d5f78e125efe..1b90575ee3c8 100644
> --- a/tools/perf/Documentation/perf-list.txt
> +++ b/tools/perf/Documentation/perf-list.txt
> @@ -81,11 +81,13 @@ For Intel systems precise event sampling is implemented with PEBS
>  which supports up to precise-level 2, and precise level 3 for
>  some special cases
>  
> -On AMD systems it is implemented using IBS (up to precise-level 2).
> -The precise modifier works with event types 0x76 (cpu-cycles, CPU
> -clocks not halted) and 0xC1 (micro-ops retired). Both events map to
> -IBS execution sampling (IBS op) with the IBS Op Counter Control bit
> -(IbsOpCntCtl) set respectively (see the
> +On AMD systems it is implemented using IBS OP (up to precise-level 2).
> +Unlike Intel PEBS which provides levels of precision, AMD core pmu is
> +inherently non-precise and IBS is inherently precise. (i.e. ibs_op//,
> +ibs_op//p, ibs_op//pp and ibs_op//ppp are all same). The precise modifier
> +works with event types 0x76 (cpu-cycles, CPU clocks not halted) and 0xC1
> +(micro-ops retired). Both events map to IBS execution sampling (IBS op)
> +with the IBS Op Counter Control bit (IbsOpCntCtl) set respectively (see the
>  Core Complex (CCX) -> Processor x86 Core -> Instruction Based Sampling (IBS)
>  section of the [AMD Processor Programming Reference (PPR)] relevant to the
>  family, model and stepping of the processor being used).
> diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
> index 44140b7f596a..cbc18b22ace5 100644
> --- a/tools/perf/util/env.c
> +++ b/tools/perf/util/env.c
> @@ -531,6 +531,24 @@ int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu)
>  	return cpu.cpu >= 0 && cpu.cpu < env->nr_numa_map ? env->numa_map[cpu.cpu] : -1;
>  }
>  
> +bool perf_env__has_pmu_mapping(struct perf_env *env, const char *pmu_name)
> +{
> +	char *pmu_mapping = env->pmu_mappings, *colon;
> +
> +	for (int i = 0; i < env->nr_pmu_mappings; ++i) {
> +		if (strtoul(pmu_mapping, &colon, 0) == ULONG_MAX || *colon != ':')
> +			goto out_error;
> +
> +		pmu_mapping = colon + 1;
> +		if (strcmp(pmu_mapping, pmu_name) == 0)
> +			return true;
> +
> +		pmu_mapping += strlen(pmu_mapping) + 1;
> +	}
> +out_error:
> +	return false;
> +}
> +
>  char *perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name,
>  			     const char *cap)
>  {
> diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
> index 4566c51f2fd9..56aea562c61b 100644
> --- a/tools/perf/util/env.h
> +++ b/tools/perf/util/env.h
> @@ -174,4 +174,6 @@ struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id);
>  int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu);
>  char *perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name,
>  			     const char *cap);
> +
> +bool perf_env__has_pmu_mapping(struct perf_env *env, const char *pmu_name);
>  #endif /* __PERF_ENV_H */
> diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
> index e86b9439ffee..3cc288d14002 100644
> --- a/tools/perf/util/header.c
> +++ b/tools/perf/util/header.c
> @@ -2145,6 +2145,14 @@ static void print_pmu_caps(struct feat_fd *ff, FILE *fp)
>  		__print_pmu_caps(fp, pmu_caps->nr_caps, pmu_caps->caps,
>  				 pmu_caps->pmu_name);
>  	}
> +
> +	if (strcmp(perf_env__arch(&ff->ph->env), "x86") == 0 &&
> +	    perf_env__has_pmu_mapping(&ff->ph->env, "ibs_op")) {
> +		char *max_precise = perf_env__find_pmu_cap(&ff->ph->env, "cpu", "max_precise");
> +
> +		if (max_precise != NULL && atoi(max_precise) == 0)
> +			fprintf(fp, "# AMD systems uses ibs_op// PMU for some precise events, e.g.: cycles:p, see the 'perf list' man page for further details.\n");
> +	}
>  }
>  
>  static void print_pmu_mappings(struct feat_fd *ff, FILE *fp)
> -- 
> 2.41.0
> 

-- 

- Arnaldo

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] perf tool AMD: Use non-precise cycles as default event on certain Zen2 processors
  2023-11-07  8:33 [PATCH 1/2] perf tool AMD: Use non-precise cycles as default event on certain Zen2 processors Ravi Bangoria
  2023-11-07  8:33 ` [PATCH 2/2] perf header: Additional note on AMD IBS for max_precise pmu cap Ravi Bangoria
  2023-11-07 18:22 ` [PATCH 1/2] perf tool AMD: Use non-precise cycles as default event on certain Zen2 processors Ian Rogers
@ 2023-11-09 21:53 ` Namhyung Kim
  2023-11-10  9:46   ` Ravi Bangoria
  2 siblings, 1 reply; 11+ messages in thread
From: Namhyung Kim @ 2023-11-09 21:53 UTC (permalink / raw)
  To: Ravi Bangoria
  Cc: acme, kim.phillips, peterz, mingo, mark.rutland,
	alexander.shishkin, jolsa, irogers, adrian.hunter, kan.liang,
	changbin.du, yangjihong1, zwisler, wangming01, chenhuacai,
	kprateek.nayak, linux-perf-users, linux-kernel, sandipan.das,
	ananth.narayan, santosh.shukla

Hi Ravi,

On Tue, Nov 7, 2023 at 12:34 AM Ravi Bangoria <ravi.bangoria@amd.com> wrote:
>
> By default, Perf uses precise cycles event when no explicit event is
> specified by user. Precise cycles event is forwarded to ibs_op// pmu
> on AMD. However, IBS has hw issue on certain Zen2 processors where
> it might raise NMI without sample_valid bit set, which causes Unknown
> NMI warnings. (Erratum #1215: IBS (Instruction Based Sampling) Counter
> Valid Value May be Incorrect After Exit From Core C6 (CC6) State.) So,
> use non-precise cycles as default event on affected processors.

It seems like a kernel issue, do we have a kernel patch not to forward
precise cycles or instructions events to IBS on the affected CPUs?

Thanks,
Namhyung

>
> This does not prevent user to use explicit precise cycles event or
> ibs_op// pmu directly.
>
> Suggested-by: Arnaldo Carvalho de Melo <acme@kernel.org>
> Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
> ---

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] perf tool AMD: Use non-precise cycles as default event on certain Zen2 processors
  2023-11-07 18:22 ` [PATCH 1/2] perf tool AMD: Use non-precise cycles as default event on certain Zen2 processors Ian Rogers
@ 2023-11-10  9:37   ` Ravi Bangoria
  0 siblings, 0 replies; 11+ messages in thread
From: Ravi Bangoria @ 2023-11-10  9:37 UTC (permalink / raw)
  To: Ian Rogers
  Cc: acme, namhyung, kim.phillips, peterz, mingo, mark.rutland,
	alexander.shishkin, jolsa, adrian.hunter, kan.liang, changbin.du,
	yangjihong1, zwisler, wangming01, chenhuacai, kprateek.nayak,
	linux-perf-users, linux-kernel, sandipan.das, ananth.narayan,
	santosh.shukla, Ravi Bangoria

Hi Ian,

> We read max_precise from sysfs:
> https://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/util/pmu.c?h=perf-tools-next#n984
> 
> But it appears not to be used:
> https://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/util/evsel.c?h=perf-tools-next#n1323
> 
> I think:
> ```
> if (evsel->precise_max)
>     attr->precise_ip = 3;
> ```
> should be:
> ```
> if (evsel->precise_max)
>     attr->precise_ip = evsel->pmu->max_precise;
> ```

This segfaults as evsel->pmu is NULL. But I get your point.

If we go with this change, I'll need to switch to cycles:p as default
event on AMD. Otherwise perf will use non-precise event as default event
on AMD.

> Presumably this is misreporting as some non-zero value on the buggy
> systems - if it doesn't we can declare victory here. If not we can
> modify max_precise's calculation on perf's struct pmu for cpu for AMD
> so that we just hardwire it to zero in arch code.

max_precise for AMD core pmu is 0, which is correct as AMD core pmu does
not support precise mode. However, cycles:p, r76:p and rC1:p are special
cases and allowed via core pmu.

Thanks,
Ravi

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] perf tool AMD: Use non-precise cycles as default event on certain Zen2 processors
  2023-11-09 21:53 ` Namhyung Kim
@ 2023-11-10  9:46   ` Ravi Bangoria
  2023-12-08 23:33     ` Namhyung Kim
  0 siblings, 1 reply; 11+ messages in thread
From: Ravi Bangoria @ 2023-11-10  9:46 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: acme, kim.phillips, peterz, mingo, mark.rutland,
	alexander.shishkin, jolsa, irogers, adrian.hunter, kan.liang,
	changbin.du, yangjihong1, zwisler, wangming01, chenhuacai,
	kprateek.nayak, linux-perf-users, linux-kernel, sandipan.das,
	ananth.narayan, santosh.shukla, Ravi Bangoria

Hi Namhyung,

>> By default, Perf uses precise cycles event when no explicit event is
>> specified by user. Precise cycles event is forwarded to ibs_op// pmu
>> on AMD. However, IBS has hw issue on certain Zen2 processors where
>> it might raise NMI without sample_valid bit set, which causes Unknown
>> NMI warnings. (Erratum #1215: IBS (Instruction Based Sampling) Counter
>> Valid Value May be Incorrect After Exit From Core C6 (CC6) State.) So,
>> use non-precise cycles as default event on affected processors.
> 
> It seems like a kernel issue, do we have a kernel patch not to forward
> precise cycles or instructions events to IBS on the affected CPUs?

I'm not sure how it's a kernel issue. User can use ibs_op// pmu directly
and might hit the hw bug.

Thanks,
Ravi

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] perf tool AMD: Use non-precise cycles as default event on certain Zen2 processors
  2023-11-10  9:46   ` Ravi Bangoria
@ 2023-12-08 23:33     ` Namhyung Kim
  2023-12-11 13:53       ` Ravi Bangoria
  0 siblings, 1 reply; 11+ messages in thread
From: Namhyung Kim @ 2023-12-08 23:33 UTC (permalink / raw)
  To: Ravi Bangoria
  Cc: acme, kim.phillips, peterz, mingo, mark.rutland,
	alexander.shishkin, jolsa, irogers, adrian.hunter, kan.liang,
	changbin.du, yangjihong1, zwisler, wangming01, chenhuacai,
	kprateek.nayak, linux-perf-users, linux-kernel, sandipan.das,
	ananth.narayan, santosh.shukla

Hi Ravi,

On Fri, Nov 10, 2023 at 1:46 AM Ravi Bangoria <ravi.bangoria@amd.com> wrote:
>
> Hi Namhyung,
>
> >> By default, Perf uses precise cycles event when no explicit event is
> >> specified by user. Precise cycles event is forwarded to ibs_op// pmu
> >> on AMD. However, IBS has hw issue on certain Zen2 processors where
> >> it might raise NMI without sample_valid bit set, which causes Unknown
> >> NMI warnings. (Erratum #1215: IBS (Instruction Based Sampling) Counter
> >> Valid Value May be Incorrect After Exit From Core C6 (CC6) State.) So,
> >> use non-precise cycles as default event on affected processors.
> >
> > It seems like a kernel issue, do we have a kernel patch not to forward
> > precise cycles or instructions events to IBS on the affected CPUs?
>
> I'm not sure how it's a kernel issue. User can use ibs_op// pmu directly
> and might hit the hw bug.

Sorry for the late reply.  I know it's the user's fault when using ibs_op//
directly but I think the kernel should not forward cycles:pp to IBS.

Thanks,
Namhyung

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] perf tool AMD: Use non-precise cycles as default event on certain Zen2 processors
  2023-12-08 23:33     ` Namhyung Kim
@ 2023-12-11 13:53       ` Ravi Bangoria
  2023-12-11 23:01         ` Namhyung Kim
  0 siblings, 1 reply; 11+ messages in thread
From: Ravi Bangoria @ 2023-12-11 13:53 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: acme, kim.phillips, peterz, mingo, mark.rutland,
	alexander.shishkin, jolsa, irogers, adrian.hunter, kan.liang,
	changbin.du, yangjihong1, zwisler, wangming01, chenhuacai,
	kprateek.nayak, linux-perf-users, linux-kernel, sandipan.das,
	ananth.narayan, santosh.shukla, Ravi Bangoria

Hi Namhyung,

>>>> By default, Perf uses precise cycles event when no explicit event is
>>>> specified by user. Precise cycles event is forwarded to ibs_op// pmu
>>>> on AMD. However, IBS has hw issue on certain Zen2 processors where
>>>> it might raise NMI without sample_valid bit set, which causes Unknown
>>>> NMI warnings. (Erratum #1215: IBS (Instruction Based Sampling) Counter
>>>> Valid Value May be Incorrect After Exit From Core C6 (CC6) State.) So,
>>>> use non-precise cycles as default event on affected processors.
>>>
>>> It seems like a kernel issue, do we have a kernel patch not to forward
>>> precise cycles or instructions events to IBS on the affected CPUs?
>>
>> I'm not sure how it's a kernel issue. User can use ibs_op// pmu directly
>> and might hit the hw bug.
> 
> Sorry for the late reply.  I know it's the user's fault when using ibs_op//
> directly but I think the kernel should not forward cycles:pp to IBS.

For all AMD zen uarchs, cycles:p is same as ibs_op// (man perf-list has
more detail). It's confusing if ibs_op// works but cycles:p fails on the
same machine.

No strong opinion though. I can prepare a kernel patch, if you want.

Thanks,
Ravi

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] perf tool AMD: Use non-precise cycles as default event on certain Zen2 processors
  2023-12-11 13:53       ` Ravi Bangoria
@ 2023-12-11 23:01         ` Namhyung Kim
  2023-12-12  4:18           ` Ravi Bangoria
  0 siblings, 1 reply; 11+ messages in thread
From: Namhyung Kim @ 2023-12-11 23:01 UTC (permalink / raw)
  To: Ravi Bangoria
  Cc: acme, kim.phillips, peterz, mingo, mark.rutland,
	alexander.shishkin, jolsa, irogers, adrian.hunter, kan.liang,
	changbin.du, yangjihong1, zwisler, wangming01, chenhuacai,
	kprateek.nayak, linux-perf-users, linux-kernel, sandipan.das,
	ananth.narayan, santosh.shukla

On Mon, Dec 11, 2023 at 5:54 AM Ravi Bangoria <ravi.bangoria@amd.com> wrote:
>
> Hi Namhyung,
>
> >>>> By default, Perf uses precise cycles event when no explicit event is
> >>>> specified by user. Precise cycles event is forwarded to ibs_op// pmu
> >>>> on AMD. However, IBS has hw issue on certain Zen2 processors where
> >>>> it might raise NMI without sample_valid bit set, which causes Unknown
> >>>> NMI warnings. (Erratum #1215: IBS (Instruction Based Sampling) Counter
> >>>> Valid Value May be Incorrect After Exit From Core C6 (CC6) State.) So,
> >>>> use non-precise cycles as default event on affected processors.
> >>>
> >>> It seems like a kernel issue, do we have a kernel patch not to forward
> >>> precise cycles or instructions events to IBS on the affected CPUs?
> >>
> >> I'm not sure how it's a kernel issue. User can use ibs_op// pmu directly
> >> and might hit the hw bug.
> >
> > Sorry for the late reply.  I know it's the user's fault when using ibs_op//
> > directly but I think the kernel should not forward cycles:pp to IBS.
>
> For all AMD zen uarchs, cycles:p is same as ibs_op// (man perf-list has
> more detail). It's confusing if ibs_op// works but cycles:p fails on the
> same machine.

Make sense.  Probably perf tools can provide a more detailed message
about the situation.

>
> No strong opinion though. I can prepare a kernel patch, if you want.

I have a report that these unknown NMI messages caused some
performance issues.  So maybe we need to be safe by not using
IBS automatically.  I've also posted a patch to rate-limit the message
itself.

Thanks,
Namhyung

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] perf tool AMD: Use non-precise cycles as default event on certain Zen2 processors
  2023-12-11 23:01         ` Namhyung Kim
@ 2023-12-12  4:18           ` Ravi Bangoria
  0 siblings, 0 replies; 11+ messages in thread
From: Ravi Bangoria @ 2023-12-12  4:18 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: acme, kim.phillips, peterz, mingo, mark.rutland,
	alexander.shishkin, jolsa, irogers, adrian.hunter, kan.liang,
	changbin.du, yangjihong1, zwisler, wangming01, chenhuacai,
	kprateek.nayak, linux-perf-users, linux-kernel, sandipan.das,
	ananth.narayan, santosh.shukla, Ravi Bangoria

>>>>>> By default, Perf uses precise cycles event when no explicit event is
>>>>>> specified by user. Precise cycles event is forwarded to ibs_op// pmu
>>>>>> on AMD. However, IBS has hw issue on certain Zen2 processors where
>>>>>> it might raise NMI without sample_valid bit set, which causes Unknown
>>>>>> NMI warnings. (Erratum #1215: IBS (Instruction Based Sampling) Counter
>>>>>> Valid Value May be Incorrect After Exit From Core C6 (CC6) State.) So,
>>>>>> use non-precise cycles as default event on affected processors.
>>>>>
>>>>> It seems like a kernel issue, do we have a kernel patch not to forward
>>>>> precise cycles or instructions events to IBS on the affected CPUs?
>>>>
>>>> I'm not sure how it's a kernel issue. User can use ibs_op// pmu directly
>>>> and might hit the hw bug.
>>>
>>> Sorry for the late reply.  I know it's the user's fault when using ibs_op//
>>> directly but I think the kernel should not forward cycles:pp to IBS.
>>
>> For all AMD zen uarchs, cycles:p is same as ibs_op// (man perf-list has
>> more detail). It's confusing if ibs_op// works but cycles:p fails on the
>> same machine.
> 
> Make sense.  Probably perf tools can provide a more detailed message
> about the situation.

+1

>> No strong opinion though. I can prepare a kernel patch, if you want.
> 
> I have a report that these unknown NMI messages caused some
> performance issues.  So maybe we need to be safe by not using
> IBS automatically.  I've also posted a patch to rate-limit the message
> itself.

Sure. Will prepare and post the patch.

Thanks,
Ravi

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2023-12-12  4:19 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-07  8:33 [PATCH 1/2] perf tool AMD: Use non-precise cycles as default event on certain Zen2 processors Ravi Bangoria
2023-11-07  8:33 ` [PATCH 2/2] perf header: Additional note on AMD IBS for max_precise pmu cap Ravi Bangoria
2023-11-09 19:56   ` Arnaldo Carvalho de Melo
2023-11-07 18:22 ` [PATCH 1/2] perf tool AMD: Use non-precise cycles as default event on certain Zen2 processors Ian Rogers
2023-11-10  9:37   ` Ravi Bangoria
2023-11-09 21:53 ` Namhyung Kim
2023-11-10  9:46   ` Ravi Bangoria
2023-12-08 23:33     ` Namhyung Kim
2023-12-11 13:53       ` Ravi Bangoria
2023-12-11 23:01         ` Namhyung Kim
2023-12-12  4:18           ` Ravi Bangoria

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).