* [PATCH V2 1/4] perf tools: Rename group to topdown
       [not found] <20200910134501.11352-1-kan.liang@linux.intel.com>
@ 2020-09-10 13:44 ` kan.liang
  2020-09-10 13:44 ` [PATCH V2 2/4] perf record: Support sample-read topdown metric group kan.liang
  2020-09-10 13:45 ` [PATCH V2 3/4] perf stat: Support new per thread TopDown metrics kan.liang
  2 siblings, 0 replies; 5+ messages in thread
From: kan.liang @ 2020-09-10 13:44 UTC (permalink / raw)
  To: acme, peterz, mingo, jolsa, namhyung, linux-kernel; +Cc: eranian, ak, Kan Liang

From: Kan Liang <kan.liang@linux.intel.com>

The group.h and group.c files only contain TopDown-group-related
functions. The name "group" is too generic and inaccurate; rename
them to "topdown".

Also move the topdown helper functions out of builtin-stat.c into a
dedicated file, topdown.c.
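
For illustration, a minimal sketch of how the moved helper is meant to
be called (the caller and the event names are hypothetical, not part of
this patch):

	#include <stdio.h>
	#include <stdlib.h>
	#include "util/topdown.h"

	/* Illustrative only: topdown_filter_events() drops events the
	 * "cpu" PMU doesn't advertise and joins the rest into a string
	 * that parse_events() can consume, wrapped in {} for a group.
	 */
	static void topdown_filter_example(void)
	{
		const char *attrs[] = { "topdown-total-slots",
					"topdown-fetch-bubbles", NULL };
		char *str = NULL;

		if (!topdown_filter_events(attrs, &str, true) && str && *str)
			printf("%s\n", str); /* "{topdown-total-slots,topdown-fetch-bubbles}" */
		free(str);
	}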

Acked-by: Jiri Olsa <jolsa@redhat.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
---
 tools/perf/arch/x86/util/Build                |  2 +-
 .../perf/arch/x86/util/{group.c => topdown.c} |  2 +-
 tools/perf/builtin-stat.c                     | 51 +-----------------
 tools/perf/util/Build                         |  1 +
 tools/perf/util/topdown.c                     | 53 +++++++++++++++++++
 tools/perf/util/{group.h => topdown.h}        |  6 ++-
 6 files changed, 61 insertions(+), 54 deletions(-)
 rename tools/perf/arch/x86/util/{group.c => topdown.c} (95%)
 create mode 100644 tools/perf/util/topdown.c
 rename tools/perf/util/{group.h => topdown.h} (52%)

diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index 47f9c56e744f..347c39b960eb 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -3,7 +3,7 @@ perf-y += tsc.o
 perf-y += pmu.o
 perf-y += kvm-stat.o
 perf-y += perf_regs.o
-perf-y += group.o
+perf-y += topdown.o
 perf-y += machine.o
 perf-y += event.o
 
diff --git a/tools/perf/arch/x86/util/group.c b/tools/perf/arch/x86/util/topdown.c
similarity index 95%
rename from tools/perf/arch/x86/util/group.c
rename to tools/perf/arch/x86/util/topdown.c
index e2f8034b8973..597e963fb3e7 100644
--- a/tools/perf/arch/x86/util/group.c
+++ b/tools/perf/arch/x86/util/topdown.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <stdio.h>
 #include "api/fs/fs.h"
-#include "util/group.h"
+#include "util/topdown.h"
 
 /*
  * Check whether we can use a group for top down.
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 483a28ef4ec4..5583e22ca808 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -56,7 +56,7 @@
 #include "util/cpumap.h"
 #include "util/thread_map.h"
 #include "util/counts.h"
-#include "util/group.h"
+#include "util/topdown.h"
 #include "util/session.h"
 #include "util/tool.h"
 #include "util/string2.h"
@@ -1495,55 +1495,6 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
 	return 0;
 }
 
-static int topdown_filter_events(const char **attr, char **str, bool use_group)
-{
-	int off = 0;
-	int i;
-	int len = 0;
-	char *s;
-
-	for (i = 0; attr[i]; i++) {
-		if (pmu_have_event("cpu", attr[i])) {
-			len += strlen(attr[i]) + 1;
-			attr[i - off] = attr[i];
-		} else
-			off++;
-	}
-	attr[i - off] = NULL;
-
-	*str = malloc(len + 1 + 2);
-	if (!*str)
-		return -1;
-	s = *str;
-	if (i - off == 0) {
-		*s = 0;
-		return 0;
-	}
-	if (use_group)
-		*s++ = '{';
-	for (i = 0; attr[i]; i++) {
-		strcpy(s, attr[i]);
-		s += strlen(s);
-		*s++ = ',';
-	}
-	if (use_group) {
-		s[-1] = '}';
-		*s = 0;
-	} else
-		s[-1] = 0;
-	return 0;
-}
-
-__weak bool arch_topdown_check_group(bool *warn)
-{
-	*warn = false;
-	return false;
-}
-
-__weak void arch_topdown_group_warn(void)
-{
-}
-
 /*
  * Add default attributes, if there were no attributes specified or
  * if -d/--detailed, -d -d or -d -d -d is used:
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index cd5e41960e64..eebbd5223cae 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -101,6 +101,7 @@ perf-y += call-path.o
 perf-y += rwsem.o
 perf-y += thread-stack.o
 perf-y += spark.o
+perf-y += topdown.o
 perf-$(CONFIG_AUXTRACE) += auxtrace.o
 perf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
 perf-$(CONFIG_AUXTRACE) += intel-pt.o
diff --git a/tools/perf/util/topdown.c b/tools/perf/util/topdown.c
new file mode 100644
index 000000000000..a085b3c77c27
--- /dev/null
+++ b/tools/perf/util/topdown.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include "pmu.h"
+#include "topdown.h"
+
+int topdown_filter_events(const char **attr, char **str, bool use_group)
+{
+	int off = 0;
+	int i;
+	int len = 0;
+	char *s;
+
+	for (i = 0; attr[i]; i++) {
+		if (pmu_have_event("cpu", attr[i])) {
+			len += strlen(attr[i]) + 1;
+			attr[i - off] = attr[i];
+		} else
+			off++;
+	}
+	attr[i - off] = NULL;
+
+	*str = malloc(len + 1 + 2);
+	if (!*str)
+		return -1;
+	s = *str;
+	if (i - off == 0) {
+		*s = 0;
+		return 0;
+	}
+	if (use_group)
+		*s++ = '{';
+	for (i = 0; attr[i]; i++) {
+		strcpy(s, attr[i]);
+		s += strlen(s);
+		*s++ = ',';
+	}
+	if (use_group) {
+		s[-1] = '}';
+		*s = 0;
+	} else
+		s[-1] = 0;
+	return 0;
+}
+
+__weak bool arch_topdown_check_group(bool *warn)
+{
+	*warn = false;
+	return false;
+}
+
+__weak void arch_topdown_group_warn(void)
+{
+}
diff --git a/tools/perf/util/group.h b/tools/perf/util/topdown.h
similarity index 52%
rename from tools/perf/util/group.h
rename to tools/perf/util/topdown.h
index f36c7e31780a..e3d70e95f4f1 100644
--- a/tools/perf/util/group.h
+++ b/tools/perf/util/topdown.h
@@ -1,8 +1,10 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef GROUP_H
-#define GROUP_H 1
+#ifndef TOPDOWN_H
+#define TOPDOWN_H 1
 
 bool arch_topdown_check_group(bool *warn);
 void arch_topdown_group_warn(void);
 
+int topdown_filter_events(const char **attr, char **str, bool use_group);
+
 #endif
-- 
2.17.1



* [PATCH V2 2/4] perf record: Support sample-read topdown metric group
       [not found] <20200910134501.11352-1-kan.liang@linux.intel.com>
  2020-09-10 13:44 ` [PATCH V2 1/4] perf tools: Rename group to topdown kan.liang
@ 2020-09-10 13:44 ` kan.liang
  2020-09-10 13:45 ` [PATCH V2 3/4] perf stat: Support new per thread TopDown metrics kan.liang
  2 siblings, 0 replies; 5+ messages in thread
From: kan.liang @ 2020-09-10 13:44 UTC (permalink / raw)
  To: acme, peterz, mingo, jolsa, namhyung, linux-kernel; +Cc: eranian, ak, Kan Liang

From: Kan Liang <kan.liang@linux.intel.com>

With the hardware TopDown metrics feature, the sample-read feature
should be supported for a topdown group, e.g., sampling a non-topdown
event while reading a topdown metric group. But the current perf
record code errors out.

For a topdown metric group, the slots event must be the leader of the
group, but the slots event doesn't support sampling.

To support sample-read of the topdown metric group, use the 2nd event
of the group as the "leader" for the purposes of sampling.

Only platforms with the TopDown metrics feature support sample-read
of the topdown group. Add arch_topdown_sample_read() to indicate
whether the topdown group supports sample-read.
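
A usage sketch (the exact event list is an assumption based on this
series, not a documented command; the :S modifier requests sample-read
of the group):

	# Hypothetical: sample cycles while reading the topdown metric
	# group. slots must lead the group but cannot sample, so perf
	# record now picks the 2nd event as the sampling "leader".
	perf record -e '{slots,cycles,topdown-retiring,topdown-be-bound}:S' -a -- sleep 1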

Acked-by: Jiri Olsa <jolsa@redhat.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
---
 tools/perf/arch/x86/util/topdown.c | 35 ++++++++++++++++++++++++++++++
 tools/perf/util/record.c           |  3 ++-
 tools/perf/util/topdown.c          |  5 +++++
 tools/perf/util/topdown.h          |  2 ++
 4 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/tools/perf/arch/x86/util/topdown.c b/tools/perf/arch/x86/util/topdown.c
index 597e963fb3e7..fcf1f9c17a44 100644
--- a/tools/perf/arch/x86/util/topdown.c
+++ b/tools/perf/arch/x86/util/topdown.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <stdio.h>
 #include "api/fs/fs.h"
+#include "util/pmu.h"
 #include "util/topdown.h"
 
 /*
@@ -26,3 +27,37 @@ void arch_topdown_group_warn(void)
 		"nmi_watchdog enabled with topdown. May give wrong results.\n"
 		"Disable with echo 0 > /proc/sys/kernel/nmi_watchdog\n");
 }
+
+#define TOPDOWN_SLOTS		0x0400
+
+static bool is_topdown_slots_event(struct evsel *counter)
+{
+	if (!counter->pmu_name)
+		return false;
+
+	if (strcmp(counter->pmu_name, "cpu"))
+		return false;
+
+	if (counter->core.attr.config == TOPDOWN_SLOTS)
+		return true;
+
+	return false;
+}
+
+/*
+ * Check whether a topdown group supports sample-read.
+ *
+ * Only the Topdown metrics feature supports sample-read. The slots
+ * event must be the leader of the topdown group.
+ */
+
+bool arch_topdown_sample_read(struct evsel *leader)
+{
+	if (!pmu_have_event("cpu", "slots"))
+		return false;
+
+	if (is_topdown_slots_event(leader))
+		return true;
+
+	return false;
+}
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index a4cc11592f6b..a857a13b0544 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -13,6 +13,7 @@
 #include "util/perf_api_probe.h"
 #include "record.h"
 #include "../perf-sys.h"
+#include "topdown.h"
 
 /*
  * evsel__config_leader_sampling() uses special rules for leader sampling.
@@ -23,7 +24,7 @@ static struct evsel *evsel__read_sampler(struct evsel *evsel, struct evlist *evl
 {
 	struct evsel *leader = evsel->leader;
 
-	if (evsel__is_aux_event(leader)) {
+	if (evsel__is_aux_event(leader) || arch_topdown_sample_read(leader)) {
 		evlist__for_each_entry(evlist, evsel) {
 			if (evsel->leader == leader && evsel != evsel->leader)
 				return evsel;
diff --git a/tools/perf/util/topdown.c b/tools/perf/util/topdown.c
index a085b3c77c27..1081b20f9891 100644
--- a/tools/perf/util/topdown.c
+++ b/tools/perf/util/topdown.c
@@ -51,3 +51,8 @@ __weak bool arch_topdown_check_group(bool *warn)
 __weak void arch_topdown_group_warn(void)
 {
 }
+
+__weak bool arch_topdown_sample_read(struct evsel *leader __maybe_unused)
+{
+	return false;
+}
diff --git a/tools/perf/util/topdown.h b/tools/perf/util/topdown.h
index e3d70e95f4f1..2f0d0b887639 100644
--- a/tools/perf/util/topdown.h
+++ b/tools/perf/util/topdown.h
@@ -1,9 +1,11 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef TOPDOWN_H
 #define TOPDOWN_H 1
+#include "evsel.h"
 
 bool arch_topdown_check_group(bool *warn);
 void arch_topdown_group_warn(void);
+bool arch_topdown_sample_read(struct evsel *leader);
 
 int topdown_filter_events(const char **attr, char **str, bool use_group);
 
-- 
2.17.1



* [PATCH V2 3/4] perf stat: Support new per thread TopDown metrics
       [not found] <20200910134501.11352-1-kan.liang@linux.intel.com>
  2020-09-10 13:44 ` [PATCH V2 1/4] perf tools: Rename group to topdown kan.liang
  2020-09-10 13:44 ` [PATCH V2 2/4] perf record: Support sample-read topdown metric group kan.liang
@ 2020-09-10 13:45 ` kan.liang
  2020-09-11  3:37   ` Namhyung Kim
  2 siblings, 1 reply; 5+ messages in thread
From: kan.liang @ 2020-09-10 13:45 UTC (permalink / raw)
  To: acme, peterz, mingo, jolsa, namhyung, linux-kernel; +Cc: eranian, ak, Kan Liang

From: Andi Kleen <ak@linux.intel.com>

Icelake has support for reporting per thread TopDown metrics.
These are reported differently from the previous TopDown support:
each metric is standalone, but scaled to pipeline "slots".
We don't need to do anything special for HyperThreading anymore.
Teach perf stat --topdown to handle these new metrics and
print them in the same way as the previous TopDown metrics.
The restriction of only being able to report information per core is
gone.
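
Concretely, the ratios are recovered by renormalizing the four
slots-scaled metric counts; a worked sketch with made-up numbers
(mirroring td_metric_ratio() in the patch below):

	#include <stdio.h>

	int main(void)
	{
		/* Illustrative counts only, already scaled by slots. */
		double retiring = 700, bad_spec = 50, fe_bound = 150, be_bound = 100;
		double sum = retiring + bad_spec + fe_bound + be_bound; /* 1000 */

		printf("retiring        %5.1f%%\n", 100. * retiring / sum); /* 70.0 */
		printf("bad speculation %5.1f%%\n", 100. * bad_spec / sum); /*  5.0 */
		printf("frontend bound  %5.1f%%\n", 100. * fe_bound / sum); /* 15.0 */
		printf("backend bound   %5.1f%%\n", 100. * be_bound / sum); /* 10.0 */
		return 0;
	}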

Acked-by: Jiri Olsa <jolsa@redhat.com>
Co-developed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 tools/perf/Documentation/perf-stat.txt |  7 +-
 tools/perf/builtin-stat.c              | 30 ++++++++-
 tools/perf/util/stat-shadow.c          | 89 ++++++++++++++++++++++++++
 tools/perf/util/stat.c                 |  4 ++
 tools/perf/util/stat.h                 |  8 +++
 5 files changed, 134 insertions(+), 4 deletions(-)

diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index c9bfefc051fb..e803dbdc88a8 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -357,6 +357,11 @@ if the workload is actually bound by the CPU and not by something else.
 For best results it is usually a good idea to use it with interval
 mode like -I 1000, as the bottleneck of workloads can change often.
 
+This enables --metric-only, unless overridden with --no-metric-only.
+
+The following restrictions only apply to older Intel CPUs and Atom,
+on newer CPUs (IceLake and later) TopDown can be collected for any thread:
+
 The top down metrics are collected per core instead of per
 CPU thread. Per core mode is automatically enabled
 and -a (global monitoring) is needed, requiring root rights or
@@ -368,8 +373,6 @@ echo 0 > /proc/sys/kernel/nmi_watchdog
 for best results. Otherwise the bottlenecks may be inconsistent
 on workload with changing phases.
 
-This enables --metric-only, unless overridden with --no-metric-only.
-
 To interpret the results it is usually needed to know on which
 CPUs the workload runs on. If needed the CPUs can be forced using
 taskset.
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 5583e22ca808..6290da5bd142 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -128,6 +128,15 @@ static const char * topdown_attrs[] = {
 	NULL,
 };
 
+static const char *topdown_metric_attrs[] = {
+	"slots",
+	"topdown-retiring",
+	"topdown-bad-spec",
+	"topdown-fe-bound",
+	"topdown-be-bound",
+	NULL,
+};
+
 static const char *smi_cost_attrs = {
 	"{"
 	"msr/aperf/,"
@@ -1691,6 +1700,24 @@ static int add_default_attributes(void)
 		char *str = NULL;
 		bool warn = false;
 
+		if (!force_metric_only)
+			stat_config.metric_only = true;
+
+		if (topdown_filter_events(topdown_metric_attrs, &str, 1) < 0) {
+			pr_err("Out of memory\n");
+			return -1;
+		}
+		if (topdown_metric_attrs[0] && str) {
+			if (!stat_config.interval && !stat_config.metric_only) {
+				fprintf(stat_config.output,
+					"Topdown accuracy may decrease when measuring long periods.\n"
+					"Please print the result regularly, e.g. -I1000\n");
+			}
+			goto setup_metrics;
+		}
+
+		str = NULL;
+
 		if (stat_config.aggr_mode != AGGR_GLOBAL &&
 		    stat_config.aggr_mode != AGGR_CORE) {
 			pr_err("top down event configuration requires --per-core mode\n");
@@ -1702,8 +1729,6 @@ static int add_default_attributes(void)
 			return -1;
 		}
 
-		if (!force_metric_only)
-			stat_config.metric_only = true;
 		if (topdown_filter_events(topdown_attrs, &str,
 				arch_topdown_check_group(&warn)) < 0) {
 			pr_err("Out of memory\n");
@@ -1712,6 +1737,7 @@ static int add_default_attributes(void)
 		if (topdown_attrs[0] && str) {
 			if (warn)
 				arch_topdown_group_warn();
+setup_metrics:
 			err = parse_events(evsel_list, str, &errinfo);
 			if (err) {
 				fprintf(stderr,
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index e1ba6c1b916a..3204084161c9 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -241,6 +241,18 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
 	else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
 		update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
 				    ctx, cpu, count);
+	else if (perf_stat_evsel__is(counter, TOPDOWN_RETIRING))
+		update_runtime_stat(st, STAT_TOPDOWN_RETIRING,
+				    ctx, cpu, count);
+	else if (perf_stat_evsel__is(counter, TOPDOWN_BAD_SPEC))
+		update_runtime_stat(st, STAT_TOPDOWN_BAD_SPEC,
+				    ctx, cpu, count);
+	else if (perf_stat_evsel__is(counter, TOPDOWN_FE_BOUND))
+		update_runtime_stat(st, STAT_TOPDOWN_FE_BOUND,
+				    ctx, cpu, count);
+	else if (perf_stat_evsel__is(counter, TOPDOWN_BE_BOUND))
+		update_runtime_stat(st, STAT_TOPDOWN_BE_BOUND,
+				    ctx, cpu, count);
 	else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
 		update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
 				    ctx, cpu, count);
@@ -705,6 +717,47 @@ static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)
 	return sanitize_val(1.0 - sum);
 }
 
+/*
+ * Kernel reports metrics multiplied with slots. To get back
+ * the ratios we need to recreate the sum.
+ */
+
+static double td_metric_ratio(int ctx, int cpu,
+			      enum stat_type type,
+			      struct runtime_stat *stat)
+{
+	double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) +
+		runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) +
+		runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) +
+		runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu);
+	double d = runtime_stat_avg(stat, type, ctx, cpu);
+
+	if (sum)
+		return d / sum;
+	return 0;
+}
+
+/*
+ * ... but only if most of the values are actually available.
+ * We allow two missing.
+ */
+
+static bool full_td(int ctx, int cpu,
+		    struct runtime_stat *stat)
+{
+	int c = 0;
+
+	if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) > 0)
+		c++;
+	if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) > 0)
+		c++;
+	if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) > 0)
+		c++;
+	if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu) > 0)
+		c++;
+	return c >= 2;
+}
+
 static void print_smi_cost(struct perf_stat_config *config,
 			   int cpu, struct evsel *evsel,
 			   struct perf_stat_output_ctx *out,
@@ -1071,6 +1124,42 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 					be_bound * 100.);
 		else
 			print_metric(config, ctxp, NULL, NULL, name, 0);
+	} else if (perf_stat_evsel__is(evsel, TOPDOWN_RETIRING) &&
+			full_td(ctx, cpu, st)) {
+		double retiring = td_metric_ratio(ctx, cpu,
+						  STAT_TOPDOWN_RETIRING, st);
+
+		if (retiring > 0.7)
+			color = PERF_COLOR_GREEN;
+		print_metric(config, ctxp, color, "%8.1f%%", "retiring",
+				retiring * 100.);
+	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) &&
+			full_td(ctx, cpu, st)) {
+		double fe_bound = td_metric_ratio(ctx, cpu,
+						  STAT_TOPDOWN_FE_BOUND, st);
+
+		if (fe_bound > 0.2)
+			color = PERF_COLOR_RED;
+		print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
+				fe_bound * 100.);
+	} else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) &&
+			full_td(ctx, cpu, st)) {
+		double be_bound = td_metric_ratio(ctx, cpu,
+						  STAT_TOPDOWN_BE_BOUND, st);
+
+		if (be_bound > 0.2)
+			color = PERF_COLOR_RED;
+		print_metric(config, ctxp, color, "%8.1f%%", "backend bound",
+				be_bound * 100.);
+	} else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) &&
+			full_td(ctx, cpu, st)) {
+		double bad_spec = td_metric_ratio(ctx, cpu,
+						  STAT_TOPDOWN_BAD_SPEC, st);
+
+		if (bad_spec > 0.1)
+			color = PERF_COLOR_RED;
+		print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
+				bad_spec * 100.);
 	} else if (evsel->metric_expr) {
 		generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
 				evsel->name, evsel->metric_name, NULL, 1, cpu, out, st);
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index cdb154381a87..bd0decd6d753 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -95,6 +95,10 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
 	ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
 	ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
 	ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
+	ID(TOPDOWN_RETIRING, topdown-retiring),
+	ID(TOPDOWN_BAD_SPEC, topdown-bad-spec),
+	ID(TOPDOWN_FE_BOUND, topdown-fe-bound),
+	ID(TOPDOWN_BE_BOUND, topdown-be-bound),
 	ID(SMI_NUM, msr/smi/),
 	ID(APERF, msr/aperf/),
 };
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index f8778cffd941..6c944d81d726 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -28,6 +28,10 @@ enum perf_stat_evsel_id {
 	PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED,
 	PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES,
 	PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES,
+	PERF_STAT_EVSEL_ID__TOPDOWN_RETIRING,
+	PERF_STAT_EVSEL_ID__TOPDOWN_BAD_SPEC,
+	PERF_STAT_EVSEL_ID__TOPDOWN_FE_BOUND,
+	PERF_STAT_EVSEL_ID__TOPDOWN_BE_BOUND,
 	PERF_STAT_EVSEL_ID__SMI_NUM,
 	PERF_STAT_EVSEL_ID__APERF,
 	PERF_STAT_EVSEL_ID__MAX,
@@ -82,6 +86,10 @@ enum stat_type {
 	STAT_TOPDOWN_SLOTS_RETIRED,
 	STAT_TOPDOWN_FETCH_BUBBLES,
 	STAT_TOPDOWN_RECOVERY_BUBBLES,
+	STAT_TOPDOWN_RETIRING,
+	STAT_TOPDOWN_BAD_SPEC,
+	STAT_TOPDOWN_FE_BOUND,
+	STAT_TOPDOWN_BE_BOUND,
 	STAT_SMI_NUM,
 	STAT_APERF,
 	STAT_MAX
-- 
2.17.1



* Re: [PATCH V2 3/4] perf stat: Support new per thread TopDown metrics
  2020-09-10 13:45 ` [PATCH V2 3/4] perf stat: Support new per thread TopDown metrics kan.liang
@ 2020-09-11  3:37   ` Namhyung Kim
  2020-09-11 14:12     ` Liang, Kan
  0 siblings, 1 reply; 5+ messages in thread
From: Namhyung Kim @ 2020-09-11  3:37 UTC (permalink / raw)
  To: Kan Liang
  Cc: Arnaldo Carvalho de Melo, Peter Zijlstra, Ingo Molnar, Jiri Olsa,
	linux-kernel, Stephane Eranian, Andi Kleen

Hello,

On Thu, Sep 10, 2020 at 10:48 PM <kan.liang@linux.intel.com> wrote:
>
> From: Andi Kleen <ak@linux.intel.com>
>
> Icelake has support for reporting per thread TopDown metrics.
> These are reported differently from the previous TopDown support:
> each metric is standalone, but scaled to pipeline "slots".
> We don't need to do anything special for HyperThreading anymore.
> Teach perf stat --topdown to handle these new metrics and
> print them in the same way as the previous TopDown metrics.
> The restriction of only being able to report information per core is
> gone.
>
> Acked-by: Jiri Olsa <jolsa@redhat.com>
> Co-developed-by: Kan Liang <kan.liang@linux.intel.com>
> Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
> Signed-off-by: Andi Kleen <ak@linux.intel.com>
> ---
>  tools/perf/Documentation/perf-stat.txt |  7 +-
>  tools/perf/builtin-stat.c              | 30 ++++++++-
>  tools/perf/util/stat-shadow.c          | 89 ++++++++++++++++++++++++++
>  tools/perf/util/stat.c                 |  4 ++
>  tools/perf/util/stat.h                 |  8 +++
>  5 files changed, 134 insertions(+), 4 deletions(-)
>
> diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
> index c9bfefc051fb..e803dbdc88a8 100644
> --- a/tools/perf/Documentation/perf-stat.txt
> +++ b/tools/perf/Documentation/perf-stat.txt
> @@ -357,6 +357,11 @@ if the workload is actually bound by the CPU and not by something else.
>  For best results it is usually a good idea to use it with interval
>  mode like -I 1000, as the bottleneck of workloads can change often.
>
> +This enables --metric-only, unless overridden with --no-metric-only.
> +
> +The following restrictions only apply to older Intel CPUs and Atom,
> +on newer CPUs (IceLake and later) TopDown can be collected for any thread:
> +
>  The top down metrics are collected per core instead of per
>  CPU thread. Per core mode is automatically enabled
>  and -a (global monitoring) is needed, requiring root rights or
> @@ -368,8 +373,6 @@ echo 0 > /proc/sys/kernel/nmi_watchdog
>  for best results. Otherwise the bottlenecks may be inconsistent
>  on workload with changing phases.
>
> -This enables --metric-only, unless overridden with --no-metric-only.
> -
>  To interpret the results it is usually needed to know on which
>  CPUs the workload runs on. If needed the CPUs can be forced using
>  taskset.
> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> index 5583e22ca808..6290da5bd142 100644
> --- a/tools/perf/builtin-stat.c
> +++ b/tools/perf/builtin-stat.c
> @@ -128,6 +128,15 @@ static const char * topdown_attrs[] = {
>         NULL,
>  };
>
> +static const char *topdown_metric_attrs[] = {
> +       "slots",
> +       "topdown-retiring",
> +       "topdown-bad-spec",
> +       "topdown-fe-bound",
> +       "topdown-be-bound",
> +       NULL,
> +};
> +
>  static const char *smi_cost_attrs = {
>         "{"
>         "msr/aperf/,"
> @@ -1691,6 +1700,24 @@ static int add_default_attributes(void)
>                 char *str = NULL;
>                 bool warn = false;
>
> +               if (!force_metric_only)
> +                       stat_config.metric_only = true;
> +
> +               if (topdown_filter_events(topdown_metric_attrs, &str, 1) < 0) {
> +                       pr_err("Out of memory\n");
> +                       return -1;
> +               }
> +               if (topdown_metric_attrs[0] && str) {
> +                       if (!stat_config.interval && !stat_config.metric_only) {
> +                               fprintf(stat_config.output,
> +                                       "Topdown accuracy may decrease when measuring long periods.\n"
> +                                       "Please print the result regularly, e.g. -I1000\n");
> +                       }
> +                       goto setup_metrics;
> +               }
> +
> +               str = NULL;

zfree(&str) ?

Thanks
Namhyung


> +
>                 if (stat_config.aggr_mode != AGGR_GLOBAL &&
>                     stat_config.aggr_mode != AGGR_CORE) {
>                         pr_err("top down event configuration requires --per-core mode\n");
> @@ -1702,8 +1729,6 @@ static int add_default_attributes(void)
>                         return -1;
>                 }
>
> -               if (!force_metric_only)
> -                       stat_config.metric_only = true;
>                 if (topdown_filter_events(topdown_attrs, &str,
>                                 arch_topdown_check_group(&warn)) < 0) {
>                         pr_err("Out of memory\n");
> @@ -1712,6 +1737,7 @@ static int add_default_attributes(void)
>                 if (topdown_attrs[0] && str) {
>                         if (warn)
>                                 arch_topdown_group_warn();
> +setup_metrics:
>                         err = parse_events(evsel_list, str, &errinfo);
>                         if (err) {
>                                 fprintf(stderr,


* Re: [PATCH V2 3/4] perf stat: Support new per thread TopDown metrics
  2020-09-11  3:37   ` Namhyung Kim
@ 2020-09-11 14:12     ` Liang, Kan
  0 siblings, 0 replies; 5+ messages in thread
From: Liang, Kan @ 2020-09-11 14:12 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Arnaldo Carvalho de Melo, Peter Zijlstra, Ingo Molnar, Jiri Olsa,
	linux-kernel, Stephane Eranian, Andi Kleen



On 9/10/2020 11:37 PM, Namhyung Kim wrote:
> Hello,
> 
> On Thu, Sep 10, 2020 at 10:48 PM <kan.liang@linux.intel.com> wrote:
>>
>> From: Andi Kleen <ak@linux.intel.com>
>>
>> Icelake has support for reporting per thread TopDown metrics.
>> These are reported differently from the previous TopDown support:
>> each metric is standalone, but scaled to pipeline "slots".
>> We don't need to do anything special for HyperThreading anymore.
>> Teach perf stat --topdown to handle these new metrics and
>> print them in the same way as the previous TopDown metrics.
>> The restriction of only being able to report information per core is
>> gone.
>>
>> Acked-by: Jiri Olsa <jolsa@redhat.com>
>> Co-developed-by: Kan Liang <kan.liang@linux.intel.com>
>> Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
>> Signed-off-by: Andi Kleen <ak@linux.intel.com>
>> ---
>>   tools/perf/Documentation/perf-stat.txt |  7 +-
>>   tools/perf/builtin-stat.c              | 30 ++++++++-
>>   tools/perf/util/stat-shadow.c          | 89 ++++++++++++++++++++++++++
>>   tools/perf/util/stat.c                 |  4 ++
>>   tools/perf/util/stat.h                 |  8 +++
>>   5 files changed, 134 insertions(+), 4 deletions(-)
>>
>> diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
>> index c9bfefc051fb..e803dbdc88a8 100644
>> --- a/tools/perf/Documentation/perf-stat.txt
>> +++ b/tools/perf/Documentation/perf-stat.txt
>> @@ -357,6 +357,11 @@ if the workload is actually bound by the CPU and not by something else.
>>   For best results it is usually a good idea to use it with interval
>>   mode like -I 1000, as the bottleneck of workloads can change often.
>>
>> +This enables --metric-only, unless overridden with --no-metric-only.
>> +
>> +The following restrictions only apply to older Intel CPUs and Atom,
>> +on newer CPUs (IceLake and later) TopDown can be collected for any thread:
>> +
>>   The top down metrics are collected per core instead of per
>>   CPU thread. Per core mode is automatically enabled
>>   and -a (global monitoring) is needed, requiring root rights or
>> @@ -368,8 +373,6 @@ echo 0 > /proc/sys/kernel/nmi_watchdog
>>   for best results. Otherwise the bottlenecks may be inconsistent
>>   on workload with changing phases.
>>
>> -This enables --metric-only, unless overridden with --no-metric-only.
>> -
>>   To interpret the results it is usually needed to know on which
>>   CPUs the workload runs on. If needed the CPUs can be forced using
>>   taskset.
>> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
>> index 5583e22ca808..6290da5bd142 100644
>> --- a/tools/perf/builtin-stat.c
>> +++ b/tools/perf/builtin-stat.c
>> @@ -128,6 +128,15 @@ static const char * topdown_attrs[] = {
>>          NULL,
>>   };
>>
>> +static const char *topdown_metric_attrs[] = {
>> +       "slots",
>> +       "topdown-retiring",
>> +       "topdown-bad-spec",
>> +       "topdown-fe-bound",
>> +       "topdown-be-bound",
>> +       NULL,
>> +};
>> +
>>   static const char *smi_cost_attrs = {
>>          "{"
>>          "msr/aperf/,"
>> @@ -1691,6 +1700,24 @@ static int add_default_attributes(void)
>>                  char *str = NULL;
>>                  bool warn = false;
>>
>> +               if (!force_metric_only)
>> +                       stat_config.metric_only = true;
>> +
>> +               if (topdown_filter_events(topdown_metric_attrs, &str, 1) < 0) {
>> +                       pr_err("Out of memory\n");
>> +                       return -1;
>> +               }
>> +               if (topdown_metric_attrs[0] && str) {
>> +                       if (!stat_config.interval && !stat_config.metric_only) {
>> +                               fprintf(stat_config.output,
>> +                                       "Topdown accuracy may decrease when measuring long periods.\n"
>> +                                       "Please print the result regularly, e.g. -I1000\n");
>> +                       }
>> +                       goto setup_metrics;
>> +               }
>> +
>> +               str = NULL;
> 
> zfree(&str) ?

Yes, even if the topdown events don't exist, str is still allocated.
The str should be freed.
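
A minimal sketch of the intended fix (assuming the zfree() helper
already used elsewhere in the perf tools):

	-		str = NULL;
	+		zfree(&str);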

I will send V3 to fix it shortly.

Thanks,
Kan

> 
> Thanks
> Namhyung
> 
> 
>> +
>>                  if (stat_config.aggr_mode != AGGR_GLOBAL &&
>>                      stat_config.aggr_mode != AGGR_CORE) {
>>                          pr_err("top down event configuration requires --per-core mode\n");
>> @@ -1702,8 +1729,6 @@ static int add_default_attributes(void)
>>                          return -1;
>>                  }
>>
>> -               if (!force_metric_only)
>> -                       stat_config.metric_only = true;
>>                  if (topdown_filter_events(topdown_attrs, &str,
>>                                  arch_topdown_check_group(&warn)) < 0) {
>>                          pr_err("Out of memory\n");
>> @@ -1712,6 +1737,7 @@ static int add_default_attributes(void)
>>                  if (topdown_attrs[0] && str) {
>>                          if (warn)
>>                                  arch_topdown_group_warn();
>> +setup_metrics:
>>                          err = parse_events(evsel_list, str, &errinfo);
>>                          if (err) {
>>                                  fprintf(stderr,


end of thread

Thread overview: 5+ messages
     [not found] <20200910134501.11352-1-kan.liang@linux.intel.com>
2020-09-10 13:44 ` [PATCH V2 1/4] perf tools: Rename group to topdown kan.liang
2020-09-10 13:44 ` [PATCH V2 2/4] perf record: Support sample-read topdown metric group kan.liang
2020-09-10 13:45 ` [PATCH V2 3/4] perf stat: Support new per thread TopDown metrics kan.liang
2020-09-11  3:37   ` Namhyung Kim
2020-09-11 14:12     ` Liang, Kan
