linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/3] perf stat: Support metrics with hybrid events
@ 2022-04-22  6:56 zhengjun.xing
  2022-04-22  6:56 ` [PATCH 2/3] perf stat: Merge event counts from all hybrid PMUs zhengjun.xing
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: zhengjun.xing @ 2022-04-22  6:56 UTC (permalink / raw)
  To: acme, peterz, mingo, alexander.shishkin, jolsa
  Cc: linux-kernel, linux-perf-users, irogers, adrian.hunter, ak,
	kan.liang, zhengjun.xing

From: Zhengjun Xing <zhengjun.xing@linux.intel.com>

One metric such as 'Kernel_Utilization' may be from different PMUs and
consists of different events.

For core,
Kernel_Utilization = cpu_clk_unhalted.thread:k / cpu_clk_unhalted.thread

For atom,
Kernel_Utilization = cpu_clk_unhalted.core:k / cpu_clk_unhalted.core

The metric group string for core is:
'{cpu_clk_unhalted.thread/metric-id=cpu_clk_unhalted.thread:k/k,cpu_clk_unhalted.thread/metric-id=cpu_clk_unhalted.thread/}:W'
It's internally expanded to:
'{cpu_clk_unhalted.thread_p/metric-id=cpu_clk_unhalted.thread_p:k/k,cpu_clk_unhalted.thread/metric-id=cpu_clk_unhalted.thread/}:W#cpu_core'

The metric group string for atom is:
'{cpu_clk_unhalted.core/metric-id=cpu_clk_unhalted.core:k/k,cpu_clk_unhalted.core/metric-id=cpu_clk_unhalted.core/}:W'
It's internally expanded to:
'{cpu_clk_unhalted.core/metric-id=cpu_clk_unhalted.core:k/k,cpu_clk_unhalted.core/metric-id=cpu_clk_unhalted.core/}:W#cpu_atom'

That means the group "{cpu_clk_unhalted.thread:k,cpu_clk_unhalted.thread}:W"
is from cpu_core PMU and the group "{cpu_clk_unhalted.core:k,cpu_clk_unhalted.core}"
is from cpu_atom PMU. And then next, check if the events in the group are
valid on that PMU. If one event is not valid on that PMU, the associated
group would be removed internally.

In this example, cpu_clk_unhalted.thread is valid on cpu_core and
cpu_clk_unhalted.core is valid on cpu_atom. So the checks for these two
groups are passed.

Before:

  # ./perf stat -M Kernel_Utilization -a sleep 1
WARNING: events in group from different hybrid PMUs!
WARNING: grouped events cpus do not match, disabling group:
  anon group { CPU_CLK_UNHALTED.THREAD_P:k, CPU_CLK_UNHALTED.THREAD_P:k, CPU_CLK_UNHALTED.THREAD, CPU_CLK_UNHALTED.THREAD }

 Performance counter stats for 'system wide':

        17,639,501      cpu_atom/CPU_CLK_UNHALTED.CORE/ #     1.00 Kernel_Utilization
        17,578,757      cpu_atom/CPU_CLK_UNHALTED.CORE:k/
     1,005,350,226 ns   duration_time
        43,012,352      cpu_core/CPU_CLK_UNHALTED.THREAD_P:k/ #     0.99 Kernel_Utilization
        17,608,010      cpu_atom/CPU_CLK_UNHALTED.THREAD_P:k/
        43,608,755      cpu_core/CPU_CLK_UNHALTED.THREAD/
        17,630,838      cpu_atom/CPU_CLK_UNHALTED.THREAD/
     1,005,350,226 ns   duration_time

       1.005350226 seconds time elapsed

After:

  # ./perf stat -M Kernel_Utilization -a sleep 1

 Performance counter stats for 'system wide':

        17,981,895      CPU_CLK_UNHALTED.CORE [cpu_atom] #     1.00 Kernel_Utilization
        17,925,405      CPU_CLK_UNHALTED.CORE:k [cpu_atom]
     1,004,811,366 ns   duration_time
        41,246,425      CPU_CLK_UNHALTED.THREAD_P:k [cpu_core] #     0.99 Kernel_Utilization
        41,819,129      CPU_CLK_UNHALTED.THREAD [cpu_core]
     1,004,811,366 ns   duration_time

       1.004811366 seconds time elapsed

Signed-off-by: Zhengjun Xing <zhengjun.xing@linux.intel.com>
Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
---
 tools/perf/util/metricgroup.c  | 263 ++++++++++++++++++++++++++++++---
 tools/perf/util/stat-display.c |   8 +-
 2 files changed, 249 insertions(+), 22 deletions(-)

diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index d8492e339521..126a43a8917e 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -141,6 +141,11 @@ struct metric {
 	 * output.
 	 */
 	const char *metric_unit;
+	/**
+	 * The name of the CPU such as "cpu_core" or "cpu_atom" in hybrid systems
+	 * and "NULL" in non-hybrid systems.
+	 */
+	const char *pmu_name;
 	/** Optional null terminated array of referenced metrics. */
 	struct metric_ref *metric_refs;
 	/**
@@ -215,6 +220,7 @@ static struct metric *metric__new(const struct pmu_event *pe,
 	}
 	m->metric_expr = pe->metric_expr;
 	m->metric_unit = pe->unit;
+	m->pmu_name = pe->pmu;
 	m->pctx->runtime = runtime;
 	m->has_constraint = metric_no_group || metricgroup__has_constraint(pe);
 	m->metric_refs = NULL;
@@ -250,10 +256,12 @@ static bool contains_metric_id(struct evsel **metric_events, int num_events,
  * @ids: the metric IDs to match.
  * @metric_evlist: the list of perf events.
  * @out_metric_events: holds the created metric events array.
+ * @pmu_name: the name of the CPU.
  */
 static int setup_metric_events(struct hashmap *ids,
 			       struct evlist *metric_evlist,
-			       struct evsel ***out_metric_events)
+			       struct evsel ***out_metric_events,
+			       const char *pmu_name)
 {
 	struct evsel **metric_events;
 	const char *metric_id;
@@ -286,6 +294,10 @@ static int setup_metric_events(struct hashmap *ids,
 		 * about this event.
 		 */
 		if (hashmap__find(ids, metric_id, (void **)&val_ptr)) {
+			if (evsel__is_hybrid(ev) && pmu_name &&
+			    strcmp(pmu_name, ev->pmu_name)) {
+				continue;
+			}
 			metric_events[matched_events++] = ev;
 
 			if (matched_events >= ids_size)
@@ -724,7 +736,8 @@ static int decode_all_metric_ids(struct evlist *perf_evlist, const char *modifie
 static int metricgroup__build_event_string(struct strbuf *events,
 					   const struct expr_parse_ctx *ctx,
 					   const char *modifier,
-					   bool has_constraint)
+					   bool has_constraint,
+					   const char *pmu_name)
 {
 	struct hashmap_entry *cur;
 	size_t bkt;
@@ -806,12 +819,18 @@ static int metricgroup__build_event_string(struct strbuf *events,
 		if (no_group) {
 			/* Strange case of a metric of just duration_time. */
 			ret = strbuf_addf(events, "duration_time");
-		} else if (!has_constraint)
-			ret = strbuf_addf(events, "}:W,duration_time");
-		else
+		} else if (!has_constraint) {
+			ret = strbuf_addf(events, "}:W");
+			if (pmu_name)
+				ret = strbuf_addf(events, "#%s", pmu_name);
 			ret = strbuf_addf(events, ",duration_time");
-	} else if (!no_group && !has_constraint)
+		} else
+			ret = strbuf_addf(events, ",duration_time");
+	} else if (!no_group && !has_constraint) {
 		ret = strbuf_addf(events, "}:W");
+		if (pmu_name)
+			ret = strbuf_addf(events, "#%s", pmu_name);
+	}
 
 	return ret;
 #undef RETURN_IF_NON_ZERO
@@ -1150,11 +1169,13 @@ static int metric_list_cmp(void *priv __maybe_unused, const struct list_head *l,
  * @metric_list: The list that the metric or metric group are added to.
  * @map: The map that is searched for metrics, most commonly the table for the
  *       architecture perf is running upon.
+ * @pmu_name: the name of the CPU.
  */
-static int metricgroup__add_metric(const char *metric_name, const char *modifier,
-				   bool metric_no_group,
+static int metricgroup__add_metric(const char *metric_name,
+				   const char *modifier, bool metric_no_group,
 				   struct list_head *metric_list,
-				   const struct pmu_events_map *map)
+				   const struct pmu_events_map *map,
+				   const char *pmu_name)
 {
 	const struct pmu_event *pe;
 	LIST_HEAD(list);
@@ -1167,6 +1188,8 @@ static int metricgroup__add_metric(const char *metric_name, const char *modifier
 	 */
 	map_for_each_metric(pe, i, map, metric_name) {
 		has_match = true;
+		if (pmu_name && pe->pmu && strcmp(pmu_name, pe->pmu))
+			continue;
 		ret = add_metric(&list, pe, modifier, metric_no_group,
 				 /*root_metric=*/NULL,
 				 /*visited_metrics=*/NULL, map);
@@ -1215,10 +1238,12 @@ static int metricgroup__add_metric(const char *metric_name, const char *modifier
  * @metric_list: The list that metrics are added to.
  * @map: The map that is searched for metrics, most commonly the table for the
  *       architecture perf is running upon.
+ * @pmu_name: the name of the CPU.
  */
 static int metricgroup__add_metric_list(const char *list, bool metric_no_group,
 					struct list_head *metric_list,
-					const struct pmu_events_map *map)
+					const struct pmu_events_map *map,
+					const char *pmu_name)
 {
 	char *list_itr, *list_copy, *metric_name, *modifier;
 	int ret, count = 0;
@@ -1235,7 +1260,7 @@ static int metricgroup__add_metric_list(const char *list, bool metric_no_group,
 
 		ret = metricgroup__add_metric(metric_name, modifier,
 					      metric_no_group, metric_list,
-					      map);
+					      map, pmu_name);
 		if (ret == -EINVAL)
 			pr_err("Cannot find metric or group `%s'\n", metric_name);
 
@@ -1310,6 +1335,183 @@ static int build_combined_expr_ctx(const struct list_head *metric_list,
 	return ret;
 }
 
+static char *get_metric_pmus(char *orig_str, struct strbuf *metric_pmus)
+{
+	char *llist, *nlist, *p1, *p2, *new_str = NULL;
+	int ret;
+	struct strbuf new_events;
+
+	if (!strchr(orig_str, '#')) {
+		/*
+		 * pmu name is added after '#'. If no '#' found,
+		 * don't need to process pmu.
+		 */
+		return strdup(orig_str);
+	}
+
+	nlist = strdup(orig_str);
+	if (!nlist)
+		return new_str;
+
+	ret = strbuf_init(&new_events, 100);
+	if (ret)
+		goto err_out;
+
+	ret = strbuf_grow(metric_pmus, 100);
+	if (ret)
+		goto err_out;
+
+	llist = nlist;
+	while ((p1 = strsep(&llist, ",")) != NULL) {
+		p2 = strchr(p1, '#');
+		if (p2) {
+			*p2 = 0;
+			ret = strbuf_addf(&new_events, "%s,", p1);
+			if (ret)
+				goto err_out;
+
+			ret = strbuf_addf(metric_pmus, "%s,", p2 + 1);
+			if (ret)
+				goto err_out;
+
+		} else {
+			ret = strbuf_addf(&new_events, "%s,", p1);
+			if (ret)
+				goto err_out;
+		}
+	}
+
+	new_str = strdup(new_events.buf);
+	if (new_str) {
+		/* Remove last ',' */
+		new_str[strlen(new_str) - 1] = 0;
+	}
+err_out:
+	free(nlist);
+	strbuf_release(&new_events);
+	return new_str;
+}
+
+static void set_pmu_unmatched_events(struct evlist *evlist, int group_idx,
+				     char *pmu_name,
+				     unsigned long *evlist_removed)
+{
+	struct evsel *evsel, *pos;
+	int i = 0, j = 0;
+
+	/*
+	 * Move to the first evsel of a given group
+	 */
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel__is_group_leader(evsel) &&
+		    evsel->core.nr_members >= 1) {
+			if (i < group_idx) {
+				j += evsel->core.nr_members;
+				i++;
+				continue;
+			}
+		}
+	}
+
+	i = 0;
+	evlist__for_each_entry(evlist, evsel) {
+		if (i < j) {
+			i++;
+			continue;
+		}
+
+		/*
+		 * Now we are at the first evsel in the group
+		 */
+		for_each_group_evsel(pos, evsel) {
+			if (evsel__is_hybrid(pos) &&
+			    strcmp(pos->pmu_name, pmu_name)) {
+				set_bit(pos->core.idx, evlist_removed);
+			}
+		}
+		break;
+	}
+}
+
+static void remove_pmu_umatched_events(struct evlist *evlist, char *metric_pmus)
+{
+	struct evsel *evsel, *tmp, *new_leader = NULL;
+	unsigned long *evlist_removed;
+	char *llist, *nlist, *p1;
+	bool need_new_leader = false;
+	int i = 0, new_nr_members = 0;
+
+	nlist = strdup(metric_pmus);
+	if (!nlist)
+		return;
+
+	evlist_removed = bitmap_zalloc(evlist->core.nr_entries);
+	if (!evlist_removed) {
+		free(nlist);
+		return;
+	}
+
+	llist = nlist;
+	while ((p1 = strsep(&llist, ",")) != NULL) {
+		if (strlen(p1) > 0) {
+			/*
+			 * p1 points to the string of pmu name, e.g. "cpu_atom".
+			 * The metric group string has pmu suffixes, e.g.
+			 * "{inst_retired.any,cpu_clk_unhalted.thread}:W#cpu_core,
+			 *  {cpu_clk_unhalted.core,inst_retired.any_p}:W#cpu_atom"
+			 * By counting the pmu name, we can know the index of
+			 * group.
+			 */
+			set_pmu_unmatched_events(evlist, i++, p1,
+						 evlist_removed);
+		}
+	}
+
+	evlist__for_each_entry_safe(evlist, tmp, evsel) {
+		if (test_bit(evsel->core.idx, evlist_removed)) {
+			if (!evsel__is_group_leader(evsel)) {
+				if (!need_new_leader) {
+					if (new_leader)
+						new_leader->core.leader->nr_members--;
+					else
+						evsel->core.leader->nr_members--;
+				} else
+					new_nr_members--;
+			} else {
+				/*
+				 * If group leader is to remove, we need to
+				 * prepare a new leader and adjust all group
+				 * members.
+				 */
+				need_new_leader = true;
+				new_nr_members =
+				    evsel->core.leader->nr_members - 1;
+			}
+
+			evlist__remove(evlist, evsel);
+			evsel__delete(evsel);
+		} else {
+			if (!evsel__is_group_leader(evsel)) {
+				if (need_new_leader) {
+					need_new_leader = false;
+					new_leader = evsel;
+					new_leader->core.leader =
+					    &new_leader->core;
+					new_leader->core.nr_members =
+					    new_nr_members;
+				} else if (new_leader)
+					evsel->core.leader = &new_leader->core;
+			} else {
+				need_new_leader = false;
+				new_leader = NULL;
+			}
+		}
+	}
+
+	bitmap_free(evlist_removed);
+	free(nlist);
+}
+
 /**
  * parse_ids - Build the event string for the ids and parse them creating an
  *             evlist. The encoded metric_ids are decoded.
@@ -1319,14 +1521,18 @@ static int build_combined_expr_ctx(const struct list_head *metric_list,
  * @modifier: any modifiers added to the events.
  * @has_constraint: false if events should be placed in a weak group.
  * @out_evlist: the created list of events.
+ * @pmu_name: the name of the CPU.
  */
 static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu,
 		     struct expr_parse_ctx *ids, const char *modifier,
-		     bool has_constraint, struct evlist **out_evlist)
+		     bool has_constraint, struct evlist **out_evlist,
+		     const char *pmu_name)
 {
 	struct parse_events_error parse_error;
 	struct evlist *parsed_evlist;
 	struct strbuf events = STRBUF_INIT;
+	struct strbuf metric_pmus = STRBUF_INIT;
+	char *nlist = NULL;
 	int ret;
 
 	*out_evlist = NULL;
@@ -1353,7 +1559,7 @@ static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu,
 		ids__insert(ids->ids, tmp);
 	}
 	ret = metricgroup__build_event_string(&events, ids, modifier,
-					      has_constraint);
+					      has_constraint, pmu_name);
 	if (ret)
 		return ret;
 
@@ -1364,11 +1570,20 @@ static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu,
 	}
 	pr_debug("Parsing metric events '%s'\n", events.buf);
 	parse_events_error__init(&parse_error);
-	ret = __parse_events(parsed_evlist, events.buf, &parse_error, fake_pmu);
+	nlist = get_metric_pmus(events.buf, &metric_pmus);
+	if (!nlist) {
+		ret = -ENOMEM;
+		goto err_out;
+	}
+	ret = __parse_events(parsed_evlist, nlist, &parse_error, fake_pmu);
 	if (ret) {
 		parse_events_error__print(&parse_error, events.buf);
 		goto err_out;
 	}
+
+	if (metric_pmus.alloc)
+		remove_pmu_umatched_events(parsed_evlist, metric_pmus.buf);
+
 	ret = decode_all_metric_ids(parsed_evlist, modifier);
 	if (ret)
 		goto err_out;
@@ -1376,9 +1591,12 @@ static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu,
 	*out_evlist = parsed_evlist;
 	parsed_evlist = NULL;
 err_out:
+	if (nlist)
+		free(nlist);
 	parse_events_error__exit(&parse_error);
 	evlist__delete(parsed_evlist);
 	strbuf_release(&events);
+	strbuf_release(&metric_pmus);
 	return ret;
 }
 
@@ -1397,7 +1615,8 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
 	if (metric_events_list->nr_entries == 0)
 		metricgroup__rblist_init(metric_events_list);
 	ret = metricgroup__add_metric_list(str, metric_no_group,
-					   &metric_list, map);
+					   &metric_list, map,
+					   perf_evlist->hybrid_pmu_name);
 	if (ret)
 		goto out;
 
@@ -1413,7 +1632,8 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
 			ret = parse_ids(metric_no_merge, fake_pmu, combined,
 					/*modifier=*/NULL,
 					/*has_constraint=*/true,
-					&combined_evlist);
+					&combined_evlist,
+					perf_evlist->hybrid_pmu_name);
 		}
 		if (combined)
 			expr__ctx_free(combined);
@@ -1450,6 +1670,9 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
 					continue;
 
 				if (expr__subset_of_ids(n->pctx, m->pctx)) {
+					if (m->pmu_name && n->pmu_name
+					    && strcmp(m->pmu_name, n->pmu_name))
+						continue;
 					pr_debug("Events in '%s' fully contained within '%s'\n",
 						 m->metric_name, n->metric_name);
 					metric_evlist = n->evlist;
@@ -1459,14 +1682,16 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
 			}
 		}
 		if (!metric_evlist) {
-			ret = parse_ids(metric_no_merge, fake_pmu, m->pctx, m->modifier,
-					m->has_constraint, &m->evlist);
+			ret = parse_ids(metric_no_merge, fake_pmu, m->pctx,
+				      m->modifier, m->has_constraint,
+				      &m->evlist, m->pmu_name);
 			if (ret)
 				goto out;
 
 			metric_evlist = m->evlist;
 		}
-		ret = setup_metric_events(m->pctx->ids, metric_evlist, &metric_events);
+		ret = setup_metric_events(m->pctx->ids, metric_evlist,
+					  &metric_events, m->pmu_name);
 		if (ret) {
 			pr_debug("Cannot resolve IDs for %s: %s\n",
 				m->metric_name, m->metric_expr);
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 138e3ab9d638..46b3dd134656 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -539,7 +539,8 @@ static void aggr_update_shadow(struct perf_stat_config *config,
 	}
 }
 
-static void uniquify_event_name(struct evsel *counter)
+static void uniquify_event_name(struct evsel *counter,
+				struct perf_stat_config *stat_config)
 {
 	char *new_name;
 	char *config;
@@ -558,7 +559,8 @@ static void uniquify_event_name(struct evsel *counter)
 			counter->name = new_name;
 		}
 	} else {
-		if (perf_pmu__has_hybrid()) {
+		if (perf_pmu__has_hybrid() &&
+		    stat_config->metric_events.nr_entries == 0) {
 			ret = asprintf(&new_name, "%s/%s/",
 				       counter->pmu_name, counter->name);
 		} else {
@@ -619,7 +621,7 @@ static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
 		return false;
 	cb(config, counter, data, true);
 	if (config->no_merge || hybrid_uniquify(counter))
-		uniquify_event_name(counter);
+		uniquify_event_name(counter, config);
 	else if (counter->auto_merge_stats)
 		collect_all_aliases(config, counter, cb, data);
 	return true;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/3] perf stat: Merge event counts from all hybrid PMUs
  2022-04-22  6:56 [PATCH 1/3] perf stat: Support metrics with hybrid events zhengjun.xing
@ 2022-04-22  6:56 ` zhengjun.xing
  2022-05-07  4:03   ` Ian Rogers
  2022-04-22  6:56 ` [PATCH 3/3] perf stat: Support hybrid --topdown option zhengjun.xing
  2022-05-06  8:25 ` [PATCH 1/3] perf stat: Support metrics with hybrid events Ian Rogers
  2 siblings, 1 reply; 7+ messages in thread
From: zhengjun.xing @ 2022-04-22  6:56 UTC (permalink / raw)
  To: acme, peterz, mingo, alexander.shishkin, jolsa
  Cc: linux-kernel, linux-perf-users, irogers, adrian.hunter, ak,
	kan.liang, zhengjun.xing

From: Zhengjun Xing <zhengjun.xing@linux.intel.com>

For hybrid events, by default stat aggregates and reports the event counts
per pmu.

  # ./perf stat -e cycles -a  sleep 1

   Performance counter stats for 'system wide':

      14,066,877,268      cpu_core/cycles/
       6,814,443,147      cpu_atom/cycles/

         1.002760625 seconds time elapsed

Sometimes, it's also useful to aggregate event counts from all PMUs.
Create a new option '--hybrid-merge' to enable that behavior and report
the counts without PMUs.

  # ./perf stat -e cycles -a --hybrid-merge  sleep 1

   Performance counter stats for 'system wide':

      20,732,982,512      cycles

         1.002776793 seconds time elapsed

Signed-off-by: Zhengjun Xing <zhengjun.xing@linux.intel.com>
Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
---
 tools/perf/Documentation/perf-stat.txt | 10 ++++++++++
 tools/perf/builtin-stat.c              |  2 ++
 tools/perf/util/stat-display.c         | 17 +++++++++++++++--
 tools/perf/util/stat.h                 |  1 +
 4 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index c06c341e72b9..8d1cde00b8d6 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -454,6 +454,16 @@ Multiple events are created from a single event specification when:
 2. Aliases, which are listed immediately after the Kernel PMU events
    by perf list, are used.
 
+--hybrid-merge::
+Merge the hybrid event counts from all PMUs.
+
+For hybrid events, by default, the stat aggregates and reports the event
+counts per PMU. But sometimes, it's also useful to aggregate event counts
+from all PMUs. This option enables that behavior and reports the counts
+without PMUs.
+
+For non-hybrid events, it should be no effect.
+
 --smi-cost::
 Measure SMI cost if msr/aperf/ and msr/smi/ events are supported.
 
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a96f106dc93a..ea88ac5bed2d 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1235,6 +1235,8 @@ static struct option stat_options[] = {
 	OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
 		    "disable CPU count aggregation", AGGR_NONE),
 	OPT_BOOLEAN(0, "no-merge", &stat_config.no_merge, "Do not merge identical named events"),
+	OPT_BOOLEAN(0, "hybrid-merge", &stat_config.hybrid_merge,
+		    "Merge identical named hybrid events"),
 	OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator",
 		   "print counts with custom separator"),
 	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 46b3dd134656..d9629a83aa78 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -612,6 +612,19 @@ static bool hybrid_uniquify(struct evsel *evsel)
 	return perf_pmu__has_hybrid() && !is_uncore(evsel);
 }
 
+static bool hybrid_merge(struct evsel *counter, struct perf_stat_config *config,
+			 bool check)
+{
+	if (hybrid_uniquify(counter)) {
+		if (check)
+			return config && config->hybrid_merge;
+		else
+			return config && !config->hybrid_merge;
+	}
+
+	return false;
+}
+
 static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
 			    void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data,
 				       bool first),
@@ -620,9 +633,9 @@ static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
 	if (counter->merged_stat)
 		return false;
 	cb(config, counter, data, true);
-	if (config->no_merge || hybrid_uniquify(counter))
+	if (config->no_merge || hybrid_merge(counter, config, false))
 		uniquify_event_name(counter, config);
-	else if (counter->auto_merge_stats)
+	else if (counter->auto_merge_stats || hybrid_merge(counter, config, true))
 		collect_all_aliases(config, counter, cb, data);
 	return true;
 }
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 335d19cc3063..91d989dfeca4 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -122,6 +122,7 @@ struct perf_stat_config {
 	bool			 ru_display;
 	bool			 big_num;
 	bool			 no_merge;
+	bool			 hybrid_merge;
 	bool			 walltime_run_table;
 	bool			 all_kernel;
 	bool			 all_user;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/3] perf stat: Support hybrid --topdown option
  2022-04-22  6:56 [PATCH 1/3] perf stat: Support metrics with hybrid events zhengjun.xing
  2022-04-22  6:56 ` [PATCH 2/3] perf stat: Merge event counts from all hybrid PMUs zhengjun.xing
@ 2022-04-22  6:56 ` zhengjun.xing
  2022-05-06  8:25 ` [PATCH 1/3] perf stat: Support metrics with hybrid events Ian Rogers
  2 siblings, 0 replies; 7+ messages in thread
From: zhengjun.xing @ 2022-04-22  6:56 UTC (permalink / raw)
  To: acme, peterz, mingo, alexander.shishkin, jolsa
  Cc: linux-kernel, linux-perf-users, irogers, adrian.hunter, ak,
	kan.liang, zhengjun.xing

From: Zhengjun Xing <zhengjun.xing@linux.intel.com>

Since cpu_core and cpu_atom have different topdown
event groups.

For cpu_core, --topdown equals to
"{slots,cpu_core/topdown-retiring/,cpu_core/topdown-bad-spec/,
  cpu_core/topdown-fe-bound/,cpu_core/topdown-be-bound/,
  cpu_core/topdown-heavy-ops/,cpu_core/topdown-br-mispredict/,
  cpu_core/topdown-fetch-lat/,cpu_core/topdown-mem-bound/}"

For cpu_atom, --topdown equals to
"{cpu_atom/topdown-retiring/,cpu_atom/topdown-bad-spec/,
 cpu_atom/topdown-fe-bound/,cpu_atom/topdown-be-bound/}"

To simplify the implementation, on hybrid, --topdown is used
together with --cputype. If without --cputype, it uses cpu_core
topdown events by default.

  # ./perf stat --topdown -a  sleep 1
  WARNING: default to use cpu_core topdown events

   Performance counter stats for 'system wide':

              retiring      bad speculation       frontend bound        backend bound     heavy operations     light operations    branch mispredict       machine clears        fetch latency      fetch bandwidth         memory bound           Core bound
                  4.1%                 0.0%                 5.1%                90.8%                 2.3%                 1.8%                 0.0%                 0.0%                 4.2%                 0.9%                 9.9%                81.0%

         1.002624229 seconds time elapsed

  # ./perf stat --topdown -a --cputype atom  sleep 1

   Performance counter stats for 'system wide':

              retiring      bad speculation       frontend bound        backend bound
                 13.5%                 0.1%                31.2%                55.2%

         1.002366987 seconds time elapsed

Signed-off-by: Zhengjun Xing <zhengjun.xing@linux.intel.com>
Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
---
 tools/perf/builtin-stat.c | 21 ++++++++++++++++++---
 tools/perf/util/stat.c    |  4 +++-
 tools/perf/util/topdown.c | 17 +++++++++++++----
 tools/perf/util/topdown.h |  3 ++-
 4 files changed, 36 insertions(+), 9 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index ea88ac5bed2d..084b0e53fb62 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1844,11 +1844,23 @@ static int add_default_attributes(void)
 		unsigned int max_level = 1;
 		char *str = NULL;
 		bool warn = false;
+		const char *pmu_name = "cpu";
 
 		if (!force_metric_only)
 			stat_config.metric_only = true;
 
-		if (pmu_have_event("cpu", topdown_metric_L2_attrs[5])) {
+		if (perf_pmu__has_hybrid()) {
+			if (!evsel_list->hybrid_pmu_name) {
+				pr_warning("WARNING: default to use cpu_core topdown events\n");
+				evsel_list->hybrid_pmu_name = perf_pmu__hybrid_type_to_pmu("core");
+			}
+
+			pmu_name = evsel_list->hybrid_pmu_name;
+			if (!pmu_name)
+				return -1;
+		}
+
+		if (pmu_have_event(pmu_name, topdown_metric_L2_attrs[5])) {
 			metric_attrs = topdown_metric_L2_attrs;
 			max_level = 2;
 		}
@@ -1859,10 +1871,11 @@ static int add_default_attributes(void)
 		} else if (!stat_config.topdown_level)
 			stat_config.topdown_level = max_level;
 
-		if (topdown_filter_events(metric_attrs, &str, 1) < 0) {
+		if (topdown_filter_events(metric_attrs, &str, 1, pmu_name) < 0) {
 			pr_err("Out of memory\n");
 			return -1;
 		}
+
 		if (metric_attrs[0] && str) {
 			if (!stat_config.interval && !stat_config.metric_only) {
 				fprintf(stat_config.output,
@@ -1886,10 +1899,12 @@ static int add_default_attributes(void)
 		}
 
 		if (topdown_filter_events(topdown_attrs, &str,
-				arch_topdown_check_group(&warn)) < 0) {
+				arch_topdown_check_group(&warn),
+				pmu_name) < 0) {
 			pr_err("Out of memory\n");
 			return -1;
 		}
+
 		if (topdown_attrs[0] && str) {
 			struct parse_events_error errinfo;
 			if (warn)
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 817a2de264b4..4a5f3b8ff820 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -117,7 +117,9 @@ static void perf_stat_evsel_id_init(struct evsel *evsel)
 	/* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */
 
 	for (i = 0; i < PERF_STAT_EVSEL_ID__MAX; i++) {
-		if (!strcmp(evsel__name(evsel), id_str[i])) {
+		if (!strcmp(evsel__name(evsel), id_str[i]) ||
+		    (strstr(evsel__name(evsel), id_str[i]) && evsel->pmu_name
+		     && strstr(evsel__name(evsel), evsel->pmu_name))) {
 			ps->id = i;
 			break;
 		}
diff --git a/tools/perf/util/topdown.c b/tools/perf/util/topdown.c
index 1081b20f9891..a369f84ceb6a 100644
--- a/tools/perf/util/topdown.c
+++ b/tools/perf/util/topdown.c
@@ -1,18 +1,24 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <stdio.h>
 #include "pmu.h"
+#include "pmu-hybrid.h"
 #include "topdown.h"
 
-int topdown_filter_events(const char **attr, char **str, bool use_group)
+int topdown_filter_events(const char **attr, char **str, bool use_group,
+			  const char *pmu_name)
 {
 	int off = 0;
 	int i;
 	int len = 0;
 	char *s;
+	bool is_hybrid = perf_pmu__is_hybrid(pmu_name);
 
 	for (i = 0; attr[i]; i++) {
-		if (pmu_have_event("cpu", attr[i])) {
-			len += strlen(attr[i]) + 1;
+		if (pmu_have_event(pmu_name, attr[i])) {
+			if (is_hybrid)
+				len += strlen(attr[i]) + strlen(pmu_name) + 3;
+			else
+				len += strlen(attr[i]) + 1;
 			attr[i - off] = attr[i];
 		} else
 			off++;
@@ -30,7 +36,10 @@ int topdown_filter_events(const char **attr, char **str, bool use_group)
 	if (use_group)
 		*s++ = '{';
 	for (i = 0; attr[i]; i++) {
-		strcpy(s, attr[i]);
+		if (!is_hybrid)
+			strcpy(s, attr[i]);
+		else
+			sprintf(s, "%s/%s/", pmu_name, attr[i]);
 		s += strlen(s);
 		*s++ = ',';
 	}
diff --git a/tools/perf/util/topdown.h b/tools/perf/util/topdown.h
index 2f0d0b887639..118e75281f93 100644
--- a/tools/perf/util/topdown.h
+++ b/tools/perf/util/topdown.h
@@ -7,6 +7,7 @@ bool arch_topdown_check_group(bool *warn);
 void arch_topdown_group_warn(void);
 bool arch_topdown_sample_read(struct evsel *leader);
 
-int topdown_filter_events(const char **attr, char **str, bool use_group);
+int topdown_filter_events(const char **attr, char **str, bool use_group,
+			  const char *pmu_name);
 
 #endif
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/3] perf stat: Support metrics with hybrid events
  2022-04-22  6:56 [PATCH 1/3] perf stat: Support metrics with hybrid events zhengjun.xing
  2022-04-22  6:56 ` [PATCH 2/3] perf stat: Merge event counts from all hybrid PMUs zhengjun.xing
  2022-04-22  6:56 ` [PATCH 3/3] perf stat: Support hybrid --topdown option zhengjun.xing
@ 2022-05-06  8:25 ` Ian Rogers
  2022-05-06  8:42   ` Ian Rogers
  2 siblings, 1 reply; 7+ messages in thread
From: Ian Rogers @ 2022-05-06  8:25 UTC (permalink / raw)
  To: zhengjun.xing
  Cc: acme, peterz, mingo, alexander.shishkin, jolsa, linux-kernel,
	linux-perf-users, adrian.hunter, ak, kan.liang

 and should be before it.
On Thu, Apr 21, 2022 at 11:56 PM <zhengjun.xing@linux.intel.com> wrote:
>
> From: Zhengjun Xing <zhengjun.xing@linux.intel.com>
>
> One metric such as 'Kernel_Utilization' may be from different PMUs and
> consists of different events.
>
> For core,
> Kernel_Utilization = cpu_clk_unhalted.thread:k / cpu_clk_unhalted.thread
>
> For atom,
> Kernel_Utilization = cpu_clk_unhalted.core:k / cpu_clk_unhalted.core
>
> The metric group string for core is:
> '{cpu_clk_unhalted.thread/metric-id=cpu_clk_unhalted.thread:k/k,cpu_clk_unhalted.thread/metric-id=cpu_clk_unhalted.thread/}:W'
> It's internally expanded to:
> '{cpu_clk_unhalted.thread_p/metric-id=cpu_clk_unhalted.thread_p:k/k,cpu_clk_unhalted.thread/metric-id=cpu_clk_unhalted.thread/}:W#cpu_core'
>
> The metric group string for atom is:
> '{cpu_clk_unhalted.core/metric-id=cpu_clk_unhalted.core:k/k,cpu_clk_unhalted.core/metric-id=cpu_clk_unhalted.core/}:W'
> It's internally expanded to:
> '{cpu_clk_unhalted.core/metric-id=cpu_clk_unhalted.core:k/k,cpu_clk_unhalted.core/metric-id=cpu_clk_unhalted.core/}:W#cpu_atom'
>
> That means the group "{cpu_clk_unhalted.thread:k,cpu_clk_unhalted.thread}:W"
> is from cpu_core PMU and the group "{cpu_clk_unhalted.core:k,cpu_clk_unhalted.core}"
> is from cpu_atom PMU. And then next, check if the events in the group are
> valid on that PMU. If one event is not valid on that PMU, the associated
> group would be removed internally.
>
> In this example, cpu_clk_unhalted.thread is valid on cpu_core and
> cpu_clk_unhalted.core is valid on cpu_atom. So the checks for these two
> groups are passed.
>
> Before:
>
>   # ./perf stat -M Kernel_Utilization -a sleep 1
> WARNING: events in group from different hybrid PMUs!
> WARNING: grouped events cpus do not match, disabling group:
>   anon group { CPU_CLK_UNHALTED.THREAD_P:k, CPU_CLK_UNHALTED.THREAD_P:k, CPU_CLK_UNHALTED.THREAD, CPU_CLK_UNHALTED.THREAD }
>
>  Performance counter stats for 'system wide':
>
>         17,639,501      cpu_atom/CPU_CLK_UNHALTED.CORE/ #     1.00 Kernel_Utilization
>         17,578,757      cpu_atom/CPU_CLK_UNHALTED.CORE:k/
>      1,005,350,226 ns   duration_time
>         43,012,352      cpu_core/CPU_CLK_UNHALTED.THREAD_P:k/ #     0.99 Kernel_Utilization
>         17,608,010      cpu_atom/CPU_CLK_UNHALTED.THREAD_P:k/
>         43,608,755      cpu_core/CPU_CLK_UNHALTED.THREAD/
>         17,630,838      cpu_atom/CPU_CLK_UNHALTED.THREAD/
>      1,005,350,226 ns   duration_time
>
>        1.005350226 seconds time elapsed
>
> After:
>
>   # ./perf stat -M Kernel_Utilization -a sleep 1
>
>  Performance counter stats for 'system wide':
>
>         17,981,895      CPU_CLK_UNHALTED.CORE [cpu_atom] #     1.00 Kernel_Utilization
>         17,925,405      CPU_CLK_UNHALTED.CORE:k [cpu_atom]
>      1,004,811,366 ns   duration_time
>         41,246,425      CPU_CLK_UNHALTED.THREAD_P:k [cpu_core] #     0.99 Kernel_Utilization
>         41,819,129      CPU_CLK_UNHALTED.THREAD [cpu_core]
>      1,004,811,366 ns   duration_time
>
>        1.004811366 seconds time elapsed
>
> Signed-off-by: Zhengjun Xing <zhengjun.xing@linux.intel.com>
> Reviewed-by: Kan Liang <kan.liang@linux.intel.com>

Sorry for the late review, this arrived April 21st and was added to
acme/perf/core on April 22nd - I was on vacation those days. There is
substantial complexity added here and I'm confused by the all too
frequent is_hybrid paths. There is nothing wrong in my eyes with
making the metric code sensitive to a PMU type, why does hybrid need
to be special? More comments below.

> ---
>  tools/perf/util/metricgroup.c  | 263 ++++++++++++++++++++++++++++++---
>  tools/perf/util/stat-display.c |   8 +-
>  2 files changed, 249 insertions(+), 22 deletions(-)
>
> diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
> index d8492e339521..126a43a8917e 100644
> --- a/tools/perf/util/metricgroup.c
> +++ b/tools/perf/util/metricgroup.c
> @@ -141,6 +141,11 @@ struct metric {
>          * output.
>          */
>         const char *metric_unit;
> +       /**
> +        * The name of the CPU such as "cpu_core" or "cpu_atom" in hybrid systems
> +        * and "NULL" in non-hybrid systems.
> +        */
> +       const char *pmu_name;

Thanks for the comment. CPU and PMU don't align, but I think PMU is
the better name. I think this is an optional PMU name used for opening
the metric's events. For example, on hybrid it can be cpu_core or
cpu_atom. This is confusing when we have a metric that mixes say core
and uncore events.

>         /** Optional null terminated array of referenced metrics. */
>         struct metric_ref *metric_refs;
>         /**
> @@ -215,6 +220,7 @@ static struct metric *metric__new(const struct pmu_event *pe,
>         }
>         m->metric_expr = pe->metric_expr;
>         m->metric_unit = pe->unit;
> +       m->pmu_name = pe->pmu;

Well this explains why we have a pmu_name. I'm not a fan of metrics
being the same as events. Reading the PMU here feels a bit of a hack.

>         m->pctx->runtime = runtime;
>         m->has_constraint = metric_no_group || metricgroup__has_constraint(pe);
>         m->metric_refs = NULL;
> @@ -250,10 +256,12 @@ static bool contains_metric_id(struct evsel **metric_events, int num_events,
>   * @ids: the metric IDs to match.
>   * @metric_evlist: the list of perf events.
>   * @out_metric_events: holds the created metric events array.
> + * @pmu_name: the name of the CPU.

Could pmu_name just be an optional PMU to be used for the events? We
don't need to be specific it is a CPU here?

>   */
>  static int setup_metric_events(struct hashmap *ids,
>                                struct evlist *metric_evlist,
> -                              struct evsel ***out_metric_events)
> +                              struct evsel ***out_metric_events,
> +                              const char *pmu_name)
>  {
>         struct evsel **metric_events;
>         const char *metric_id;
> @@ -286,6 +294,10 @@ static int setup_metric_events(struct hashmap *ids,
>                  * about this event.
>                  */
>                 if (hashmap__find(ids, metric_id, (void **)&val_ptr)) {
> +                       if (evsel__is_hybrid(ev) && pmu_name &&

The evsel__is_hybrid looks unnecessary here.

> +                           strcmp(pmu_name, ev->pmu_name)) {
> +                               continue;
> +                       }
>                         metric_events[matched_events++] = ev;
>
>                         if (matched_events >= ids_size)
> @@ -724,7 +736,8 @@ static int decode_all_metric_ids(struct evlist *perf_evlist, const char *modifie
>  static int metricgroup__build_event_string(struct strbuf *events,
>                                            const struct expr_parse_ctx *ctx,
>                                            const char *modifier,
> -                                          bool has_constraint)
> +                                          bool has_constraint,
> +                                          const char *pmu_name)
>  {
>         struct hashmap_entry *cur;
>         size_t bkt;
> @@ -806,12 +819,18 @@ static int metricgroup__build_event_string(struct strbuf *events,
>                 if (no_group) {
>                         /* Strange case of a metric of just duration_time. */
>                         ret = strbuf_addf(events, "duration_time");
> -               } else if (!has_constraint)
> -                       ret = strbuf_addf(events, "}:W,duration_time");
> -               else
> +               } else if (!has_constraint) {
> +                       ret = strbuf_addf(events, "}:W");
> +                       if (pmu_name)
> +                               ret = strbuf_addf(events, "#%s", pmu_name);
>                         ret = strbuf_addf(events, ",duration_time");
> -       } else if (!no_group && !has_constraint)
> +               } else
> +                       ret = strbuf_addf(events, ",duration_time");
> +       } else if (!no_group && !has_constraint) {
>                 ret = strbuf_addf(events, "}:W");
> +               if (pmu_name)
> +                       ret = strbuf_addf(events, "#%s", pmu_name);
> +       }

Why add the # and then expand it when you could just expand here?

>         return ret;
>  #undef RETURN_IF_NON_ZERO
> @@ -1150,11 +1169,13 @@ static int metric_list_cmp(void *priv __maybe_unused, const struct list_head *l,
>   * @metric_list: The list that the metric or metric group are added to.
>   * @map: The map that is searched for metrics, most commonly the table for the
>   *       architecture perf is running upon.
> + * @pmu_name: the name of the CPU.
>   */
> -static int metricgroup__add_metric(const char *metric_name, const char *modifier,
> -                                  bool metric_no_group,
> +static int metricgroup__add_metric(const char *metric_name,
> +                                  const char *modifier, bool metric_no_group,

This is a whitespace only change.

>                                    struct list_head *metric_list,
> -                                  const struct pmu_events_map *map)
> +                                  const struct pmu_events_map *map,
> +                                  const char *pmu_name)
>  {
>         const struct pmu_event *pe;
>         LIST_HEAD(list);
> @@ -1167,6 +1188,8 @@ static int metricgroup__add_metric(const char *metric_name, const char *modifier
>          */
>         map_for_each_metric(pe, i, map, metric_name) {
>                 has_match = true;
> +               if (pmu_name && pe->pmu && strcmp(pmu_name, pe->pmu))
> +                       continue;
>                 ret = add_metric(&list, pe, modifier, metric_no_group,
>                                  /*root_metric=*/NULL,
>                                  /*visited_metrics=*/NULL, map);
> @@ -1215,10 +1238,12 @@ static int metricgroup__add_metric(const char *metric_name, const char *modifier
>   * @metric_list: The list that metrics are added to.
>   * @map: The map that is searched for metrics, most commonly the table for the
>   *       architecture perf is running upon.
> + * @pmu_name: the name of the CPU.
>   */
>  static int metricgroup__add_metric_list(const char *list, bool metric_no_group,
>                                         struct list_head *metric_list,
> -                                       const struct pmu_events_map *map)
> +                                       const struct pmu_events_map *map,
> +                                       const char *pmu_name)
>  {
>         char *list_itr, *list_copy, *metric_name, *modifier;
>         int ret, count = 0;
> @@ -1235,7 +1260,7 @@ static int metricgroup__add_metric_list(const char *list, bool metric_no_group,
>
>                 ret = metricgroup__add_metric(metric_name, modifier,
>                                               metric_no_group, metric_list,
> -                                             map);
> +                                             map, pmu_name);
>                 if (ret == -EINVAL)
>                         pr_err("Cannot find metric or group `%s'\n", metric_name);
>
> @@ -1310,6 +1335,183 @@ static int build_combined_expr_ctx(const struct list_head *metric_list,
>         return ret;
>  }
>
> +static char *get_metric_pmus(char *orig_str, struct strbuf *metric_pmus)

Could you add an example here. A metric is coming with an associated
PMU so what are we pulling out here? Why are the events modified? What
is the metric_pmus out argument doing?

> +{
> +       char *llist, *nlist, *p1, *p2, *new_str = NULL;
> +       int ret;
> +       struct strbuf new_events;
> +
> +       if (!strchr(orig_str, '#')) {
> +               /*
> +                * pmu name is added after '#'. If no '#' found,
> +                * don't need to process pmu.
> +                */
> +               return strdup(orig_str);
> +       }
> +
> +       nlist = strdup(orig_str);
> +       if (!nlist)
> +               return new_str;
> +
> +       ret = strbuf_init(&new_events, 100);
> +       if (ret)
> +               goto err_out;
> +
> +       ret = strbuf_grow(metric_pmus, 100);
> +       if (ret)
> +               goto err_out;
> +
> +       llist = nlist;
> +       while ((p1 = strsep(&llist, ",")) != NULL) {
> +               p2 = strchr(p1, '#');
> +               if (p2) {
> +                       *p2 = 0;
> +                       ret = strbuf_addf(&new_events, "%s,", p1);
> +                       if (ret)
> +                               goto err_out;
> +
> +                       ret = strbuf_addf(metric_pmus, "%s,", p2 + 1);
> +                       if (ret)
> +                               goto err_out;
> +
> +               } else {
> +                       ret = strbuf_addf(&new_events, "%s,", p1);
> +                       if (ret)
> +                               goto err_out;
> +               }
> +       }
> +
> +       new_str = strdup(new_events.buf);
> +       if (new_str) {
> +               /* Remove last ',' */
> +               new_str[strlen(new_str) - 1] = 0;
> +       }
> +err_out:
> +       free(nlist);
> +       strbuf_release(&new_events);
> +       return new_str;
> +}
> +
> +static void set_pmu_unmatched_events(struct evlist *evlist, int group_idx,
> +                                    char *pmu_name,
> +                                    unsigned long *evlist_removed)
> +{
> +       struct evsel *evsel, *pos;
> +       int i = 0, j = 0;
> +
> +       /*
> +        * Move to the first evsel of a given group
> +        */
> +       evlist__for_each_entry(evlist, evsel) {
> +               if (evsel__is_group_leader(evsel) &&
> +                   evsel->core.nr_members >= 1) {
> +                       if (i < group_idx) {
> +                               j += evsel->core.nr_members;
> +                               i++;
> +                               continue;
> +                       }
> +               }
> +       }
> +
> +       i = 0;
> +       evlist__for_each_entry(evlist, evsel) {
> +               if (i < j) {
> +                       i++;
> +                       continue;
> +               }
> +
> +               /*
> +                * Now we are at the first evsel in the group
> +                */
> +               for_each_group_evsel(pos, evsel) {
> +                       if (evsel__is_hybrid(pos) &&
> +                           strcmp(pos->pmu_name, pmu_name)) {
> +                               set_bit(pos->core.idx, evlist_removed);
> +                       }
> +               }
> +               break;
> +       }
> +}
> +
> +static void remove_pmu_umatched_events(struct evlist *evlist, char *metric_pmus)

Is it possible here that we have an evlist being used by >1 metric.
We've decided one of the metrics is invalid and set about removing the
events thereby breaking the other metric? In general this code treads
lightly around the evlist as previously it has been the source of
bugs:
https://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git/commit/tools/perf/util/metricgroup.c?h=perf/core&id=5ecd5a0c7d1cca79f1431093d12e4cd9893b0331

> +{
> +       struct evsel *evsel, *tmp, *new_leader = NULL;
> +       unsigned long *evlist_removed;
> +       char *llist, *nlist, *p1;

Is there a better name than nlist and llist? Could you explain what they mean?

> +       bool need_new_leader = false;
> +       int i = 0, new_nr_members = 0;
> +
> +       nlist = strdup(metric_pmus);
> +       if (!nlist)
> +               return;
> +
> +       evlist_removed = bitmap_zalloc(evlist->core.nr_entries);
> +       if (!evlist_removed) {
> +               free(nlist);
> +               return;
> +       }
> +
> +       llist = nlist;
> +       while ((p1 = strsep(&llist, ",")) != NULL) {
> +               if (strlen(p1) > 0) {
> +                       /*
> +                        * p1 points to the string of pmu name, e.g. "cpu_atom".
> +                        * The metric group string has pmu suffixes, e.g.
> +                        * "{inst_retired.any,cpu_clk_unhalted.thread}:W#cpu_core,
> +                        *  {cpu_clk_unhalted.core,inst_retired.any_p}:W#cpu_atom"
> +                        * By counting the pmu name, we can know the index of
> +                        * group.
> +                        */
> +                       set_pmu_unmatched_events(evlist, i++, p1,
> +                                                evlist_removed);
> +               }
> +       }
> +
> +       evlist__for_each_entry_safe(evlist, tmp, evsel) {
> +               if (test_bit(evsel->core.idx, evlist_removed)) {
> +                       if (!evsel__is_group_leader(evsel)) {
> +                               if (!need_new_leader) {
> +                                       if (new_leader)
> +                                               new_leader->core.leader->nr_members--;
> +                                       else
> +                                               evsel->core.leader->nr_members--;
> +                               } else
> +                                       new_nr_members--;
> +                       } else {
> +                               /*
> +                                * If group leader is to remove, we need to
> +                                * prepare a new leader and adjust all group
> +                                * members.
> +                                */
> +                               need_new_leader = true;
> +                               new_nr_members =
> +                                   evsel->core.leader->nr_members - 1;
> +                       }
> +
> +                       evlist__remove(evlist, evsel);
> +                       evsel__delete(evsel);
> +               } else {
> +                       if (!evsel__is_group_leader(evsel)) {
> +                               if (need_new_leader) {
> +                                       need_new_leader = false;
> +                                       new_leader = evsel;
> +                                       new_leader->core.leader =
> +                                           &new_leader->core;
> +                                       new_leader->core.nr_members =
> +                                           new_nr_members;
> +                               } else if (new_leader)
> +                                       evsel->core.leader = &new_leader->core;
> +                       } else {
> +                               need_new_leader = false;
> +                               new_leader = NULL;
> +                       }
> +               }
> +       }
> +
> +       bitmap_free(evlist_removed);
> +       free(nlist);
> +}
> +
>  /**
>   * parse_ids - Build the event string for the ids and parse them creating an
>   *             evlist. The encoded metric_ids are decoded.
> @@ -1319,14 +1521,18 @@ static int build_combined_expr_ctx(const struct list_head *metric_list,
>   * @modifier: any modifiers added to the events.
>   * @has_constraint: false if events should be placed in a weak group.
>   * @out_evlist: the created list of events.
> + * @pmu_name: the name of the CPU.
>   */
>  static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu,
>                      struct expr_parse_ctx *ids, const char *modifier,
> -                    bool has_constraint, struct evlist **out_evlist)
> +                    bool has_constraint, struct evlist **out_evlist,
> +                    const char *pmu_name)
>  {
>         struct parse_events_error parse_error;
>         struct evlist *parsed_evlist;
>         struct strbuf events = STRBUF_INIT;
> +       struct strbuf metric_pmus = STRBUF_INIT;
> +       char *nlist = NULL;
>         int ret;
>
>         *out_evlist = NULL;
> @@ -1353,7 +1559,7 @@ static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu,
>                 ids__insert(ids->ids, tmp);
>         }
>         ret = metricgroup__build_event_string(&events, ids, modifier,
> -                                             has_constraint);
> +                                             has_constraint, pmu_name);
>         if (ret)
>                 return ret;
>
> @@ -1364,11 +1570,20 @@ static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu,
>         }
>         pr_debug("Parsing metric events '%s'\n", events.buf);
>         parse_events_error__init(&parse_error);
> -       ret = __parse_events(parsed_evlist, events.buf, &parse_error, fake_pmu);
> +       nlist = get_metric_pmus(events.buf, &metric_pmus);
> +       if (!nlist) {
> +               ret = -ENOMEM;
> +               goto err_out;
> +       }
> +       ret = __parse_events(parsed_evlist, nlist, &parse_error, fake_pmu);
>         if (ret) {
>                 parse_events_error__print(&parse_error, events.buf);
>                 goto err_out;
>         }
> +
> +       if (metric_pmus.alloc)
> +               remove_pmu_umatched_events(parsed_evlist, metric_pmus.buf);
> +
>         ret = decode_all_metric_ids(parsed_evlist, modifier);
>         if (ret)
>                 goto err_out;
> @@ -1376,9 +1591,12 @@ static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu,
>         *out_evlist = parsed_evlist;
>         parsed_evlist = NULL;
>  err_out:
> +       if (nlist)
> +               free(nlist);
>         parse_events_error__exit(&parse_error);
>         evlist__delete(parsed_evlist);
>         strbuf_release(&events);
> +       strbuf_release(&metric_pmus);
>         return ret;
>  }
>
> @@ -1397,7 +1615,8 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
>         if (metric_events_list->nr_entries == 0)
>                 metricgroup__rblist_init(metric_events_list);
>         ret = metricgroup__add_metric_list(str, metric_no_group,
> -                                          &metric_list, map);
> +                                          &metric_list, map,
> +                                          perf_evlist->hybrid_pmu_name);

This confuses me. perf_evlist is an out argument that we build the
list of evsels for the metrics into, yet here you are reading it for
the hybrid_pmu_name. Why is this? What values can hybrid_pmu_name be
here?

>         if (ret)
>                 goto out;
>
> @@ -1413,7 +1632,8 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
>                         ret = parse_ids(metric_no_merge, fake_pmu, combined,
>                                         /*modifier=*/NULL,
>                                         /*has_constraint=*/true,
> -                                       &combined_evlist);
> +                                       &combined_evlist,
> +                                       perf_evlist->hybrid_pmu_name);

I don't understand this. combined is a set of event names, it seems
now we need to combine the event name with the PMU to form an id.

>                 }
>                 if (combined)
>                         expr__ctx_free(combined);
> @@ -1450,6 +1670,9 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
>                                         continue;
>
>                                 if (expr__subset_of_ids(n->pctx, m->pctx)) {
> +                                       if (m->pmu_name && n->pmu_name
> +                                           && strcmp(m->pmu_name, n->pmu_name))
> +                                               continue;

This test is cheaper than expr__subset_of_ids and should be before it.

>                                         pr_debug("Events in '%s' fully contained within '%s'\n",
>                                                  m->metric_name, n->metric_name);
>                                         metric_evlist = n->evlist;
> @@ -1459,14 +1682,16 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
>                         }
>                 }
>                 if (!metric_evlist) {
> -                       ret = parse_ids(metric_no_merge, fake_pmu, m->pctx, m->modifier,
> -                                       m->has_constraint, &m->evlist);
> +                       ret = parse_ids(metric_no_merge, fake_pmu, m->pctx,
> +                                     m->modifier, m->has_constraint,
> +                                     &m->evlist, m->pmu_name);
>                         if (ret)
>                                 goto out;
>
>                         metric_evlist = m->evlist;
>                 }
> -               ret = setup_metric_events(m->pctx->ids, metric_evlist, &metric_events);
> +               ret = setup_metric_events(m->pctx->ids, metric_evlist,
> +                                         &metric_events, m->pmu_name);
>                 if (ret) {
>                         pr_debug("Cannot resolve IDs for %s: %s\n",
>                                 m->metric_name, m->metric_expr);
> diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
> index 138e3ab9d638..46b3dd134656 100644
> --- a/tools/perf/util/stat-display.c
> +++ b/tools/perf/util/stat-display.c
> @@ -539,7 +539,8 @@ static void aggr_update_shadow(struct perf_stat_config *config,
>         }
>  }
>
> -static void uniquify_event_name(struct evsel *counter)
> +static void uniquify_event_name(struct evsel *counter,
> +                               struct perf_stat_config *stat_config)
>  {
>         char *new_name;
>         char *config;
> @@ -558,7 +559,8 @@ static void uniquify_event_name(struct evsel *counter)
>                         counter->name = new_name;
>                 }
>         } else {
> -               if (perf_pmu__has_hybrid()) {
> +               if (perf_pmu__has_hybrid() &&
> +                   stat_config->metric_events.nr_entries == 0) {
>                         ret = asprintf(&new_name, "%s/%s/",
>                                        counter->pmu_name, counter->name);
>                 } else {
> @@ -619,7 +621,7 @@ static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
>                 return false;
>         cb(config, counter, data, true);
>         if (config->no_merge || hybrid_uniquify(counter))
> -               uniquify_event_name(counter);
> +               uniquify_event_name(counter, config);
>         else if (counter->auto_merge_stats)
>                 collect_all_aliases(config, counter, cb, data);
>         return true;
> --
> 2.25.1
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/3] perf stat: Support metrics with hybrid events
  2022-05-06  8:25 ` [PATCH 1/3] perf stat: Support metrics with hybrid events Ian Rogers
@ 2022-05-06  8:42   ` Ian Rogers
  0 siblings, 0 replies; 7+ messages in thread
From: Ian Rogers @ 2022-05-06  8:42 UTC (permalink / raw)
  To: zhengjun.xing
  Cc: acme, peterz, mingo, alexander.shishkin, jolsa, linux-kernel,
	linux-perf-users, adrian.hunter, ak, kan.liang

On Fri, May 6, 2022 at 1:25 AM Ian Rogers <irogers@google.com> wrote:
>
>  and should be before it.
> On Thu, Apr 21, 2022 at 11:56 PM <zhengjun.xing@linux.intel.com> wrote:
> >
> > From: Zhengjun Xing <zhengjun.xing@linux.intel.com>
> >
> > One metric such as 'Kernel_Utilization' may be from different PMUs and
> > consists of different events.
> >
> > For core,
> > Kernel_Utilization = cpu_clk_unhalted.thread:k / cpu_clk_unhalted.thread
> >
> > For atom,
> > Kernel_Utilization = cpu_clk_unhalted.core:k / cpu_clk_unhalted.core
> >
> > The metric group string for core is:
> > '{cpu_clk_unhalted.thread/metric-id=cpu_clk_unhalted.thread:k/k,cpu_clk_unhalted.thread/metric-id=cpu_clk_unhalted.thread/}:W'
> > It's internally expanded to:
> > '{cpu_clk_unhalted.thread_p/metric-id=cpu_clk_unhalted.thread_p:k/k,cpu_clk_unhalted.thread/metric-id=cpu_clk_unhalted.thread/}:W#cpu_core'
> >
> > The metric group string for atom is:
> > '{cpu_clk_unhalted.core/metric-id=cpu_clk_unhalted.core:k/k,cpu_clk_unhalted.core/metric-id=cpu_clk_unhalted.core/}:W'
> > It's internally expanded to:
> > '{cpu_clk_unhalted.core/metric-id=cpu_clk_unhalted.core:k/k,cpu_clk_unhalted.core/metric-id=cpu_clk_unhalted.core/}:W#cpu_atom'
> >
> > That means the group "{cpu_clk_unhalted.thread:k,cpu_clk_unhalted.thread}:W"
> > is from cpu_core PMU and the group "{cpu_clk_unhalted.core:k,cpu_clk_unhalted.core}"
> > is from cpu_atom PMU. And then next, check if the events in the group are
> > valid on that PMU. If one event is not valid on that PMU, the associated
> > group would be removed internally.
> >
> > In this example, cpu_clk_unhalted.thread is valid on cpu_core and
> > cpu_clk_unhalted.core is valid on cpu_atom. So the checks for these two
> > groups are passed.
> >
> > Before:
> >
> >   # ./perf stat -M Kernel_Utilization -a sleep 1
> > WARNING: events in group from different hybrid PMUs!
> > WARNING: grouped events cpus do not match, disabling group:
> >   anon group { CPU_CLK_UNHALTED.THREAD_P:k, CPU_CLK_UNHALTED.THREAD_P:k, CPU_CLK_UNHALTED.THREAD, CPU_CLK_UNHALTED.THREAD }
> >
> >  Performance counter stats for 'system wide':
> >
> >         17,639,501      cpu_atom/CPU_CLK_UNHALTED.CORE/ #     1.00 Kernel_Utilization
> >         17,578,757      cpu_atom/CPU_CLK_UNHALTED.CORE:k/
> >      1,005,350,226 ns   duration_time
> >         43,012,352      cpu_core/CPU_CLK_UNHALTED.THREAD_P:k/ #     0.99 Kernel_Utilization
> >         17,608,010      cpu_atom/CPU_CLK_UNHALTED.THREAD_P:k/
> >         43,608,755      cpu_core/CPU_CLK_UNHALTED.THREAD/
> >         17,630,838      cpu_atom/CPU_CLK_UNHALTED.THREAD/
> >      1,005,350,226 ns   duration_time
> >
> >        1.005350226 seconds time elapsed
> >
> > After:
> >
> >   # ./perf stat -M Kernel_Utilization -a sleep 1
> >
> >  Performance counter stats for 'system wide':
> >
> >         17,981,895      CPU_CLK_UNHALTED.CORE [cpu_atom] #     1.00 Kernel_Utilization
> >         17,925,405      CPU_CLK_UNHALTED.CORE:k [cpu_atom]
> >      1,004,811,366 ns   duration_time
> >         41,246,425      CPU_CLK_UNHALTED.THREAD_P:k [cpu_core] #     0.99 Kernel_Utilization
> >         41,819,129      CPU_CLK_UNHALTED.THREAD [cpu_core]
> >      1,004,811,366 ns   duration_time
> >
> >        1.004811366 seconds time elapsed
> >
> > Signed-off-by: Zhengjun Xing <zhengjun.xing@linux.intel.com>
> > Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
>
> Sorry for the late review, this arrived April 21st and was added to
> acme/perf/core on April 22nd - I was on vacation those days. There is
> substantial complexity added here and I'm confused by the all too
> frequent is_hybrid paths. There is nothing wrong in my eyes with
> making the metric code sensitive to a PMU type, why does hybrid need
> to be special? More comments below.
>
> > ---
> >  tools/perf/util/metricgroup.c  | 263 ++++++++++++++++++++++++++++++---
> >  tools/perf/util/stat-display.c |   8 +-
> >  2 files changed, 249 insertions(+), 22 deletions(-)
> >
> > diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
> > index d8492e339521..126a43a8917e 100644
> > --- a/tools/perf/util/metricgroup.c
> > +++ b/tools/perf/util/metricgroup.c
> > @@ -141,6 +141,11 @@ struct metric {
> >          * output.
> >          */
> >         const char *metric_unit;
> > +       /**
> > +        * The name of the CPU such as "cpu_core" or "cpu_atom" in hybrid systems
> > +        * and "NULL" in non-hybrid systems.
> > +        */
> > +       const char *pmu_name;
>
> Thanks for the comment. CPU and PMU don't align, but I think PMU is
> the better name. I think this is an optional PMU name used for opening
> the metric's events. For example, on hybrid it can be cpu_core or
> cpu_atom. This is confusing when we have a metric that mixes say core
> and uncore events.
>
> >         /** Optional null terminated array of referenced metrics. */
> >         struct metric_ref *metric_refs;
> >         /**
> > @@ -215,6 +220,7 @@ static struct metric *metric__new(const struct pmu_event *pe,
> >         }
> >         m->metric_expr = pe->metric_expr;
> >         m->metric_unit = pe->unit;
> > +       m->pmu_name = pe->pmu;
>
> Well this explains why we have a pmu_name. I'm not a fan of metrics
> being the same as events. Reading the PMU here feels a bit of a hack.
>
> >         m->pctx->runtime = runtime;
> >         m->has_constraint = metric_no_group || metricgroup__has_constraint(pe);
> >         m->metric_refs = NULL;
> > @@ -250,10 +256,12 @@ static bool contains_metric_id(struct evsel **metric_events, int num_events,
> >   * @ids: the metric IDs to match.
> >   * @metric_evlist: the list of perf events.
> >   * @out_metric_events: holds the created metric events array.
> > + * @pmu_name: the name of the CPU.
>
> Could pmu_name just be an optional PMU to be used for the events? We
> don't need to be specific it is a CPU here?
>
> >   */
> >  static int setup_metric_events(struct hashmap *ids,
> >                                struct evlist *metric_evlist,
> > -                              struct evsel ***out_metric_events)
> > +                              struct evsel ***out_metric_events,
> > +                              const char *pmu_name)
> >  {
> >         struct evsel **metric_events;
> >         const char *metric_id;
> > @@ -286,6 +294,10 @@ static int setup_metric_events(struct hashmap *ids,
> >                  * about this event.
> >                  */
> >                 if (hashmap__find(ids, metric_id, (void **)&val_ptr)) {
> > +                       if (evsel__is_hybrid(ev) && pmu_name &&
>
> The evsel__is_hybrid looks unnecessary here.
>
> > +                           strcmp(pmu_name, ev->pmu_name)) {
> > +                               continue;
> > +                       }
> >                         metric_events[matched_events++] = ev;
> >
> >                         if (matched_events >= ids_size)
> > @@ -724,7 +736,8 @@ static int decode_all_metric_ids(struct evlist *perf_evlist, const char *modifie
> >  static int metricgroup__build_event_string(struct strbuf *events,
> >                                            const struct expr_parse_ctx *ctx,
> >                                            const char *modifier,
> > -                                          bool has_constraint)
> > +                                          bool has_constraint,
> > +                                          const char *pmu_name)
> >  {
> >         struct hashmap_entry *cur;
> >         size_t bkt;
> > @@ -806,12 +819,18 @@ static int metricgroup__build_event_string(struct strbuf *events,
> >                 if (no_group) {
> >                         /* Strange case of a metric of just duration_time. */
> >                         ret = strbuf_addf(events, "duration_time");
> > -               } else if (!has_constraint)
> > -                       ret = strbuf_addf(events, "}:W,duration_time");
> > -               else
> > +               } else if (!has_constraint) {
> > +                       ret = strbuf_addf(events, "}:W");
> > +                       if (pmu_name)
> > +                               ret = strbuf_addf(events, "#%s", pmu_name);
> >                         ret = strbuf_addf(events, ",duration_time");
> > -       } else if (!no_group && !has_constraint)
> > +               } else
> > +                       ret = strbuf_addf(events, ",duration_time");
> > +       } else if (!no_group && !has_constraint) {
> >                 ret = strbuf_addf(events, "}:W");
> > +               if (pmu_name)
> > +                       ret = strbuf_addf(events, "#%s", pmu_name);
> > +       }
>
> Why add the # and then expand it when you could just expand here?

Also, you are only adding the #cpu_atom or #cpu_core when events are
grouped. This means --metric-no-group or metrics with the NMI watchdog
constraint will not have a PMU name. Given this is needed in the
grouped case then the flag/constraint are probably broken.

Thanks,
Ian

>
> >         return ret;
> >  #undef RETURN_IF_NON_ZERO
> > @@ -1150,11 +1169,13 @@ static int metric_list_cmp(void *priv __maybe_unused, const struct list_head *l,
> >   * @metric_list: The list that the metric or metric group are added to.
> >   * @map: The map that is searched for metrics, most commonly the table for the
> >   *       architecture perf is running upon.
> > + * @pmu_name: the name of the CPU.
> >   */
> > -static int metricgroup__add_metric(const char *metric_name, const char *modifier,
> > -                                  bool metric_no_group,
> > +static int metricgroup__add_metric(const char *metric_name,
> > +                                  const char *modifier, bool metric_no_group,
>
> This is a whitespace only change.
>
> >                                    struct list_head *metric_list,
> > -                                  const struct pmu_events_map *map)
> > +                                  const struct pmu_events_map *map,
> > +                                  const char *pmu_name)
> >  {
> >         const struct pmu_event *pe;
> >         LIST_HEAD(list);
> > @@ -1167,6 +1188,8 @@ static int metricgroup__add_metric(const char *metric_name, const char *modifier
> >          */
> >         map_for_each_metric(pe, i, map, metric_name) {
> >                 has_match = true;
> > +               if (pmu_name && pe->pmu && strcmp(pmu_name, pe->pmu))
> > +                       continue;
> >                 ret = add_metric(&list, pe, modifier, metric_no_group,
> >                                  /*root_metric=*/NULL,
> >                                  /*visited_metrics=*/NULL, map);
> > @@ -1215,10 +1238,12 @@ static int metricgroup__add_metric(const char *metric_name, const char *modifier
> >   * @metric_list: The list that metrics are added to.
> >   * @map: The map that is searched for metrics, most commonly the table for the
> >   *       architecture perf is running upon.
> > + * @pmu_name: the name of the CPU.
> >   */
> >  static int metricgroup__add_metric_list(const char *list, bool metric_no_group,
> >                                         struct list_head *metric_list,
> > -                                       const struct pmu_events_map *map)
> > +                                       const struct pmu_events_map *map,
> > +                                       const char *pmu_name)
> >  {
> >         char *list_itr, *list_copy, *metric_name, *modifier;
> >         int ret, count = 0;
> > @@ -1235,7 +1260,7 @@ static int metricgroup__add_metric_list(const char *list, bool metric_no_group,
> >
> >                 ret = metricgroup__add_metric(metric_name, modifier,
> >                                               metric_no_group, metric_list,
> > -                                             map);
> > +                                             map, pmu_name);
> >                 if (ret == -EINVAL)
> >                         pr_err("Cannot find metric or group `%s'\n", metric_name);
> >
> > @@ -1310,6 +1335,183 @@ static int build_combined_expr_ctx(const struct list_head *metric_list,
> >         return ret;
> >  }
> >
> > +static char *get_metric_pmus(char *orig_str, struct strbuf *metric_pmus)
>
> Could you add an example here. A metric is coming with an associated
> PMU so what are we pulling out here? Why are the events modified? What
> is the metric_pmus out argument doing?
>
> > +{
> > +       char *llist, *nlist, *p1, *p2, *new_str = NULL;
> > +       int ret;
> > +       struct strbuf new_events;
> > +
> > +       if (!strchr(orig_str, '#')) {
> > +               /*
> > +                * pmu name is added after '#'. If no '#' found,
> > +                * don't need to process pmu.
> > +                */
> > +               return strdup(orig_str);
> > +       }
> > +
> > +       nlist = strdup(orig_str);
> > +       if (!nlist)
> > +               return new_str;
> > +
> > +       ret = strbuf_init(&new_events, 100);
> > +       if (ret)
> > +               goto err_out;
> > +
> > +       ret = strbuf_grow(metric_pmus, 100);
> > +       if (ret)
> > +               goto err_out;
> > +
> > +       llist = nlist;
> > +       while ((p1 = strsep(&llist, ",")) != NULL) {
> > +               p2 = strchr(p1, '#');
> > +               if (p2) {
> > +                       *p2 = 0;
> > +                       ret = strbuf_addf(&new_events, "%s,", p1);
> > +                       if (ret)
> > +                               goto err_out;
> > +
> > +                       ret = strbuf_addf(metric_pmus, "%s,", p2 + 1);
> > +                       if (ret)
> > +                               goto err_out;
> > +
> > +               } else {
> > +                       ret = strbuf_addf(&new_events, "%s,", p1);
> > +                       if (ret)
> > +                               goto err_out;
> > +               }
> > +       }
> > +
> > +       new_str = strdup(new_events.buf);
> > +       if (new_str) {
> > +               /* Remove last ',' */
> > +               new_str[strlen(new_str) - 1] = 0;
> > +       }
> > +err_out:
> > +       free(nlist);
> > +       strbuf_release(&new_events);
> > +       return new_str;
> > +}
> > +
> > +static void set_pmu_unmatched_events(struct evlist *evlist, int group_idx,
> > +                                    char *pmu_name,
> > +                                    unsigned long *evlist_removed)
> > +{
> > +       struct evsel *evsel, *pos;
> > +       int i = 0, j = 0;
> > +
> > +       /*
> > +        * Move to the first evsel of a given group
> > +        */
> > +       evlist__for_each_entry(evlist, evsel) {
> > +               if (evsel__is_group_leader(evsel) &&
> > +                   evsel->core.nr_members >= 1) {
> > +                       if (i < group_idx) {
> > +                               j += evsel->core.nr_members;
> > +                               i++;
> > +                               continue;
> > +                       }
> > +               }
> > +       }
> > +
> > +       i = 0;
> > +       evlist__for_each_entry(evlist, evsel) {
> > +               if (i < j) {
> > +                       i++;
> > +                       continue;
> > +               }
> > +
> > +               /*
> > +                * Now we are at the first evsel in the group
> > +                */
> > +               for_each_group_evsel(pos, evsel) {
> > +                       if (evsel__is_hybrid(pos) &&
> > +                           strcmp(pos->pmu_name, pmu_name)) {
> > +                               set_bit(pos->core.idx, evlist_removed);
> > +                       }
> > +               }
> > +               break;
> > +       }
> > +}
> > +
> > +static void remove_pmu_umatched_events(struct evlist *evlist, char *metric_pmus)
>
> Is it possible here that we have an evlist being used by >1 metric.
> We've decided one of the metrics is invalid and set about removing the
> events thereby breaking the other metric? In general this code treads
> lightly around the evlist as previously it has been the source of
> bugs:
> https://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git/commit/tools/perf/util/metricgroup.c?h=perf/core&id=5ecd5a0c7d1cca79f1431093d12e4cd9893b0331
>
> > +{
> > +       struct evsel *evsel, *tmp, *new_leader = NULL;
> > +       unsigned long *evlist_removed;
> > +       char *llist, *nlist, *p1;
>
> Is there a better name than nlist and llist? Could you explain what they mean?
>
> > +       bool need_new_leader = false;
> > +       int i = 0, new_nr_members = 0;
> > +
> > +       nlist = strdup(metric_pmus);
> > +       if (!nlist)
> > +               return;
> > +
> > +       evlist_removed = bitmap_zalloc(evlist->core.nr_entries);
> > +       if (!evlist_removed) {
> > +               free(nlist);
> > +               return;
> > +       }
> > +
> > +       llist = nlist;
> > +       while ((p1 = strsep(&llist, ",")) != NULL) {
> > +               if (strlen(p1) > 0) {
> > +                       /*
> > +                        * p1 points to the string of pmu name, e.g. "cpu_atom".
> > +                        * The metric group string has pmu suffixes, e.g.
> > +                        * "{inst_retired.any,cpu_clk_unhalted.thread}:W#cpu_core,
> > +                        *  {cpu_clk_unhalted.core,inst_retired.any_p}:W#cpu_atom"
> > +                        * By counting the pmu name, we can know the index of
> > +                        * group.
> > +                        */
> > +                       set_pmu_unmatched_events(evlist, i++, p1,
> > +                                                evlist_removed);
> > +               }
> > +       }
> > +
> > +       evlist__for_each_entry_safe(evlist, tmp, evsel) {
> > +               if (test_bit(evsel->core.idx, evlist_removed)) {
> > +                       if (!evsel__is_group_leader(evsel)) {
> > +                               if (!need_new_leader) {
> > +                                       if (new_leader)
> > +                                               new_leader->core.leader->nr_members--;
> > +                                       else
> > +                                               evsel->core.leader->nr_members--;
> > +                               } else
> > +                                       new_nr_members--;
> > +                       } else {
> > +                               /*
> > +                                * If group leader is to remove, we need to
> > +                                * prepare a new leader and adjust all group
> > +                                * members.
> > +                                */
> > +                               need_new_leader = true;
> > +                               new_nr_members =
> > +                                   evsel->core.leader->nr_members - 1;
> > +                       }
> > +
> > +                       evlist__remove(evlist, evsel);
> > +                       evsel__delete(evsel);
> > +               } else {
> > +                       if (!evsel__is_group_leader(evsel)) {
> > +                               if (need_new_leader) {
> > +                                       need_new_leader = false;
> > +                                       new_leader = evsel;
> > +                                       new_leader->core.leader =
> > +                                           &new_leader->core;
> > +                                       new_leader->core.nr_members =
> > +                                           new_nr_members;
> > +                               } else if (new_leader)
> > +                                       evsel->core.leader = &new_leader->core;
> > +                       } else {
> > +                               need_new_leader = false;
> > +                               new_leader = NULL;
> > +                       }
> > +               }
> > +       }
> > +
> > +       bitmap_free(evlist_removed);
> > +       free(nlist);
> > +}
> > +
> >  /**
> >   * parse_ids - Build the event string for the ids and parse them creating an
> >   *             evlist. The encoded metric_ids are decoded.
> > @@ -1319,14 +1521,18 @@ static int build_combined_expr_ctx(const struct list_head *metric_list,
> >   * @modifier: any modifiers added to the events.
> >   * @has_constraint: false if events should be placed in a weak group.
> >   * @out_evlist: the created list of events.
> > + * @pmu_name: the name of the CPU.
> >   */
> >  static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu,
> >                      struct expr_parse_ctx *ids, const char *modifier,
> > -                    bool has_constraint, struct evlist **out_evlist)
> > +                    bool has_constraint, struct evlist **out_evlist,
> > +                    const char *pmu_name)
> >  {
> >         struct parse_events_error parse_error;
> >         struct evlist *parsed_evlist;
> >         struct strbuf events = STRBUF_INIT;
> > +       struct strbuf metric_pmus = STRBUF_INIT;
> > +       char *nlist = NULL;
> >         int ret;
> >
> >         *out_evlist = NULL;
> > @@ -1353,7 +1559,7 @@ static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu,
> >                 ids__insert(ids->ids, tmp);
> >         }
> >         ret = metricgroup__build_event_string(&events, ids, modifier,
> > -                                             has_constraint);
> > +                                             has_constraint, pmu_name);
> >         if (ret)
> >                 return ret;
> >
> > @@ -1364,11 +1570,20 @@ static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu,
> >         }
> >         pr_debug("Parsing metric events '%s'\n", events.buf);
> >         parse_events_error__init(&parse_error);
> > -       ret = __parse_events(parsed_evlist, events.buf, &parse_error, fake_pmu);
> > +       nlist = get_metric_pmus(events.buf, &metric_pmus);
> > +       if (!nlist) {
> > +               ret = -ENOMEM;
> > +               goto err_out;
> > +       }
> > +       ret = __parse_events(parsed_evlist, nlist, &parse_error, fake_pmu);
> >         if (ret) {
> >                 parse_events_error__print(&parse_error, events.buf);
> >                 goto err_out;
> >         }
> > +
> > +       if (metric_pmus.alloc)
> > +               remove_pmu_umatched_events(parsed_evlist, metric_pmus.buf);
> > +
> >         ret = decode_all_metric_ids(parsed_evlist, modifier);
> >         if (ret)
> >                 goto err_out;
> > @@ -1376,9 +1591,12 @@ static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu,
> >         *out_evlist = parsed_evlist;
> >         parsed_evlist = NULL;
> >  err_out:
> > +       if (nlist)
> > +               free(nlist);
> >         parse_events_error__exit(&parse_error);
> >         evlist__delete(parsed_evlist);
> >         strbuf_release(&events);
> > +       strbuf_release(&metric_pmus);
> >         return ret;
> >  }
> >
> > @@ -1397,7 +1615,8 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
> >         if (metric_events_list->nr_entries == 0)
> >                 metricgroup__rblist_init(metric_events_list);
> >         ret = metricgroup__add_metric_list(str, metric_no_group,
> > -                                          &metric_list, map);
> > +                                          &metric_list, map,
> > +                                          perf_evlist->hybrid_pmu_name);
>
> This confuses me. perf_evlist is an out argument that we build the
> list of evsels for the metrics into, yet here you are reading it for
> the hybrid_pmu_name. Why is this? What values can hybrid_pmu_name be
> here?
>
> >         if (ret)
> >                 goto out;
> >
> > @@ -1413,7 +1632,8 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
> >                         ret = parse_ids(metric_no_merge, fake_pmu, combined,
> >                                         /*modifier=*/NULL,
> >                                         /*has_constraint=*/true,
> > -                                       &combined_evlist);
> > +                                       &combined_evlist,
> > +                                       perf_evlist->hybrid_pmu_name);
>
> I don't understand this. combined is a set of event names, it seems
> now we need to combine the event name with the PMU to form an id.
>
> >                 }
> >                 if (combined)
> >                         expr__ctx_free(combined);
> > @@ -1450,6 +1670,9 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
> >                                         continue;
> >
> >                                 if (expr__subset_of_ids(n->pctx, m->pctx)) {
> > +                                       if (m->pmu_name && n->pmu_name
> > +                                           && strcmp(m->pmu_name, n->pmu_name))
> > +                                               continue;
>
> This test is cheaper than expr__subset_of_ids and should be before it.
>
> >                                         pr_debug("Events in '%s' fully contained within '%s'\n",
> >                                                  m->metric_name, n->metric_name);
> >                                         metric_evlist = n->evlist;
> > @@ -1459,14 +1682,16 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
> >                         }
> >                 }
> >                 if (!metric_evlist) {
> > -                       ret = parse_ids(metric_no_merge, fake_pmu, m->pctx, m->modifier,
> > -                                       m->has_constraint, &m->evlist);
> > +                       ret = parse_ids(metric_no_merge, fake_pmu, m->pctx,
> > +                                     m->modifier, m->has_constraint,
> > +                                     &m->evlist, m->pmu_name);
> >                         if (ret)
> >                                 goto out;
> >
> >                         metric_evlist = m->evlist;
> >                 }
> > -               ret = setup_metric_events(m->pctx->ids, metric_evlist, &metric_events);
> > +               ret = setup_metric_events(m->pctx->ids, metric_evlist,
> > +                                         &metric_events, m->pmu_name);
> >                 if (ret) {
> >                         pr_debug("Cannot resolve IDs for %s: %s\n",
> >                                 m->metric_name, m->metric_expr);
> > diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
> > index 138e3ab9d638..46b3dd134656 100644
> > --- a/tools/perf/util/stat-display.c
> > +++ b/tools/perf/util/stat-display.c
> > @@ -539,7 +539,8 @@ static void aggr_update_shadow(struct perf_stat_config *config,
> >         }
> >  }
> >
> > -static void uniquify_event_name(struct evsel *counter)
> > +static void uniquify_event_name(struct evsel *counter,
> > +                               struct perf_stat_config *stat_config)
> >  {
> >         char *new_name;
> >         char *config;
> > @@ -558,7 +559,8 @@ static void uniquify_event_name(struct evsel *counter)
> >                         counter->name = new_name;
> >                 }
> >         } else {
> > -               if (perf_pmu__has_hybrid()) {
> > +               if (perf_pmu__has_hybrid() &&
> > +                   stat_config->metric_events.nr_entries == 0) {
> >                         ret = asprintf(&new_name, "%s/%s/",
> >                                        counter->pmu_name, counter->name);
> >                 } else {
> > @@ -619,7 +621,7 @@ static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
> >                 return false;
> >         cb(config, counter, data, true);
> >         if (config->no_merge || hybrid_uniquify(counter))
> > -               uniquify_event_name(counter);
> > +               uniquify_event_name(counter, config);
> >         else if (counter->auto_merge_stats)
> >                 collect_all_aliases(config, counter, cb, data);
> >         return true;
> > --
> > 2.25.1
> >

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/3] perf stat: Merge event counts from all hybrid PMUs
  2022-04-22  6:56 ` [PATCH 2/3] perf stat: Merge event counts from all hybrid PMUs zhengjun.xing
@ 2022-05-07  4:03   ` Ian Rogers
  2022-05-07  5:09     ` Xing Zhengjun
  0 siblings, 1 reply; 7+ messages in thread
From: Ian Rogers @ 2022-05-07  4:03 UTC (permalink / raw)
  To: zhengjun.xing
  Cc: acme, peterz, mingo, alexander.shishkin, jolsa, linux-kernel,
	linux-perf-users, adrian.hunter, ak, kan.liang

On Thu, Apr 21, 2022 at 11:57 PM <zhengjun.xing@linux.intel.com> wrote:
>
> From: Zhengjun Xing <zhengjun.xing@linux.intel.com>
>
> For hybrid events, by default stat aggregates and reports the event counts
> per pmu.
>
>   # ./perf stat -e cycles -a  sleep 1
>
>    Performance counter stats for 'system wide':
>
>       14,066,877,268      cpu_core/cycles/
>        6,814,443,147      cpu_atom/cycles/
>
>          1.002760625 seconds time elapsed
>
> Sometimes, it's also useful to aggregate event counts from all PMUs.
> Create a new option '--hybrid-merge' to enable that behavior and report
> the counts without PMUs.
>
>   # ./perf stat -e cycles -a --hybrid-merge  sleep 1
>
>    Performance counter stats for 'system wide':
>
>       20,732,982,512      cycles
>
>          1.002776793 seconds time elapsed
>
> Signed-off-by: Zhengjun Xing <zhengjun.xing@linux.intel.com>
> Reviewed-by: Kan Liang <kan.liang@linux.intel.com>

This feels related to aggregation, but aggregation is for a single
evsel on a single PMU. What happens if you have both instructions and
cycles with --hybrid-merge? Normally we aggregate all counts for each
CPU into the two evsels and then compute a metric:
```
$ perf stat -e instructions,cycles /bin/true

 Performance counter stats for '/bin/true':

         1,830,554      instructions              #    1.17  insn per
cycle
         1,561,415      cycles
```
This kind of aggregation behavior may be needed more widely for metrics.

Thanks,
Ian

> ---
>  tools/perf/Documentation/perf-stat.txt | 10 ++++++++++
>  tools/perf/builtin-stat.c              |  2 ++
>  tools/perf/util/stat-display.c         | 17 +++++++++++++++--
>  tools/perf/util/stat.h                 |  1 +
>  4 files changed, 28 insertions(+), 2 deletions(-)
>
> diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
> index c06c341e72b9..8d1cde00b8d6 100644
> --- a/tools/perf/Documentation/perf-stat.txt
> +++ b/tools/perf/Documentation/perf-stat.txt
> @@ -454,6 +454,16 @@ Multiple events are created from a single event specification when:
>  2. Aliases, which are listed immediately after the Kernel PMU events
>     by perf list, are used.
>
> +--hybrid-merge::
> +Merge the hybrid event counts from all PMUs.
> +
> +For hybrid events, by default, the stat aggregates and reports the event
> +counts per PMU. But sometimes, it's also useful to aggregate event counts
> +from all PMUs. This option enables that behavior and reports the counts
> +without PMUs.
> +
> +For non-hybrid events, it should be no effect.
> +
>  --smi-cost::
>  Measure SMI cost if msr/aperf/ and msr/smi/ events are supported.
>
> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> index a96f106dc93a..ea88ac5bed2d 100644
> --- a/tools/perf/builtin-stat.c
> +++ b/tools/perf/builtin-stat.c
> @@ -1235,6 +1235,8 @@ static struct option stat_options[] = {
>         OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
>                     "disable CPU count aggregation", AGGR_NONE),
>         OPT_BOOLEAN(0, "no-merge", &stat_config.no_merge, "Do not merge identical named events"),
> +       OPT_BOOLEAN(0, "hybrid-merge", &stat_config.hybrid_merge,
> +                   "Merge identical named hybrid events"),
>         OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator",
>                    "print counts with custom separator"),
>         OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
> diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
> index 46b3dd134656..d9629a83aa78 100644
> --- a/tools/perf/util/stat-display.c
> +++ b/tools/perf/util/stat-display.c
> @@ -612,6 +612,19 @@ static bool hybrid_uniquify(struct evsel *evsel)
>         return perf_pmu__has_hybrid() && !is_uncore(evsel);
>  }
>
> +static bool hybrid_merge(struct evsel *counter, struct perf_stat_config *config,
> +                        bool check)
> +{
> +       if (hybrid_uniquify(counter)) {
> +               if (check)
> +                       return config && config->hybrid_merge;
> +               else
> +                       return config && !config->hybrid_merge;
> +       }
> +
> +       return false;
> +}
> +
>  static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
>                             void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data,
>                                        bool first),
> @@ -620,9 +633,9 @@ static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
>         if (counter->merged_stat)
>                 return false;
>         cb(config, counter, data, true);
> -       if (config->no_merge || hybrid_uniquify(counter))
> +       if (config->no_merge || hybrid_merge(counter, config, false))
>                 uniquify_event_name(counter, config);
> -       else if (counter->auto_merge_stats)
> +       else if (counter->auto_merge_stats || hybrid_merge(counter, config, true))
>                 collect_all_aliases(config, counter, cb, data);
>         return true;
>  }
> diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
> index 335d19cc3063..91d989dfeca4 100644
> --- a/tools/perf/util/stat.h
> +++ b/tools/perf/util/stat.h
> @@ -122,6 +122,7 @@ struct perf_stat_config {
>         bool                     ru_display;
>         bool                     big_num;
>         bool                     no_merge;
> +       bool                     hybrid_merge;
>         bool                     walltime_run_table;
>         bool                     all_kernel;
>         bool                     all_user;
> --
> 2.25.1
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/3] perf stat: Merge event counts from all hybrid PMUs
  2022-05-07  4:03   ` Ian Rogers
@ 2022-05-07  5:09     ` Xing Zhengjun
  0 siblings, 0 replies; 7+ messages in thread
From: Xing Zhengjun @ 2022-05-07  5:09 UTC (permalink / raw)
  To: Ian Rogers
  Cc: acme, peterz, mingo, alexander.shishkin, jolsa, linux-kernel,
	linux-perf-users, adrian.hunter, ak, kan.liang



On 5/7/2022 12:03 PM, Ian Rogers wrote:
> On Thu, Apr 21, 2022 at 11:57 PM <zhengjun.xing@linux.intel.com> wrote:
>>
>> From: Zhengjun Xing <zhengjun.xing@linux.intel.com>
>>
>> For hybrid events, by default stat aggregates and reports the event counts
>> per pmu.
>>
>>    # ./perf stat -e cycles -a  sleep 1
>>
>>     Performance counter stats for 'system wide':
>>
>>        14,066,877,268      cpu_core/cycles/
>>         6,814,443,147      cpu_atom/cycles/
>>
>>           1.002760625 seconds time elapsed
>>
>> Sometimes, it's also useful to aggregate event counts from all PMUs.
>> Create a new option '--hybrid-merge' to enable that behavior and report
>> the counts without PMUs.
>>
>>    # ./perf stat -e cycles -a --hybrid-merge  sleep 1
>>
>>     Performance counter stats for 'system wide':
>>
>>        20,732,982,512      cycles
>>
>>           1.002776793 seconds time elapsed
>>
>> Signed-off-by: Zhengjun Xing <zhengjun.xing@linux.intel.com>
>> Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
> 
> This feels related to aggregation, but aggregation is for a single
> evsel on a single PMU. What happens if you have both instructions and
> cycles with --hybrid-merge? Normally we aggregate all counts for each
> CPU into the two evsels and then compute a metric:
> ```
# ./perf stat -e instructions,cycles  -a /bin/true

  Performance counter stats for 'system wide':

          2,416,092      cpu_core/instructions/
            305,840      cpu_atom/instructions/
          2,645,138      cpu_core/cycles/
            789,631      cpu_atom/cycles/

        0.002345159 seconds time elapsed

# ./perf stat -e instructions,cycles  -a --hybrid-merge /bin/true

  Performance counter stats for 'system wide':

          2,702,612      instructions
          3,607,773      cycles

        0.002475749 seconds time elapsed

Currently, no metrics are shown for hybrid systems.

> $ perf stat -e instructions,cycles /bin/true
> 
>   Performance counter stats for '/bin/true':
> 
>           1,830,554      instructions              #    1.17  insn per
> cycle
>           1,561,415      cycles
> ```
> This kind of aggregation behavior may be needed more widely for metrics.
> 
> Thanks,
> Ian
> 
>> ---
>>   tools/perf/Documentation/perf-stat.txt | 10 ++++++++++
>>   tools/perf/builtin-stat.c              |  2 ++
>>   tools/perf/util/stat-display.c         | 17 +++++++++++++++--
>>   tools/perf/util/stat.h                 |  1 +
>>   4 files changed, 28 insertions(+), 2 deletions(-)
>>
>> diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
>> index c06c341e72b9..8d1cde00b8d6 100644
>> --- a/tools/perf/Documentation/perf-stat.txt
>> +++ b/tools/perf/Documentation/perf-stat.txt
>> @@ -454,6 +454,16 @@ Multiple events are created from a single event specification when:
>>   2. Aliases, which are listed immediately after the Kernel PMU events
>>      by perf list, are used.
>>
>> +--hybrid-merge::
>> +Merge the hybrid event counts from all PMUs.
>> +
>> +For hybrid events, by default, the stat aggregates and reports the event
>> +counts per PMU. But sometimes, it's also useful to aggregate event counts
>> +from all PMUs. This option enables that behavior and reports the counts
>> +without PMUs.
>> +
>> +For non-hybrid events, it should have no effect.
>> +
>>   --smi-cost::
>>   Measure SMI cost if msr/aperf/ and msr/smi/ events are supported.
>>
>> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
>> index a96f106dc93a..ea88ac5bed2d 100644
>> --- a/tools/perf/builtin-stat.c
>> +++ b/tools/perf/builtin-stat.c
>> @@ -1235,6 +1235,8 @@ static struct option stat_options[] = {
>>          OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
>>                      "disable CPU count aggregation", AGGR_NONE),
>>          OPT_BOOLEAN(0, "no-merge", &stat_config.no_merge, "Do not merge identical named events"),
>> +       OPT_BOOLEAN(0, "hybrid-merge", &stat_config.hybrid_merge,
>> +                   "Merge identical named hybrid events"),
>>          OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator",
>>                     "print counts with custom separator"),
>>          OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
>> diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
>> index 46b3dd134656..d9629a83aa78 100644
>> --- a/tools/perf/util/stat-display.c
>> +++ b/tools/perf/util/stat-display.c
>> @@ -612,6 +612,19 @@ static bool hybrid_uniquify(struct evsel *evsel)
>>          return perf_pmu__has_hybrid() && !is_uncore(evsel);
>>   }
>>
>> +static bool hybrid_merge(struct evsel *counter, struct perf_stat_config *config,
>> +                        bool check)
>> +{
>> +       if (hybrid_uniquify(counter)) {
>> +               if (check)
>> +                       return config && config->hybrid_merge;
>> +               else
>> +                       return config && !config->hybrid_merge;
>> +       }
>> +
>> +       return false;
>> +}
>> +
>>   static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
>>                              void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data,
>>                                         bool first),
>> @@ -620,9 +633,9 @@ static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
>>          if (counter->merged_stat)
>>                  return false;
>>          cb(config, counter, data, true);
>> -       if (config->no_merge || hybrid_uniquify(counter))
>> +       if (config->no_merge || hybrid_merge(counter, config, false))
>>                  uniquify_event_name(counter, config);
>> -       else if (counter->auto_merge_stats)
>> +       else if (counter->auto_merge_stats || hybrid_merge(counter, config, true))
>>                  collect_all_aliases(config, counter, cb, data);
>>          return true;
>>   }
>> diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
>> index 335d19cc3063..91d989dfeca4 100644
>> --- a/tools/perf/util/stat.h
>> +++ b/tools/perf/util/stat.h
>> @@ -122,6 +122,7 @@ struct perf_stat_config {
>>          bool                     ru_display;
>>          bool                     big_num;
>>          bool                     no_merge;
>> +       bool                     hybrid_merge;
>>          bool                     walltime_run_table;
>>          bool                     all_kernel;
>>          bool                     all_user;
>> --
>> 2.25.1
>>

-- 
Zhengjun Xing

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2022-05-07  5:09 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-22  6:56 [PATCH 1/3] perf stat: Support metrics with hybrid events zhengjun.xing
2022-04-22  6:56 ` [PATCH 2/3] perf stat: Merge event counts from all hybrid PMUs zhengjun.xing
2022-05-07  4:03   ` Ian Rogers
2022-05-07  5:09     ` Xing Zhengjun
2022-04-22  6:56 ` [PATCH 3/3] perf stat: Support hybrid --topdown option zhengjun.xing
2022-05-06  8:25 ` [PATCH 1/3] perf stat: Support metrics with hybrid events Ian Rogers
2022-05-06  8:42   ` Ian Rogers

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).