linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v1 1/3] perf stat: Pass fewer metric arguments
@ 2024-02-02  2:25 Ian Rogers
  2024-02-02  2:25 ` [PATCH v1 2/3] perf metrics: Compute unmerged uncore metrics individually Ian Rogers
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Ian Rogers @ 2024-02-02  2:25 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Ian Rogers, Adrian Hunter, Kan Liang, Kajol Jain, John Garry,
	Kaige Ye, K Prateek Nayak, linux-perf-users, linux-kernel
  Cc: Stephane Eranian

Pass metric_expr and evsel rather than specific variables from the
struct, thereby reducing the number of arguments. This will enable
later fixes.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/util/stat-shadow.c | 75 +++++++++++++++--------------------
 1 file changed, 33 insertions(+), 42 deletions(-)

diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index e31426167852..f6c9d2f98835 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -355,23 +355,22 @@ static void print_nsecs(struct perf_stat_config *config,
 		print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
 }
 
-static int prepare_metric(struct evsel **metric_events,
-			  struct metric_ref *metric_refs,
+static int prepare_metric(const struct metric_expr *mexp,
 			  struct expr_parse_ctx *pctx,
 			  int aggr_idx)
 {
 	int i;
 
-	for (i = 0; metric_events[i]; i++) {
+	for (i = 0; mexp->metric_events[i]; i++) {
 		char *n;
 		double val;
 		int source_count = 0;
 
-		if (evsel__is_tool(metric_events[i])) {
+		if (evsel__is_tool(mexp->metric_events[i])) {
 			struct stats *stats;
 			double scale;
 
-			switch (metric_events[i]->tool_event) {
+			switch (mexp->metric_events[i]->tool_event) {
 			case PERF_TOOL_DURATION_TIME:
 				stats = &walltime_nsecs_stats;
 				scale = 1e-9;
@@ -391,19 +390,20 @@ static int prepare_metric(struct evsel **metric_events,
 				pr_err("Invalid tool event 'max'");
 				abort();
 			default:
-				pr_err("Unknown tool event '%s'", evsel__name(metric_events[i]));
+				pr_err("Unknown tool event '%s'",
+				       evsel__name(mexp->metric_events[i]));
 				abort();
 			}
 			val = avg_stats(stats) * scale;
 			source_count = 1;
 		} else {
-			struct perf_stat_evsel *ps = metric_events[i]->stats;
+			struct perf_stat_evsel *ps = mexp->metric_events[i]->stats;
 			struct perf_stat_aggr *aggr = &ps->aggr[aggr_idx];
 
 			if (!aggr)
 				break;
 
-                        if (!metric_events[i]->supported) {
+			if (!mexp->metric_events[i]->supported) {
 				/*
 				 * Not supported events will have a count of 0,
 				 * which can be confusing in a
@@ -419,19 +419,19 @@ static int prepare_metric(struct evsel **metric_events,
 				 * reverse the scale before computing the
 				 * metric.
 				 */
-				val = aggr->counts.val * (1.0 / metric_events[i]->scale);
-				source_count = evsel__source_count(metric_events[i]);
+				val = aggr->counts.val * (1.0 / mexp->metric_events[i]->scale);
+				source_count = evsel__source_count(mexp->metric_events[i]);
 			}
 		}
-		n = strdup(evsel__metric_id(metric_events[i]));
+		n = strdup(evsel__metric_id(mexp->metric_events[i]));
 		if (!n)
 			return -ENOMEM;
 
 		expr__add_id_val_source_count(pctx, n, val, source_count);
 	}
 
-	for (int j = 0; metric_refs && metric_refs[j].metric_name; j++) {
-		int ret = expr__add_ref(pctx, &metric_refs[j]);
+	for (int j = 0; mexp->metric_refs && mexp->metric_refs[j].metric_name; j++) {
+		int ret = expr__add_ref(pctx, &mexp->metric_refs[j]);
 
 		if (ret)
 			return ret;
@@ -441,14 +441,8 @@ static int prepare_metric(struct evsel **metric_events,
 }
 
 static void generic_metric(struct perf_stat_config *config,
-			   const char *metric_expr,
-			   const char *metric_threshold,
-			   struct evsel **metric_events,
-			   struct metric_ref *metric_refs,
-			   char *name,
-			   const char *metric_name,
-			   const char *metric_unit,
-			   int runtime,
+			struct metric_expr *mexp,
+			struct evsel *evsel,
 			   int aggr_idx,
 			   struct perf_stat_output_ctx *out)
 {
@@ -465,55 +459,55 @@ static void generic_metric(struct perf_stat_config *config,
 
 	if (config->user_requested_cpu_list)
 		pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list);
-	pctx->sctx.runtime = runtime;
+	pctx->sctx.runtime = mexp->runtime;
 	pctx->sctx.system_wide = config->system_wide;
-	i = prepare_metric(metric_events, metric_refs, pctx, aggr_idx);
+	i = prepare_metric(mexp, pctx, aggr_idx);
 	if (i < 0) {
 		expr__ctx_free(pctx);
 		return;
 	}
-	if (!metric_events[i]) {
-		if (expr__parse(&ratio, pctx, metric_expr) == 0) {
+	if (!mexp->metric_events[i]) {
+		if (expr__parse(&ratio, pctx, mexp->metric_expr) == 0) {
 			char *unit;
 			char metric_bf[64];
 
-			if (metric_threshold &&
-			    expr__parse(&threshold, pctx, metric_threshold) == 0 &&
+			if (mexp->metric_threshold &&
+			    expr__parse(&threshold, pctx, mexp->metric_threshold) == 0 &&
 			    !isnan(threshold)) {
 				color = fpclassify(threshold) == FP_ZERO
 					? PERF_COLOR_GREEN : PERF_COLOR_RED;
 			}
 
-			if (metric_unit && metric_name) {
-				if (perf_pmu__convert_scale(metric_unit,
+			if (mexp->metric_unit && mexp->metric_name) {
+				if (perf_pmu__convert_scale(mexp->metric_unit,
 					&unit, &scale) >= 0) {
 					ratio *= scale;
 				}
-				if (strstr(metric_expr, "?"))
+				if (strstr(mexp->metric_expr, "?"))
 					scnprintf(metric_bf, sizeof(metric_bf),
-					  "%s  %s_%d", unit, metric_name, runtime);
+					  "%s  %s_%d", unit, mexp->metric_name, mexp->runtime);
 				else
 					scnprintf(metric_bf, sizeof(metric_bf),
-					  "%s  %s", unit, metric_name);
+					  "%s  %s", unit, mexp->metric_name);
 
 				print_metric(config, ctxp, color, "%8.1f",
 					     metric_bf, ratio);
 			} else {
 				print_metric(config, ctxp, color, "%8.2f",
-					metric_name ?
-					metric_name :
-					out->force_header ?  name : "",
+					mexp->metric_name ?
+					mexp->metric_name :
+					out->force_header ?  evsel->name : "",
 					ratio);
 			}
 		} else {
 			print_metric(config, ctxp, color, /*unit=*/NULL,
 				     out->force_header ?
-				     (metric_name ? metric_name : name) : "", 0);
+				     (mexp->metric_name ?: evsel->name) : "", 0);
 		}
 	} else {
 		print_metric(config, ctxp, color, /*unit=*/NULL,
 			     out->force_header ?
-			     (metric_name ? metric_name : name) : "", 0);
+			     (mexp->metric_name ?: evsel->name) : "", 0);
 	}
 
 	expr__ctx_free(pctx);
@@ -528,7 +522,7 @@ double test_generic_metric(struct metric_expr *mexp, int aggr_idx)
 	if (!pctx)
 		return NAN;
 
-	if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, aggr_idx) < 0)
+	if (prepare_metric(mexp, pctx, aggr_idx) < 0)
 		goto out;
 
 	if (expr__parse(&ratio, pctx, mexp->metric_expr))
@@ -630,10 +624,7 @@ void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config,
 
 		if ((*num)++ > 0)
 			out->new_line(config, ctxp);
-		generic_metric(config, mexp->metric_expr, mexp->metric_threshold,
-			       mexp->metric_events, mexp->metric_refs, evsel->name,
-			       mexp->metric_name, mexp->metric_unit, mexp->runtime,
-			       aggr_idx, out);
+		generic_metric(config, mexp, evsel, aggr_idx, out);
 	}
 
 	return NULL;
-- 
2.43.0.594.gd9cf4e227d-goog


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v1 2/3] perf metrics: Compute unmerged uncore metrics individually
  2024-02-02  2:25 [PATCH v1 1/3] perf stat: Pass fewer metric arguments Ian Rogers
@ 2024-02-02  2:25 ` Ian Rogers
  2024-02-06  2:02   ` Namhyung Kim
  2024-02-02  2:25 ` [PATCH v1 3/3] perf stat: Fix metric-only aggregation index Ian Rogers
  2024-02-06  1:59 ` [PATCH v1 1/3] perf stat: Pass fewer metric arguments Namhyung Kim
  2 siblings, 1 reply; 7+ messages in thread
From: Ian Rogers @ 2024-02-02  2:25 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Ian Rogers, Adrian Hunter, Kan Liang, Kajol Jain, John Garry,
	Kaige Ye, K Prateek Nayak, linux-perf-users, linux-kernel
  Cc: Stephane Eranian

When merging counts from multiple uncore PMUs the metric is only
computed for the metric leader. When merging/aggregation is disabled,
prior to this patch just the leader's metric would be computed. Fix
this by computing the metric for each PMU.

On a SkylakeX:
Before:
```
$ perf stat -A -M memory_bandwidth_total -a sleep 1

 Performance counter stats for 'system wide':

CPU0               82,217      UNC_M_CAS_COUNT.RD [uncore_imc_0] #      9.2 MB/s  memory_bandwidth_total
CPU18                   0      UNC_M_CAS_COUNT.RD [uncore_imc_0] #      0.0 MB/s  memory_bandwidth_total
CPU0               61,395      UNC_M_CAS_COUNT.WR [uncore_imc_0]
CPU18                   0      UNC_M_CAS_COUNT.WR [uncore_imc_0]
CPU0                    0      UNC_M_CAS_COUNT.RD [uncore_imc_1]
CPU18                   0      UNC_M_CAS_COUNT.RD [uncore_imc_1]
CPU0                    0      UNC_M_CAS_COUNT.WR [uncore_imc_1]
CPU18                   0      UNC_M_CAS_COUNT.WR [uncore_imc_1]
CPU0               81,570      UNC_M_CAS_COUNT.RD [uncore_imc_2]
CPU18             113,886      UNC_M_CAS_COUNT.RD [uncore_imc_2]
CPU0               62,330      UNC_M_CAS_COUNT.WR [uncore_imc_2]
CPU18              66,942      UNC_M_CAS_COUNT.WR [uncore_imc_2]
CPU0               75,489      UNC_M_CAS_COUNT.RD [uncore_imc_3]
CPU18              27,958      UNC_M_CAS_COUNT.RD [uncore_imc_3]
CPU0               55,864      UNC_M_CAS_COUNT.WR [uncore_imc_3]
CPU18              38,727      UNC_M_CAS_COUNT.WR [uncore_imc_3]
CPU0                    0      UNC_M_CAS_COUNT.RD [uncore_imc_4]
CPU18                   0      UNC_M_CAS_COUNT.RD [uncore_imc_4]
CPU0                    0      UNC_M_CAS_COUNT.WR [uncore_imc_4]
CPU18                   0      UNC_M_CAS_COUNT.WR [uncore_imc_4]
CPU0               75,423      UNC_M_CAS_COUNT.RD [uncore_imc_5]
CPU18             104,527      UNC_M_CAS_COUNT.RD [uncore_imc_5]
CPU0               57,596      UNC_M_CAS_COUNT.WR [uncore_imc_5]
CPU18              56,777      UNC_M_CAS_COUNT.WR [uncore_imc_5]
CPU0        1,003,440,851 ns   duration_time

       1.003440851 seconds time elapsed
```

After:
```
$ perf stat -A -M memory_bandwidth_total -a sleep 1

 Performance counter stats for 'system wide':

CPU0               88,968      UNC_M_CAS_COUNT.RD [uncore_imc_0] #      9.5 MB/s  memory_bandwidth_total
CPU18                   0      UNC_M_CAS_COUNT.RD [uncore_imc_0] #      0.0 MB/s  memory_bandwidth_total
CPU0               59,498      UNC_M_CAS_COUNT.WR [uncore_imc_0]
CPU18                   0      UNC_M_CAS_COUNT.WR [uncore_imc_0]
CPU0                    0      UNC_M_CAS_COUNT.RD [uncore_imc_1] #      0.0 MB/s  memory_bandwidth_total
CPU18                   0      UNC_M_CAS_COUNT.RD [uncore_imc_1] #      0.0 MB/s  memory_bandwidth_total
CPU0                    0      UNC_M_CAS_COUNT.WR [uncore_imc_1]
CPU18                   0      UNC_M_CAS_COUNT.WR [uncore_imc_1]
CPU0               88,635      UNC_M_CAS_COUNT.RD [uncore_imc_2] #      9.5 MB/s  memory_bandwidth_total
CPU18             117,975      UNC_M_CAS_COUNT.RD [uncore_imc_2] #     11.5 MB/s  memory_bandwidth_total
CPU0               60,829      UNC_M_CAS_COUNT.WR [uncore_imc_2]
CPU18              62,105      UNC_M_CAS_COUNT.WR [uncore_imc_2]
CPU0               82,238      UNC_M_CAS_COUNT.RD [uncore_imc_3] #      8.7 MB/s  memory_bandwidth_total
CPU18              22,906      UNC_M_CAS_COUNT.RD [uncore_imc_3] #      3.6 MB/s  memory_bandwidth_total
CPU0               53,959      UNC_M_CAS_COUNT.WR [uncore_imc_3]
CPU18              32,990      UNC_M_CAS_COUNT.WR [uncore_imc_3]
CPU0                    0      UNC_M_CAS_COUNT.RD [uncore_imc_4] #      0.0 MB/s  memory_bandwidth_total
CPU18                   0      UNC_M_CAS_COUNT.RD [uncore_imc_4] #      0.0 MB/s  memory_bandwidth_total
CPU0                    0      UNC_M_CAS_COUNT.WR [uncore_imc_4]
CPU18                   0      UNC_M_CAS_COUNT.WR [uncore_imc_4]
CPU0               83,595      UNC_M_CAS_COUNT.RD [uncore_imc_5] #      8.9 MB/s  memory_bandwidth_total
CPU18             110,151      UNC_M_CAS_COUNT.RD [uncore_imc_5] #     10.5 MB/s  memory_bandwidth_total
CPU0               56,540      UNC_M_CAS_COUNT.WR [uncore_imc_5]
CPU18              53,816      UNC_M_CAS_COUNT.WR [uncore_imc_5]
CPU0        1,003,353,416 ns   duration_time
```

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/util/metricgroup.c |  2 ++
 tools/perf/util/stat-shadow.c | 31 +++++++++++++++++++++++++++----
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index ca3e0404f187..c33ffee837ca 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -44,6 +44,8 @@ struct metric_event *metricgroup__lookup(struct rblist *metric_events,
 	if (!metric_events)
 		return NULL;
 
+	if (evsel->metric_leader)
+		me.evsel = evsel->metric_leader;
 	nd = rblist__find(metric_events, &me);
 	if (nd)
 		return container_of(nd, struct metric_event, nd);
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index f6c9d2f98835..1be23b0eee2f 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -356,6 +356,7 @@ static void print_nsecs(struct perf_stat_config *config,
 }
 
 static int prepare_metric(const struct metric_expr *mexp,
+			  const struct evsel *evsel,
 			  struct expr_parse_ctx *pctx,
 			  int aggr_idx)
 {
@@ -398,8 +399,29 @@ static int prepare_metric(const struct metric_expr *mexp,
 			source_count = 1;
 		} else {
 			struct perf_stat_evsel *ps = mexp->metric_events[i]->stats;
-			struct perf_stat_aggr *aggr = &ps->aggr[aggr_idx];
+			struct perf_stat_aggr *aggr;
 
+			/*
+			 * If there are multiple uncore PMUs and we're not
+			 * reading the leader's stats, determine the stats for
+			 * the appropriate uncore PMU.
+			 */
+			if (evsel && evsel->metric_leader &&
+			    evsel->pmu != evsel->metric_leader->pmu &&
+			    mexp->metric_events[i]->pmu == evsel->metric_leader->pmu) {
+				struct evsel *pos;
+
+				evlist__for_each_entry(evsel->evlist, pos) {
+					if (pos->pmu != evsel->pmu)
+						continue;
+					if (pos->metric_leader != mexp->metric_events[i])
+						continue;
+					ps = pos->stats;
+					source_count = 1;
+					break;
+				}
+			}
+			aggr = &ps->aggr[aggr_idx];
 			if (!aggr)
 				break;
 
@@ -420,7 +442,8 @@ static int prepare_metric(const struct metric_expr *mexp,
 				 * metric.
 				 */
 				val = aggr->counts.val * (1.0 / mexp->metric_events[i]->scale);
-				source_count = evsel__source_count(mexp->metric_events[i]);
+				if (!source_count)
+					source_count = evsel__source_count(mexp->metric_events[i]);
 			}
 		}
 		n = strdup(evsel__metric_id(mexp->metric_events[i]));
@@ -461,7 +484,7 @@ static void generic_metric(struct perf_stat_config *config,
 		pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list);
 	pctx->sctx.runtime = mexp->runtime;
 	pctx->sctx.system_wide = config->system_wide;
-	i = prepare_metric(mexp, pctx, aggr_idx);
+	i = prepare_metric(mexp, evsel, pctx, aggr_idx);
 	if (i < 0) {
 		expr__ctx_free(pctx);
 		return;
@@ -522,7 +545,7 @@ double test_generic_metric(struct metric_expr *mexp, int aggr_idx)
 	if (!pctx)
 		return NAN;
 
-	if (prepare_metric(mexp, pctx, aggr_idx) < 0)
+	if (prepare_metric(mexp, /*evsel=*/NULL, pctx, aggr_idx) < 0)
 		goto out;
 
 	if (expr__parse(&ratio, pctx, mexp->metric_expr))
-- 
2.43.0.594.gd9cf4e227d-goog


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v1 3/3] perf stat: Fix metric-only aggregation index
  2024-02-02  2:25 [PATCH v1 1/3] perf stat: Pass fewer metric arguments Ian Rogers
  2024-02-02  2:25 ` [PATCH v1 2/3] perf metrics: Compute unmerged uncore metrics individually Ian Rogers
@ 2024-02-02  2:25 ` Ian Rogers
  2024-02-06  1:59 ` [PATCH v1 1/3] perf stat: Pass fewer metric arguments Namhyung Kim
  2 siblings, 0 replies; 7+ messages in thread
From: Ian Rogers @ 2024-02-02  2:25 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Ian Rogers, Adrian Hunter, Kan Liang, Kajol Jain, John Garry,
	Kaige Ye, K Prateek Nayak, linux-perf-users, linux-kernel
  Cc: Stephane Eranian

Aggregation index was being computed using the evsel's cpumap which
may have a different (typically the same or fewer) entries.

Before:
```
$ perf stat --metric-only -A -M memory_bandwidth_total -a sleep 1

 Performance counter stats for 'system wide':

       MB/s  memory_bandwidth_total MB/s  memory_bandwidth_total MB/s  memory_bandwidth_total MB/s  memory_bandwidth_total MB/s  memory_bandwidth_total MB/s  memory_bandwidth_total
CPU0                            12.8                           0.0                          12.9                          12.7                           0.0                          12.6
CPU1

       1.007806367 seconds time elapsed
```

After:
```
$ perf stat --metric-only -A -M memory_bandwidth_total -a sleep 1

 Performance counter stats for 'system wide':

       MB/s  memory_bandwidth_total MB/s  memory_bandwidth_total MB/s  memory_bandwidth_total MB/s  memory_bandwidth_total MB/s  memory_bandwidth_total MB/s  memory_bandwidth_total
CPU0                            15.4                           0.0                          15.3                          15.0                           0.0                          14.9
CPU18                            0.0                           0.0                          13.5                           5.2                           0.0                          11.9

       1.007858736 seconds time elapsed
```

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/util/stat-display.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 8c61f8627ebc..ce830c6afdf2 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -1126,11 +1126,16 @@ static void print_no_aggr_metric(struct perf_stat_config *config,
 			u64 ena, run, val;
 			double uval;
 			struct perf_stat_evsel *ps = counter->stats;
-			int aggr_idx = perf_cpu_map__idx(evsel__cpus(counter), cpu);
+			int aggr_idx = 0;
 
-			if (aggr_idx < 0)
+			if (!perf_cpu_map__has(evsel__cpus(counter), cpu))
 				continue;
 
+			cpu_aggr_map__for_each_idx(aggr_idx, config->aggr_map) {
+				if (config->aggr_map->map[aggr_idx].cpu.cpu == cpu.cpu)
+					break;
+			}
+
 			os->evsel = counter;
 			os->id = aggr_cpu_id__cpu(cpu, /*data=*/NULL);
 			if (first) {
-- 
2.43.0.594.gd9cf4e227d-goog


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v1 1/3] perf stat: Pass fewer metric arguments
  2024-02-02  2:25 [PATCH v1 1/3] perf stat: Pass fewer metric arguments Ian Rogers
  2024-02-02  2:25 ` [PATCH v1 2/3] perf metrics: Compute unmerged uncore metrics individually Ian Rogers
  2024-02-02  2:25 ` [PATCH v1 3/3] perf stat: Fix metric-only aggregation index Ian Rogers
@ 2024-02-06  1:59 ` Namhyung Kim
  2024-02-06  2:23   ` Ian Rogers
  2 siblings, 1 reply; 7+ messages in thread
From: Namhyung Kim @ 2024-02-06  1:59 UTC (permalink / raw)
  To: Ian Rogers
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Adrian Hunter,
	Kan Liang, Kajol Jain, John Garry, Kaige Ye, K Prateek Nayak,
	linux-perf-users, linux-kernel, Stephane Eranian

On Thu, Feb 1, 2024 at 6:25 PM Ian Rogers <irogers@google.com> wrote:
>
> Pass metric_expr and evsel rather than specific variables from the
> struct, thereby reducing the number of arguments. This will enable
> later fixes.
>
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
>  tools/perf/util/stat-shadow.c | 75 +++++++++++++++--------------------
>  1 file changed, 33 insertions(+), 42 deletions(-)
>
> diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
> index e31426167852..f6c9d2f98835 100644
> --- a/tools/perf/util/stat-shadow.c
> +++ b/tools/perf/util/stat-shadow.c
> @@ -355,23 +355,22 @@ static void print_nsecs(struct perf_stat_config *config,
>                 print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
>  }
>
> -static int prepare_metric(struct evsel **metric_events,
> -                         struct metric_ref *metric_refs,
> +static int prepare_metric(const struct metric_expr *mexp,
>                           struct expr_parse_ctx *pctx,
>                           int aggr_idx)
>  {
>         int i;

You may add local variables with the same name to reduce
the amount of diff.

Thanks,
Namhyung

>
> -       for (i = 0; metric_events[i]; i++) {
> +       for (i = 0; mexp->metric_events[i]; i++) {
>                 char *n;
>                 double val;
>                 int source_count = 0;
>
> -               if (evsel__is_tool(metric_events[i])) {
> +               if (evsel__is_tool(mexp->metric_events[i])) {
>                         struct stats *stats;
>                         double scale;
>
> -                       switch (metric_events[i]->tool_event) {
> +                       switch (mexp->metric_events[i]->tool_event) {
>                         case PERF_TOOL_DURATION_TIME:
>                                 stats = &walltime_nsecs_stats;
>                                 scale = 1e-9;
> @@ -391,19 +390,20 @@ static int prepare_metric(struct evsel **metric_events,
>                                 pr_err("Invalid tool event 'max'");
>                                 abort();
>                         default:
> -                               pr_err("Unknown tool event '%s'", evsel__name(metric_events[i]));
> +                               pr_err("Unknown tool event '%s'",
> +                                      evsel__name(mexp->metric_events[i]));
>                                 abort();
>                         }
>                         val = avg_stats(stats) * scale;
>                         source_count = 1;
>                 } else {
> -                       struct perf_stat_evsel *ps = metric_events[i]->stats;
> +                       struct perf_stat_evsel *ps = mexp->metric_events[i]->stats;
>                         struct perf_stat_aggr *aggr = &ps->aggr[aggr_idx];
>
>                         if (!aggr)
>                                 break;
>
> -                        if (!metric_events[i]->supported) {
> +                       if (!mexp->metric_events[i]->supported) {
>                                 /*
>                                  * Not supported events will have a count of 0,
>                                  * which can be confusing in a
> @@ -419,19 +419,19 @@ static int prepare_metric(struct evsel **metric_events,
>                                  * reverse the scale before computing the
>                                  * metric.
>                                  */
> -                               val = aggr->counts.val * (1.0 / metric_events[i]->scale);
> -                               source_count = evsel__source_count(metric_events[i]);
> +                               val = aggr->counts.val * (1.0 / mexp->metric_events[i]->scale);
> +                               source_count = evsel__source_count(mexp->metric_events[i]);
>                         }
>                 }
> -               n = strdup(evsel__metric_id(metric_events[i]));
> +               n = strdup(evsel__metric_id(mexp->metric_events[i]));
>                 if (!n)
>                         return -ENOMEM;
>
>                 expr__add_id_val_source_count(pctx, n, val, source_count);
>         }
>
> -       for (int j = 0; metric_refs && metric_refs[j].metric_name; j++) {
> -               int ret = expr__add_ref(pctx, &metric_refs[j]);
> +       for (int j = 0; mexp->metric_refs && mexp->metric_refs[j].metric_name; j++) {
> +               int ret = expr__add_ref(pctx, &mexp->metric_refs[j]);
>
>                 if (ret)
>                         return ret;
> @@ -441,14 +441,8 @@ static int prepare_metric(struct evsel **metric_events,
>  }
>
>  static void generic_metric(struct perf_stat_config *config,
> -                          const char *metric_expr,
> -                          const char *metric_threshold,
> -                          struct evsel **metric_events,
> -                          struct metric_ref *metric_refs,
> -                          char *name,
> -                          const char *metric_name,
> -                          const char *metric_unit,
> -                          int runtime,
> +                       struct metric_expr *mexp,
> +                       struct evsel *evsel,
>                            int aggr_idx,
>                            struct perf_stat_output_ctx *out)
>  {
> @@ -465,55 +459,55 @@ static void generic_metric(struct perf_stat_config *config,
>
>         if (config->user_requested_cpu_list)
>                 pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list);
> -       pctx->sctx.runtime = runtime;
> +       pctx->sctx.runtime = mexp->runtime;
>         pctx->sctx.system_wide = config->system_wide;
> -       i = prepare_metric(metric_events, metric_refs, pctx, aggr_idx);
> +       i = prepare_metric(mexp, pctx, aggr_idx);
>         if (i < 0) {
>                 expr__ctx_free(pctx);
>                 return;
>         }
> -       if (!metric_events[i]) {
> -               if (expr__parse(&ratio, pctx, metric_expr) == 0) {
> +       if (!mexp->metric_events[i]) {
> +               if (expr__parse(&ratio, pctx, mexp->metric_expr) == 0) {
>                         char *unit;
>                         char metric_bf[64];
>
> -                       if (metric_threshold &&
> -                           expr__parse(&threshold, pctx, metric_threshold) == 0 &&
> +                       if (mexp->metric_threshold &&
> +                           expr__parse(&threshold, pctx, mexp->metric_threshold) == 0 &&
>                             !isnan(threshold)) {
>                                 color = fpclassify(threshold) == FP_ZERO
>                                         ? PERF_COLOR_GREEN : PERF_COLOR_RED;
>                         }
>
> -                       if (metric_unit && metric_name) {
> -                               if (perf_pmu__convert_scale(metric_unit,
> +                       if (mexp->metric_unit && mexp->metric_name) {
> +                               if (perf_pmu__convert_scale(mexp->metric_unit,
>                                         &unit, &scale) >= 0) {
>                                         ratio *= scale;
>                                 }
> -                               if (strstr(metric_expr, "?"))
> +                               if (strstr(mexp->metric_expr, "?"))
>                                         scnprintf(metric_bf, sizeof(metric_bf),
> -                                         "%s  %s_%d", unit, metric_name, runtime);
> +                                         "%s  %s_%d", unit, mexp->metric_name, mexp->runtime);
>                                 else
>                                         scnprintf(metric_bf, sizeof(metric_bf),
> -                                         "%s  %s", unit, metric_name);
> +                                         "%s  %s", unit, mexp->metric_name);
>
>                                 print_metric(config, ctxp, color, "%8.1f",
>                                              metric_bf, ratio);
>                         } else {
>                                 print_metric(config, ctxp, color, "%8.2f",
> -                                       metric_name ?
> -                                       metric_name :
> -                                       out->force_header ?  name : "",
> +                                       mexp->metric_name ?
> +                                       mexp->metric_name :
> +                                       out->force_header ?  evsel->name : "",
>                                         ratio);
>                         }
>                 } else {
>                         print_metric(config, ctxp, color, /*unit=*/NULL,
>                                      out->force_header ?
> -                                    (metric_name ? metric_name : name) : "", 0);
> +                                    (mexp->metric_name ?: evsel->name) : "", 0);
>                 }
>         } else {
>                 print_metric(config, ctxp, color, /*unit=*/NULL,
>                              out->force_header ?
> -                            (metric_name ? metric_name : name) : "", 0);
> +                            (mexp->metric_name ?: evsel->name) : "", 0);
>         }
>
>         expr__ctx_free(pctx);
> @@ -528,7 +522,7 @@ double test_generic_metric(struct metric_expr *mexp, int aggr_idx)
>         if (!pctx)
>                 return NAN;
>
> -       if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, aggr_idx) < 0)
> +       if (prepare_metric(mexp, pctx, aggr_idx) < 0)
>                 goto out;
>
>         if (expr__parse(&ratio, pctx, mexp->metric_expr))
> @@ -630,10 +624,7 @@ void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config,
>
>                 if ((*num)++ > 0)
>                         out->new_line(config, ctxp);
> -               generic_metric(config, mexp->metric_expr, mexp->metric_threshold,
> -                              mexp->metric_events, mexp->metric_refs, evsel->name,
> -                              mexp->metric_name, mexp->metric_unit, mexp->runtime,
> -                              aggr_idx, out);
> +               generic_metric(config, mexp, evsel, aggr_idx, out);
>         }
>
>         return NULL;
> --
> 2.43.0.594.gd9cf4e227d-goog
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v1 2/3] perf metrics: Compute unmerged uncore metrics individually
  2024-02-02  2:25 ` [PATCH v1 2/3] perf metrics: Compute unmerged uncore metrics individually Ian Rogers
@ 2024-02-06  2:02   ` Namhyung Kim
  2024-02-06  2:21     ` Ian Rogers
  0 siblings, 1 reply; 7+ messages in thread
From: Namhyung Kim @ 2024-02-06  2:02 UTC (permalink / raw)
  To: Ian Rogers
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Adrian Hunter,
	Kan Liang, Kajol Jain, John Garry, Kaige Ye, K Prateek Nayak,
	linux-perf-users, linux-kernel, Stephane Eranian

On Thu, Feb 1, 2024 at 6:25 PM Ian Rogers <irogers@google.com> wrote:
>
> When merging counts from multiple uncore PMUs the metric is only
> computed for the metric leader. When merging/aggregation is disabled,
> prior to this patch just the leader's metric would be computed. Fix
> this by computing the metric for each PMU.
>
> On a SkylakeX:
> Before:
> ```
> $ perf stat -A -M memory_bandwidth_total -a sleep 1
>
>  Performance counter stats for 'system wide':
>
> CPU0               82,217      UNC_M_CAS_COUNT.RD [uncore_imc_0] #      9.2 MB/s  memory_bandwidth_total
> CPU18                   0      UNC_M_CAS_COUNT.RD [uncore_imc_0] #      0.0 MB/s  memory_bandwidth_total
> CPU0               61,395      UNC_M_CAS_COUNT.WR [uncore_imc_0]
> CPU18                   0      UNC_M_CAS_COUNT.WR [uncore_imc_0]
> CPU0                    0      UNC_M_CAS_COUNT.RD [uncore_imc_1]
> CPU18                   0      UNC_M_CAS_COUNT.RD [uncore_imc_1]
> CPU0                    0      UNC_M_CAS_COUNT.WR [uncore_imc_1]
> CPU18                   0      UNC_M_CAS_COUNT.WR [uncore_imc_1]
> CPU0               81,570      UNC_M_CAS_COUNT.RD [uncore_imc_2]
> CPU18             113,886      UNC_M_CAS_COUNT.RD [uncore_imc_2]
> CPU0               62,330      UNC_M_CAS_COUNT.WR [uncore_imc_2]
> CPU18              66,942      UNC_M_CAS_COUNT.WR [uncore_imc_2]
> CPU0               75,489      UNC_M_CAS_COUNT.RD [uncore_imc_3]
> CPU18              27,958      UNC_M_CAS_COUNT.RD [uncore_imc_3]
> CPU0               55,864      UNC_M_CAS_COUNT.WR [uncore_imc_3]
> CPU18              38,727      UNC_M_CAS_COUNT.WR [uncore_imc_3]
> CPU0                    0      UNC_M_CAS_COUNT.RD [uncore_imc_4]
> CPU18                   0      UNC_M_CAS_COUNT.RD [uncore_imc_4]
> CPU0                    0      UNC_M_CAS_COUNT.WR [uncore_imc_4]
> CPU18                   0      UNC_M_CAS_COUNT.WR [uncore_imc_4]
> CPU0               75,423      UNC_M_CAS_COUNT.RD [uncore_imc_5]
> CPU18             104,527      UNC_M_CAS_COUNT.RD [uncore_imc_5]
> CPU0               57,596      UNC_M_CAS_COUNT.WR [uncore_imc_5]
> CPU18              56,777      UNC_M_CAS_COUNT.WR [uncore_imc_5]
> CPU0        1,003,440,851 ns   duration_time
>
>        1.003440851 seconds time elapsed
> ```
>
> After:
> ```
> $ perf stat -A -M memory_bandwidth_total -a sleep 1
>
>  Performance counter stats for 'system wide':
>
> CPU0               88,968      UNC_M_CAS_COUNT.RD [uncore_imc_0] #      9.5 MB/s  memory_bandwidth_total
> CPU18                   0      UNC_M_CAS_COUNT.RD [uncore_imc_0] #      0.0 MB/s  memory_bandwidth_total
> CPU0               59,498      UNC_M_CAS_COUNT.WR [uncore_imc_0]
> CPU18                   0      UNC_M_CAS_COUNT.WR [uncore_imc_0]
> CPU0                    0      UNC_M_CAS_COUNT.RD [uncore_imc_1] #      0.0 MB/s  memory_bandwidth_total
> CPU18                   0      UNC_M_CAS_COUNT.RD [uncore_imc_1] #      0.0 MB/s  memory_bandwidth_total
> CPU0                    0      UNC_M_CAS_COUNT.WR [uncore_imc_1]
> CPU18                   0      UNC_M_CAS_COUNT.WR [uncore_imc_1]
> CPU0               88,635      UNC_M_CAS_COUNT.RD [uncore_imc_2] #      9.5 MB/s  memory_bandwidth_total
> CPU18             117,975      UNC_M_CAS_COUNT.RD [uncore_imc_2] #     11.5 MB/s  memory_bandwidth_total
> CPU0               60,829      UNC_M_CAS_COUNT.WR [uncore_imc_2]
> CPU18              62,105      UNC_M_CAS_COUNT.WR [uncore_imc_2]
> CPU0               82,238      UNC_M_CAS_COUNT.RD [uncore_imc_3] #      8.7 MB/s  memory_bandwidth_total
> CPU18              22,906      UNC_M_CAS_COUNT.RD [uncore_imc_3] #      3.6 MB/s  memory_bandwidth_total
> CPU0               53,959      UNC_M_CAS_COUNT.WR [uncore_imc_3]
> CPU18              32,990      UNC_M_CAS_COUNT.WR [uncore_imc_3]
> CPU0                    0      UNC_M_CAS_COUNT.RD [uncore_imc_4] #      0.0 MB/s  memory_bandwidth_total
> CPU18                   0      UNC_M_CAS_COUNT.RD [uncore_imc_4] #      0.0 MB/s  memory_bandwidth_total
> CPU0                    0      UNC_M_CAS_COUNT.WR [uncore_imc_4]
> CPU18                   0      UNC_M_CAS_COUNT.WR [uncore_imc_4]
> CPU0               83,595      UNC_M_CAS_COUNT.RD [uncore_imc_5] #      8.9 MB/s  memory_bandwidth_total
> CPU18             110,151      UNC_M_CAS_COUNT.RD [uncore_imc_5] #     10.5 MB/s  memory_bandwidth_total
> CPU0               56,540      UNC_M_CAS_COUNT.WR [uncore_imc_5]
> CPU18              53,816      UNC_M_CAS_COUNT.WR [uncore_imc_5]
> CPU0        1,003,353,416 ns   duration_time
> ```
>
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
>  tools/perf/util/metricgroup.c |  2 ++
>  tools/perf/util/stat-shadow.c | 31 +++++++++++++++++++++++++++----
>  2 files changed, 29 insertions(+), 4 deletions(-)
>
> diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
> index ca3e0404f187..c33ffee837ca 100644
> --- a/tools/perf/util/metricgroup.c
> +++ b/tools/perf/util/metricgroup.c
> @@ -44,6 +44,8 @@ struct metric_event *metricgroup__lookup(struct rblist *metric_events,
>         if (!metric_events)
>                 return NULL;
>
> +       if (evsel->metric_leader)
> +               me.evsel = evsel->metric_leader;
>         nd = rblist__find(metric_events, &me);
>         if (nd)
>                 return container_of(nd, struct metric_event, nd);
> diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
> index f6c9d2f98835..1be23b0eee2f 100644
> --- a/tools/perf/util/stat-shadow.c
> +++ b/tools/perf/util/stat-shadow.c
> @@ -356,6 +356,7 @@ static void print_nsecs(struct perf_stat_config *config,
>  }
>
>  static int prepare_metric(const struct metric_expr *mexp,
> +                         const struct evsel *evsel,
>                           struct expr_parse_ctx *pctx,
>                           int aggr_idx)
>  {
> @@ -398,8 +399,29 @@ static int prepare_metric(const struct metric_expr *mexp,
>                         source_count = 1;
>                 } else {
>                         struct perf_stat_evsel *ps = mexp->metric_events[i]->stats;
> -                       struct perf_stat_aggr *aggr = &ps->aggr[aggr_idx];
> +                       struct perf_stat_aggr *aggr;
>
> +                       /*
> +                        * If there are multiple uncore PMUs and we're not
> +                        * reading the leader's stats, determine the stats for
> +                        * the appropriate uncore PMU.
> +                        */
> +                       if (evsel && evsel->metric_leader &&
> +                           evsel->pmu != evsel->metric_leader->pmu &&
> +                           mexp->metric_events[i]->pmu == evsel->metric_leader->pmu) {

Is it just to check we're in --no-aggr (or --no-merge)?
Then it'd be simpler to use stat_config->aggr_mode.

Thanks,
Namhyung


> +                               struct evsel *pos;
> +
> +                               evlist__for_each_entry(evsel->evlist, pos) {
> +                                       if (pos->pmu != evsel->pmu)
> +                                               continue;
> +                                       if (pos->metric_leader != mexp->metric_events[i])
> +                                               continue;
> +                                       ps = pos->stats;
> +                                       source_count = 1;
> +                                       break;
> +                               }
> +                       }
> +                       aggr = &ps->aggr[aggr_idx];
>                         if (!aggr)
>                                 break;
>
> @@ -420,7 +442,8 @@ static int prepare_metric(const struct metric_expr *mexp,
>                                  * metric.
>                                  */
>                                 val = aggr->counts.val * (1.0 / mexp->metric_events[i]->scale);
> -                               source_count = evsel__source_count(mexp->metric_events[i]);
> +                               if (!source_count)
> +                                       source_count = evsel__source_count(mexp->metric_events[i]);
>                         }
>                 }
>                 n = strdup(evsel__metric_id(mexp->metric_events[i]));
> @@ -461,7 +484,7 @@ static void generic_metric(struct perf_stat_config *config,
>                 pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list);
>         pctx->sctx.runtime = mexp->runtime;
>         pctx->sctx.system_wide = config->system_wide;
> -       i = prepare_metric(mexp, pctx, aggr_idx);
> +       i = prepare_metric(mexp, evsel, pctx, aggr_idx);
>         if (i < 0) {
>                 expr__ctx_free(pctx);
>                 return;
> @@ -522,7 +545,7 @@ double test_generic_metric(struct metric_expr *mexp, int aggr_idx)
>         if (!pctx)
>                 return NAN;
>
> -       if (prepare_metric(mexp, pctx, aggr_idx) < 0)
> +       if (prepare_metric(mexp, /*evsel=*/NULL, pctx, aggr_idx) < 0)
>                 goto out;
>
>         if (expr__parse(&ratio, pctx, mexp->metric_expr))
> --
> 2.43.0.594.gd9cf4e227d-goog
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v1 2/3] perf metrics: Compute unmerged uncore metrics individually
  2024-02-06  2:02   ` Namhyung Kim
@ 2024-02-06  2:21     ` Ian Rogers
  0 siblings, 0 replies; 7+ messages in thread
From: Ian Rogers @ 2024-02-06  2:21 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Adrian Hunter,
	Kan Liang, Kajol Jain, John Garry, Kaige Ye, K Prateek Nayak,
	linux-perf-users, linux-kernel, Stephane Eranian

On Mon, Feb 5, 2024 at 6:02 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> On Thu, Feb 1, 2024 at 6:25 PM Ian Rogers <irogers@google.com> wrote:
> >
> > When merging counts from multiple uncore PMUs the metric is only
> > computed for the metric leader. When merging/aggregation is disabled,
> > prior to this patch just the leader's metric would be computed. Fix
> > this by computing the metric for each PMU.
> >
> > On a SkylakeX:
> > Before:
> > ```
> > $ perf stat -A -M memory_bandwidth_total -a sleep 1
> >
> >  Performance counter stats for 'system wide':
> >
> > CPU0               82,217      UNC_M_CAS_COUNT.RD [uncore_imc_0] #      9.2 MB/s  memory_bandwidth_total
> > CPU18                   0      UNC_M_CAS_COUNT.RD [uncore_imc_0] #      0.0 MB/s  memory_bandwidth_total
> > CPU0               61,395      UNC_M_CAS_COUNT.WR [uncore_imc_0]
> > CPU18                   0      UNC_M_CAS_COUNT.WR [uncore_imc_0]
> > CPU0                    0      UNC_M_CAS_COUNT.RD [uncore_imc_1]
> > CPU18                   0      UNC_M_CAS_COUNT.RD [uncore_imc_1]
> > CPU0                    0      UNC_M_CAS_COUNT.WR [uncore_imc_1]
> > CPU18                   0      UNC_M_CAS_COUNT.WR [uncore_imc_1]
> > CPU0               81,570      UNC_M_CAS_COUNT.RD [uncore_imc_2]
> > CPU18             113,886      UNC_M_CAS_COUNT.RD [uncore_imc_2]
> > CPU0               62,330      UNC_M_CAS_COUNT.WR [uncore_imc_2]
> > CPU18              66,942      UNC_M_CAS_COUNT.WR [uncore_imc_2]
> > CPU0               75,489      UNC_M_CAS_COUNT.RD [uncore_imc_3]
> > CPU18              27,958      UNC_M_CAS_COUNT.RD [uncore_imc_3]
> > CPU0               55,864      UNC_M_CAS_COUNT.WR [uncore_imc_3]
> > CPU18              38,727      UNC_M_CAS_COUNT.WR [uncore_imc_3]
> > CPU0                    0      UNC_M_CAS_COUNT.RD [uncore_imc_4]
> > CPU18                   0      UNC_M_CAS_COUNT.RD [uncore_imc_4]
> > CPU0                    0      UNC_M_CAS_COUNT.WR [uncore_imc_4]
> > CPU18                   0      UNC_M_CAS_COUNT.WR [uncore_imc_4]
> > CPU0               75,423      UNC_M_CAS_COUNT.RD [uncore_imc_5]
> > CPU18             104,527      UNC_M_CAS_COUNT.RD [uncore_imc_5]
> > CPU0               57,596      UNC_M_CAS_COUNT.WR [uncore_imc_5]
> > CPU18              56,777      UNC_M_CAS_COUNT.WR [uncore_imc_5]
> > CPU0        1,003,440,851 ns   duration_time
> >
> >        1.003440851 seconds time elapsed
> > ```
> >
> > After:
> > ```
> > $ perf stat -A -M memory_bandwidth_total -a sleep 1
> >
> >  Performance counter stats for 'system wide':
> >
> > CPU0               88,968      UNC_M_CAS_COUNT.RD [uncore_imc_0] #      9.5 MB/s  memory_bandwidth_total
> > CPU18                   0      UNC_M_CAS_COUNT.RD [uncore_imc_0] #      0.0 MB/s  memory_bandwidth_total
> > CPU0               59,498      UNC_M_CAS_COUNT.WR [uncore_imc_0]
> > CPU18                   0      UNC_M_CAS_COUNT.WR [uncore_imc_0]
> > CPU0                    0      UNC_M_CAS_COUNT.RD [uncore_imc_1] #      0.0 MB/s  memory_bandwidth_total
> > CPU18                   0      UNC_M_CAS_COUNT.RD [uncore_imc_1] #      0.0 MB/s  memory_bandwidth_total
> > CPU0                    0      UNC_M_CAS_COUNT.WR [uncore_imc_1]
> > CPU18                   0      UNC_M_CAS_COUNT.WR [uncore_imc_1]
> > CPU0               88,635      UNC_M_CAS_COUNT.RD [uncore_imc_2] #      9.5 MB/s  memory_bandwidth_total
> > CPU18             117,975      UNC_M_CAS_COUNT.RD [uncore_imc_2] #     11.5 MB/s  memory_bandwidth_total
> > CPU0               60,829      UNC_M_CAS_COUNT.WR [uncore_imc_2]
> > CPU18              62,105      UNC_M_CAS_COUNT.WR [uncore_imc_2]
> > CPU0               82,238      UNC_M_CAS_COUNT.RD [uncore_imc_3] #      8.7 MB/s  memory_bandwidth_total
> > CPU18              22,906      UNC_M_CAS_COUNT.RD [uncore_imc_3] #      3.6 MB/s  memory_bandwidth_total
> > CPU0               53,959      UNC_M_CAS_COUNT.WR [uncore_imc_3]
> > CPU18              32,990      UNC_M_CAS_COUNT.WR [uncore_imc_3]
> > CPU0                    0      UNC_M_CAS_COUNT.RD [uncore_imc_4] #      0.0 MB/s  memory_bandwidth_total
> > CPU18                   0      UNC_M_CAS_COUNT.RD [uncore_imc_4] #      0.0 MB/s  memory_bandwidth_total
> > CPU0                    0      UNC_M_CAS_COUNT.WR [uncore_imc_4]
> > CPU18                   0      UNC_M_CAS_COUNT.WR [uncore_imc_4]
> > CPU0               83,595      UNC_M_CAS_COUNT.RD [uncore_imc_5] #      8.9 MB/s  memory_bandwidth_total
> > CPU18             110,151      UNC_M_CAS_COUNT.RD [uncore_imc_5] #     10.5 MB/s  memory_bandwidth_total
> > CPU0               56,540      UNC_M_CAS_COUNT.WR [uncore_imc_5]
> > CPU18              53,816      UNC_M_CAS_COUNT.WR [uncore_imc_5]
> > CPU0        1,003,353,416 ns   duration_time
> > ```
> >
> > Signed-off-by: Ian Rogers <irogers@google.com>
> > ---
> >  tools/perf/util/metricgroup.c |  2 ++
> >  tools/perf/util/stat-shadow.c | 31 +++++++++++++++++++++++++++----
> >  2 files changed, 29 insertions(+), 4 deletions(-)
> >
> > diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
> > index ca3e0404f187..c33ffee837ca 100644
> > --- a/tools/perf/util/metricgroup.c
> > +++ b/tools/perf/util/metricgroup.c
> > @@ -44,6 +44,8 @@ struct metric_event *metricgroup__lookup(struct rblist *metric_events,
> >         if (!metric_events)
> >                 return NULL;
> >
> > +       if (evsel->metric_leader)
> > +               me.evsel = evsel->metric_leader;
> >         nd = rblist__find(metric_events, &me);
> >         if (nd)
> >                 return container_of(nd, struct metric_event, nd);
> > diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
> > index f6c9d2f98835..1be23b0eee2f 100644
> > --- a/tools/perf/util/stat-shadow.c
> > +++ b/tools/perf/util/stat-shadow.c
> > @@ -356,6 +356,7 @@ static void print_nsecs(struct perf_stat_config *config,
> >  }
> >
> >  static int prepare_metric(const struct metric_expr *mexp,
> > +                         const struct evsel *evsel,
> >                           struct expr_parse_ctx *pctx,
> >                           int aggr_idx)
> >  {
> > @@ -398,8 +399,29 @@ static int prepare_metric(const struct metric_expr *mexp,
> >                         source_count = 1;
> >                 } else {
> >                         struct perf_stat_evsel *ps = mexp->metric_events[i]->stats;
> > -                       struct perf_stat_aggr *aggr = &ps->aggr[aggr_idx];
> > +                       struct perf_stat_aggr *aggr;
> >
> > +                       /*
> > +                        * If there are multiple uncore PMUs and we're not
> > +                        * reading the leader's stats, determine the stats for
> > +                        * the appropriate uncore PMU.
> > +                        */
> > +                       if (evsel && evsel->metric_leader &&
> > +                           evsel->pmu != evsel->metric_leader->pmu &&
> > +                           mexp->metric_events[i]->pmu == evsel->metric_leader->pmu) {
>
> Is it just to check we're in --no-aggr (or --no-merge)?
> Then it'd be simpler to use stat_config->aggr_mode.

For most metrics the events will be on the same PMU, but there is
nothing stopping mixing events from different PMUs (grouping can be
disabled). There may also be software and tool evsels.

Thanks,
Ian

> Thanks,
> Namhyung
>
>
> > +                               struct evsel *pos;
> > +
> > +                               evlist__for_each_entry(evsel->evlist, pos) {
> > +                                       if (pos->pmu != evsel->pmu)
> > +                                               continue;
> > +                                       if (pos->metric_leader != mexp->metric_events[i])
> > +                                               continue;
> > +                                       ps = pos->stats;
> > +                                       source_count = 1;
> > +                                       break;
> > +                               }
> > +                       }
> > +                       aggr = &ps->aggr[aggr_idx];
> >                         if (!aggr)
> >                                 break;
> >
> > @@ -420,7 +442,8 @@ static int prepare_metric(const struct metric_expr *mexp,
> >                                  * metric.
> >                                  */
> >                                 val = aggr->counts.val * (1.0 / mexp->metric_events[i]->scale);
> > -                               source_count = evsel__source_count(mexp->metric_events[i]);
> > +                               if (!source_count)
> > +                                       source_count = evsel__source_count(mexp->metric_events[i]);
> >                         }
> >                 }
> >                 n = strdup(evsel__metric_id(mexp->metric_events[i]));
> > @@ -461,7 +484,7 @@ static void generic_metric(struct perf_stat_config *config,
> >                 pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list);
> >         pctx->sctx.runtime = mexp->runtime;
> >         pctx->sctx.system_wide = config->system_wide;
> > -       i = prepare_metric(mexp, pctx, aggr_idx);
> > +       i = prepare_metric(mexp, evsel, pctx, aggr_idx);
> >         if (i < 0) {
> >                 expr__ctx_free(pctx);
> >                 return;
> > @@ -522,7 +545,7 @@ double test_generic_metric(struct metric_expr *mexp, int aggr_idx)
> >         if (!pctx)
> >                 return NAN;
> >
> > -       if (prepare_metric(mexp, pctx, aggr_idx) < 0)
> > +       if (prepare_metric(mexp, /*evsel=*/NULL, pctx, aggr_idx) < 0)
> >                 goto out;
> >
> >         if (expr__parse(&ratio, pctx, mexp->metric_expr))
> > --
> > 2.43.0.594.gd9cf4e227d-goog
> >

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v1 1/3] perf stat: Pass fewer metric arguments
  2024-02-06  1:59 ` [PATCH v1 1/3] perf stat: Pass fewer metric arguments Namhyung Kim
@ 2024-02-06  2:23   ` Ian Rogers
  0 siblings, 0 replies; 7+ messages in thread
From: Ian Rogers @ 2024-02-06  2:23 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Adrian Hunter,
	Kan Liang, Kajol Jain, John Garry, Kaige Ye, K Prateek Nayak,
	linux-perf-users, linux-kernel, Stephane Eranian

On Mon, Feb 5, 2024 at 5:59 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> On Thu, Feb 1, 2024 at 6:25 PM Ian Rogers <irogers@google.com> wrote:
> >
> > Pass metric_expr and evsel rather than specific variables from the
> > struct, thereby reducing the number of arguments. This will enable
> > later fixes.
> >
> > Signed-off-by: Ian Rogers <irogers@google.com>
> > ---
> >  tools/perf/util/stat-shadow.c | 75 +++++++++++++++--------------------
> >  1 file changed, 33 insertions(+), 42 deletions(-)
> >
> > diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
> > index e31426167852..f6c9d2f98835 100644
> > --- a/tools/perf/util/stat-shadow.c
> > +++ b/tools/perf/util/stat-shadow.c
> > @@ -355,23 +355,22 @@ static void print_nsecs(struct perf_stat_config *config,
> >                 print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
> >  }
> >
> > -static int prepare_metric(struct evsel **metric_events,
> > -                         struct metric_ref *metric_refs,
> > +static int prepare_metric(const struct metric_expr *mexp,
> >                           struct expr_parse_ctx *pctx,
> >                           int aggr_idx)
> >  {
> >         int i;
>
> You may add local variables with the same name to reduce
> the amount of diff.

I'll see what I can do in v2.

Thanks,
Ian

> Thanks,
> Namhyung
>
> >
> > -       for (i = 0; metric_events[i]; i++) {
> > +       for (i = 0; mexp->metric_events[i]; i++) {
> >                 char *n;
> >                 double val;
> >                 int source_count = 0;
> >
> > -               if (evsel__is_tool(metric_events[i])) {
> > +               if (evsel__is_tool(mexp->metric_events[i])) {
> >                         struct stats *stats;
> >                         double scale;
> >
> > -                       switch (metric_events[i]->tool_event) {
> > +                       switch (mexp->metric_events[i]->tool_event) {
> >                         case PERF_TOOL_DURATION_TIME:
> >                                 stats = &walltime_nsecs_stats;
> >                                 scale = 1e-9;
> > @@ -391,19 +390,20 @@ static int prepare_metric(struct evsel **metric_events,
> >                                 pr_err("Invalid tool event 'max'");
> >                                 abort();
> >                         default:
> > -                               pr_err("Unknown tool event '%s'", evsel__name(metric_events[i]));
> > +                               pr_err("Unknown tool event '%s'",
> > +                                      evsel__name(mexp->metric_events[i]));
> >                                 abort();
> >                         }
> >                         val = avg_stats(stats) * scale;
> >                         source_count = 1;
> >                 } else {
> > -                       struct perf_stat_evsel *ps = metric_events[i]->stats;
> > +                       struct perf_stat_evsel *ps = mexp->metric_events[i]->stats;
> >                         struct perf_stat_aggr *aggr = &ps->aggr[aggr_idx];
> >
> >                         if (!aggr)
> >                                 break;
> >
> > -                        if (!metric_events[i]->supported) {
> > +                       if (!mexp->metric_events[i]->supported) {
> >                                 /*
> >                                  * Not supported events will have a count of 0,
> >                                  * which can be confusing in a
> > @@ -419,19 +419,19 @@ static int prepare_metric(struct evsel **metric_events,
> >                                  * reverse the scale before computing the
> >                                  * metric.
> >                                  */
> > -                               val = aggr->counts.val * (1.0 / metric_events[i]->scale);
> > -                               source_count = evsel__source_count(metric_events[i]);
> > +                               val = aggr->counts.val * (1.0 / mexp->metric_events[i]->scale);
> > +                               source_count = evsel__source_count(mexp->metric_events[i]);
> >                         }
> >                 }
> > -               n = strdup(evsel__metric_id(metric_events[i]));
> > +               n = strdup(evsel__metric_id(mexp->metric_events[i]));
> >                 if (!n)
> >                         return -ENOMEM;
> >
> >                 expr__add_id_val_source_count(pctx, n, val, source_count);
> >         }
> >
> > -       for (int j = 0; metric_refs && metric_refs[j].metric_name; j++) {
> > -               int ret = expr__add_ref(pctx, &metric_refs[j]);
> > +       for (int j = 0; mexp->metric_refs && mexp->metric_refs[j].metric_name; j++) {
> > +               int ret = expr__add_ref(pctx, &mexp->metric_refs[j]);
> >
> >                 if (ret)
> >                         return ret;
> > @@ -441,14 +441,8 @@ static int prepare_metric(struct evsel **metric_events,
> >  }
> >
> >  static void generic_metric(struct perf_stat_config *config,
> > -                          const char *metric_expr,
> > -                          const char *metric_threshold,
> > -                          struct evsel **metric_events,
> > -                          struct metric_ref *metric_refs,
> > -                          char *name,
> > -                          const char *metric_name,
> > -                          const char *metric_unit,
> > -                          int runtime,
> > +                       struct metric_expr *mexp,
> > +                       struct evsel *evsel,
> >                            int aggr_idx,
> >                            struct perf_stat_output_ctx *out)
> >  {
> > @@ -465,55 +459,55 @@ static void generic_metric(struct perf_stat_config *config,
> >
> >         if (config->user_requested_cpu_list)
> >                 pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list);
> > -       pctx->sctx.runtime = runtime;
> > +       pctx->sctx.runtime = mexp->runtime;
> >         pctx->sctx.system_wide = config->system_wide;
> > -       i = prepare_metric(metric_events, metric_refs, pctx, aggr_idx);
> > +       i = prepare_metric(mexp, pctx, aggr_idx);
> >         if (i < 0) {
> >                 expr__ctx_free(pctx);
> >                 return;
> >         }
> > -       if (!metric_events[i]) {
> > -               if (expr__parse(&ratio, pctx, metric_expr) == 0) {
> > +       if (!mexp->metric_events[i]) {
> > +               if (expr__parse(&ratio, pctx, mexp->metric_expr) == 0) {
> >                         char *unit;
> >                         char metric_bf[64];
> >
> > -                       if (metric_threshold &&
> > -                           expr__parse(&threshold, pctx, metric_threshold) == 0 &&
> > +                       if (mexp->metric_threshold &&
> > +                           expr__parse(&threshold, pctx, mexp->metric_threshold) == 0 &&
> >                             !isnan(threshold)) {
> >                                 color = fpclassify(threshold) == FP_ZERO
> >                                         ? PERF_COLOR_GREEN : PERF_COLOR_RED;
> >                         }
> >
> > -                       if (metric_unit && metric_name) {
> > -                               if (perf_pmu__convert_scale(metric_unit,
> > +                       if (mexp->metric_unit && mexp->metric_name) {
> > +                               if (perf_pmu__convert_scale(mexp->metric_unit,
> >                                         &unit, &scale) >= 0) {
> >                                         ratio *= scale;
> >                                 }
> > -                               if (strstr(metric_expr, "?"))
> > +                               if (strstr(mexp->metric_expr, "?"))
> >                                         scnprintf(metric_bf, sizeof(metric_bf),
> > -                                         "%s  %s_%d", unit, metric_name, runtime);
> > +                                         "%s  %s_%d", unit, mexp->metric_name, mexp->runtime);
> >                                 else
> >                                         scnprintf(metric_bf, sizeof(metric_bf),
> > -                                         "%s  %s", unit, metric_name);
> > +                                         "%s  %s", unit, mexp->metric_name);
> >
> >                                 print_metric(config, ctxp, color, "%8.1f",
> >                                              metric_bf, ratio);
> >                         } else {
> >                                 print_metric(config, ctxp, color, "%8.2f",
> > -                                       metric_name ?
> > -                                       metric_name :
> > -                                       out->force_header ?  name : "",
> > +                                       mexp->metric_name ?
> > +                                       mexp->metric_name :
> > +                                       out->force_header ?  evsel->name : "",
> >                                         ratio);
> >                         }
> >                 } else {
> >                         print_metric(config, ctxp, color, /*unit=*/NULL,
> >                                      out->force_header ?
> > -                                    (metric_name ? metric_name : name) : "", 0);
> > +                                    (mexp->metric_name ?: evsel->name) : "", 0);
> >                 }
> >         } else {
> >                 print_metric(config, ctxp, color, /*unit=*/NULL,
> >                              out->force_header ?
> > -                            (metric_name ? metric_name : name) : "", 0);
> > +                            (mexp->metric_name ?: evsel->name) : "", 0);
> >         }
> >
> >         expr__ctx_free(pctx);
> > @@ -528,7 +522,7 @@ double test_generic_metric(struct metric_expr *mexp, int aggr_idx)
> >         if (!pctx)
> >                 return NAN;
> >
> > -       if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, aggr_idx) < 0)
> > +       if (prepare_metric(mexp, pctx, aggr_idx) < 0)
> >                 goto out;
> >
> >         if (expr__parse(&ratio, pctx, mexp->metric_expr))
> > @@ -630,10 +624,7 @@ void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config,
> >
> >                 if ((*num)++ > 0)
> >                         out->new_line(config, ctxp);
> > -               generic_metric(config, mexp->metric_expr, mexp->metric_threshold,
> > -                              mexp->metric_events, mexp->metric_refs, evsel->name,
> > -                              mexp->metric_name, mexp->metric_unit, mexp->runtime,
> > -                              aggr_idx, out);
> > +               generic_metric(config, mexp, evsel, aggr_idx, out);
> >         }
> >
> >         return NULL;
> > --
> > 2.43.0.594.gd9cf4e227d-goog
> >

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2024-02-06  2:24 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-02-02  2:25 [PATCH v1 1/3] perf stat: Pass fewer metric arguments Ian Rogers
2024-02-02  2:25 ` [PATCH v1 2/3] perf metrics: Compute unmerged uncore metrics individually Ian Rogers
2024-02-06  2:02   ` Namhyung Kim
2024-02-06  2:21     ` Ian Rogers
2024-02-02  2:25 ` [PATCH v1 3/3] perf stat: Fix metric-only aggregation index Ian Rogers
2024-02-06  1:59 ` [PATCH v1 1/3] perf stat: Pass fewer metric arguments Namhyung Kim
2024-02-06  2:23   ` Ian Rogers

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).