From: Vikas Shivappa <vikas.shivappa@linux.intel.com>
To: vikas.shivappa@intel.com, vikas.shivappa@linux.intel.com
Cc: linux-kernel@vger.kernel.org, x86@kernel.org, tglx@linutronix.de,
peterz@infradead.org, ravi.v.shankar@intel.com,
tony.luck@intel.com, fenghua.yu@intel.com, andi.kleen@intel.com,
davidcc@google.com, eranian@google.com, hpa@zytor.com
Subject: [PATCH 14/14] perf/stat: revamp read error handling, snapshot and per_pkg events
Date: Fri, 16 Dec 2016 15:13:08 -0800 [thread overview]
Message-ID: <1481929988-31569-15-git-send-email-vikas.shivappa@linux.intel.com> (raw)
In-Reply-To: <1481929988-31569-1-git-send-email-vikas.shivappa@linux.intel.com>
From: David Carrillo-Cisneros <davidcc@google.com>
A package wide event can return a valid read even if it has not run in a
specific cpu, this does not fit well with the assumption that run == 0
is equivalent to a <not counted>.
To fix the problem, this patch defines special error values for val,
run and ena (~0ULL), and use them to signal read errors, allowing run == 0
to be a valid value for package events. A new value, (NA), is output on
read error and when event has not been enabled (timed enabled == 0).
Finally, this patch revamps calculation of deltas and scaling for snapshot
events, removing the calculation of deltas for time running and enabled in
snapshot event, as should be.
Tests: After this patch, the user mode can see the package llc_occupancy
count when user calls per stat -C <cpux> -G cgroup_y and the systemwide
count when run with -a option for the cgroup
Reviewed-by: Stephane Eranian <eranian@google.com>
Signed-off-by: David Carrillo-Cisneros <davidcc@google.com>
Signed-off-by: Vikas Shivappa <vikas.shivappa@linux.intel.com>
---
tools/perf/builtin-stat.c | 36 +++++++++++++++++++++++-----------
tools/perf/util/counts.h | 19 ++++++++++++++++++
tools/perf/util/evsel.c | 49 ++++++++++++++++++++++++++++++++++++-----------
tools/perf/util/evsel.h | 8 ++++++--
tools/perf/util/stat.c | 35 +++++++++++----------------------
5 files changed, 99 insertions(+), 48 deletions(-)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index c3c4b49..79043a3 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -311,10 +311,8 @@ static int read_counter(struct perf_evsel *counter)
count = perf_counts(counter->counts, cpu, thread);
if (perf_evsel__read(counter, cpu, thread, count)) {
- counter->counts->scaled = -1;
- perf_counts(counter->counts, cpu, thread)->ena = 0;
- perf_counts(counter->counts, cpu, thread)->run = 0;
- return -1;
+ /* do not write stat for failed reads. */
+ continue;
}
if (STAT_RECORD) {
@@ -725,12 +723,16 @@ static int run_perf_stat(int argc, const char **argv)
static void print_running(u64 run, u64 ena)
{
+ bool is_na = run == PERF_COUNTS_NA || ena == PERF_COUNTS_NA || !ena;
+
if (csv_output) {
- fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f",
- csv_sep,
- run,
- csv_sep,
- ena ? 100.0 * run / ena : 100.0);
+ if (is_na)
+ fprintf(stat_config.output, "%sNA%sNA", csv_sep, csv_sep);
+ else
+ fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f",
+ csv_sep, run, csv_sep, 100.0 * run / ena);
+ } else if (is_na) {
+ fprintf(stat_config.output, " (NA)");
} else if (run != ena) {
fprintf(stat_config.output, " (%.2f%%)", 100.0 * run / ena);
}
@@ -1103,7 +1105,7 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval,
if (counter->cgrp)
os.nfields++;
}
- if (run == 0 || ena == 0 || counter->counts->scaled == -1) {
+ if (run == PERF_COUNTS_NA || ena == PERF_COUNTS_NA || counter->counts->scaled == -1) {
if (metric_only) {
pm(&os, NULL, "", "", 0);
return;
@@ -1209,12 +1211,17 @@ static void print_aggr(char *prefix)
id = aggr_map->map[s];
first = true;
evlist__for_each_entry(evsel_list, counter) {
+ bool all_nan = true;
val = ena = run = 0;
nr = 0;
for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
s2 = aggr_get_id(perf_evsel__cpus(counter), cpu);
if (s2 != id)
continue;
+ /* skip NA reads. */
+ if (perf_counts_values__is_na(perf_counts(counter->counts, cpu, 0)))
+ continue;
+ all_nan = false;
val += perf_counts(counter->counts, cpu, 0)->val;
ena += perf_counts(counter->counts, cpu, 0)->ena;
run += perf_counts(counter->counts, cpu, 0)->run;
@@ -1228,6 +1235,10 @@ static void print_aggr(char *prefix)
fprintf(output, "%s", prefix);
uval = val * counter->scale;
+ if (all_nan) {
+ run = PERF_COUNTS_NA;
+ ena = PERF_COUNTS_NA;
+ }
printout(id, nr, counter, uval, prefix, run, ena, 1.0);
if (!metric_only)
fputc('\n', output);
@@ -1306,7 +1317,10 @@ static void print_counter(struct perf_evsel *counter, char *prefix)
if (prefix)
fprintf(output, "%s", prefix);
- uval = val * counter->scale;
+ if (val != PERF_COUNTS_NA)
+ uval = val * counter->scale;
+ else
+ uval = NAN;
printout(cpu, 0, counter, uval, prefix, run, ena, 1.0);
fputc('\n', output);
diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h
index 34d8baa..b65e97a 100644
--- a/tools/perf/util/counts.h
+++ b/tools/perf/util/counts.h
@@ -3,6 +3,9 @@
#include "xyarray.h"
+/* Not Available (NA) value. Any operation with a NA equals a NA. */
+#define PERF_COUNTS_NA ((u64)~0ULL)
+
struct perf_counts_values {
union {
struct {
@@ -14,6 +17,22 @@ struct perf_counts_values {
};
};
+static inline void
+perf_counts_values__make_na(struct perf_counts_values *values)
+{
+ values->val = PERF_COUNTS_NA;
+ values->ena = PERF_COUNTS_NA;
+ values->run = PERF_COUNTS_NA;
+}
+
+static inline bool
+perf_counts_values__is_na(struct perf_counts_values *values)
+{
+ return values->val == PERF_COUNTS_NA ||
+ values->ena == PERF_COUNTS_NA ||
+ values->run == PERF_COUNTS_NA;
+}
+
struct perf_counts {
s8 scaled;
struct perf_counts_values aggr;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index d54efb5..fa0ba96 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1180,6 +1180,9 @@ void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, int thread,
if (!evsel->prev_raw_counts)
return;
+ if (perf_counts_values__is_na(count))
+ return;
+
if (cpu == -1) {
tmp = evsel->prev_raw_counts->aggr;
evsel->prev_raw_counts->aggr = *count;
@@ -1188,26 +1191,43 @@ void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, int thread,
*perf_counts(evsel->prev_raw_counts, cpu, thread) = *count;
}
- count->val = count->val - tmp.val;
+ /* Snapshot events do not calculate deltas for count values. */
+ if (!evsel->snapshot)
+ count->val = count->val - tmp.val;
count->ena = count->ena - tmp.ena;
count->run = count->run - tmp.run;
}
void perf_counts_values__scale(struct perf_counts_values *count,
- bool scale, s8 *pscaled)
+ bool scale, bool per_pkg, bool snapshot, s8 *pscaled)
{
s8 scaled = 0;
+ if (perf_counts_values__is_na(count)) {
+ if (pscaled)
+ *pscaled = -1;
+ return;
+ }
+
if (scale) {
- if (count->run == 0) {
+ /*
+ * per-pkg events can have run == 0 in a CPU and still be
+ * valid.
+ */
+ if (count->run == 0 && !per_pkg) {
scaled = -1;
count->val = 0;
} else if (count->run < count->ena) {
scaled = 1;
- count->val = (u64)((double) count->val * count->ena / count->run + 0.5);
+ /* Snapshot events do not scale counts values. */
+ if (!snapshot && count->run)
+ count->val = (u64)((double) count->val * count->ena /
+ count->run + 0.5);
}
- } else
- count->ena = count->run = 0;
+
+ } else {
+ count->run = count->ena;
+ }
if (pscaled)
*pscaled = scaled;
@@ -1221,8 +1241,10 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
if (FD(evsel, cpu, thread) < 0)
return -EINVAL;
- if (readn(FD(evsel, cpu, thread), count, sizeof(*count)) <= 0)
+ if (readn(FD(evsel, cpu, thread), count, sizeof(*count)) <= 0) {
+ perf_counts_values__make_na(count);
return -errno;
+ }
return 0;
}
@@ -1230,6 +1252,7 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
int cpu, int thread, bool scale)
{
+ int ret = 0;
struct perf_counts_values count;
size_t nv = scale ? 3 : 1;
@@ -1239,13 +1262,17 @@ int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1, thread + 1) < 0)
return -ENOMEM;
- if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) <= 0)
- return -errno;
+ if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) <= 0) {
+ perf_counts_values__make_na(&count);
+ ret = -errno;
+ goto exit;
+ }
perf_evsel__compute_deltas(evsel, cpu, thread, &count);
- perf_counts_values__scale(&count, scale, NULL);
+ perf_counts_values__scale(&count, scale, evsel->per_pkg, evsel->snapshot, NULL);
+exit:
*perf_counts(evsel->counts, cpu, thread) = count;
- return 0;
+ return ret;
}
static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread)
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index b1503b0..facb6494 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -80,6 +80,10 @@ struct perf_evsel_config_term {
* @is_pos: the position (counting backwards) of the event id (PERF_SAMPLE_ID or
* PERF_SAMPLE_IDENTIFIER) in a non-sample event i.e. if sample_id_all
* is used there is an id sample appended to non-sample events
+ * @snapshot: an event that whose raw value cannot be extrapolated based on
+ * the ratio of running/enabled time.
+ * @per_pkg: an event that runs package wide. All cores in same package will
+ * read the same value, even if running time == 0.
* @priv: And what is in its containing unnamed union are tool specific
*/
struct perf_evsel {
@@ -150,8 +154,8 @@ static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
return perf_evsel__cpus(evsel)->nr;
}
-void perf_counts_values__scale(struct perf_counts_values *count,
- bool scale, s8 *pscaled);
+void perf_counts_values__scale(struct perf_counts_values *count, bool scale,
+ bool per_pkg, bool snapshot, s8 *pscaled);
void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, int thread,
struct perf_counts_values *count);
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 39345c2d..514b953 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -202,7 +202,7 @@ static void zero_per_pkg(struct perf_evsel *counter)
}
static int check_per_pkg(struct perf_evsel *counter,
- struct perf_counts_values *vals, int cpu, bool *skip)
+ int cpu, bool *skip)
{
unsigned long *mask = counter->per_pkg_mask;
struct cpu_map *cpus = perf_evsel__cpus(counter);
@@ -224,17 +224,6 @@ static int check_per_pkg(struct perf_evsel *counter,
counter->per_pkg_mask = mask;
}
- /*
- * we do not consider an event that has not run as a good
- * instance to mark a package as used (skip=1). Otherwise
- * we may run into a situation where the first CPU in a package
- * is not running anything, yet the second is, and this function
- * would mark the package as used after the first CPU and would
- * not read the values from the second CPU.
- */
- if (!(vals->run && vals->ena))
- return 0;
-
s = cpu_map__get_socket(cpus, cpu, NULL);
if (s < 0)
return -1;
@@ -249,30 +238,27 @@ static int check_per_pkg(struct perf_evsel *counter,
struct perf_counts_values *count)
{
struct perf_counts_values *aggr = &evsel->counts->aggr;
- static struct perf_counts_values zero;
bool skip = false;
- if (check_per_pkg(evsel, count, cpu, &skip)) {
+ if (check_per_pkg(evsel, cpu, &skip)) {
pr_err("failed to read per-pkg counter\n");
return -1;
}
- if (skip)
- count = &zero;
-
switch (config->aggr_mode) {
case AGGR_THREAD:
case AGGR_CORE:
case AGGR_SOCKET:
case AGGR_NONE:
- if (!evsel->snapshot)
- perf_evsel__compute_deltas(evsel, cpu, thread, count);
- perf_counts_values__scale(count, config->scale, NULL);
+ perf_evsel__compute_deltas(evsel, cpu, thread, count);
+ perf_counts_values__scale(count, config->scale,
+ evsel->per_pkg, evsel->snapshot, NULL);
if (config->aggr_mode == AGGR_NONE)
perf_stat__update_shadow_stats(evsel, count->values, cpu);
break;
case AGGR_GLOBAL:
- aggr->val += count->val;
+ if (!skip)
+ aggr->val += count->val;
if (config->scale) {
aggr->ena += count->ena;
aggr->run += count->run;
@@ -337,9 +323,10 @@ int perf_stat_process_counter(struct perf_stat_config *config,
if (config->aggr_mode != AGGR_GLOBAL)
return 0;
- if (!counter->snapshot)
- perf_evsel__compute_deltas(counter, -1, -1, aggr);
- perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled);
+ perf_evsel__compute_deltas(counter, -1, -1, aggr);
+ perf_counts_values__scale(aggr, config->scale,
+ counter->per_pkg, counter->snapshot,
+ &counter->counts->scaled);
for (i = 0; i < 3; i++)
update_stats(&ps->res_stats[i], count[i]);
--
1.9.1
prev parent reply other threads:[~2016-12-16 23:14 UTC|newest]
Thread overview: 29+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-12-16 23:12 [PATCH V4 00/14] Cqm2: Intel Cache Monitoring fixes and enhancements Vikas Shivappa
2016-12-16 23:12 ` [PATCH 01/14] x86/cqm: Intel Resource Monitoring Documentation Vikas Shivappa
2016-12-23 12:32 ` Peter Zijlstra
2016-12-23 19:35 ` Shivappa Vikas
2016-12-23 20:33 ` Peter Zijlstra
2016-12-23 21:41 ` Shivappa Vikas
2016-12-25 1:51 ` Shivappa Vikas
2016-12-27 7:13 ` David Carrillo-Cisneros
2016-12-27 20:00 ` Andi Kleen
2016-12-27 20:21 ` Shivappa Vikas
2016-12-27 21:38 ` David Carrillo-Cisneros
2016-12-27 21:33 ` David Carrillo-Cisneros
2016-12-27 23:10 ` Andi Kleen
2016-12-28 1:23 ` David Carrillo-Cisneros
2016-12-28 20:03 ` Shivappa Vikas
2016-12-16 23:12 ` [PATCH 02/14] x86/cqm: Remove cqm recycling/conflict handling Vikas Shivappa
2016-12-16 23:12 ` [PATCH 03/14] x86/rdt: Add rdt common/cqm compile option Vikas Shivappa
2016-12-16 23:12 ` [PATCH 04/14] x86/cqm: Add Per pkg rmid support Vikas Shivappa
2016-12-16 23:12 ` [PATCH 05/14] x86/cqm,perf/core: Cgroup support prepare Vikas Shivappa
2016-12-16 23:13 ` [PATCH 06/14] x86/cqm: Add cgroup hierarchical monitoring support Vikas Shivappa
2016-12-16 23:13 ` [PATCH 07/14] x86/rdt,cqm: Scheduling support update Vikas Shivappa
2016-12-16 23:13 ` [PATCH 08/14] x86/cqm: Add support for monitoring task and cgroup together Vikas Shivappa
2016-12-16 23:13 ` [PATCH 09/14] x86/cqm: Add Continuous cgroup monitoring Vikas Shivappa
2016-12-16 23:13 ` [PATCH 10/14] x86/cqm: Add RMID reuse Vikas Shivappa
2016-12-16 23:13 ` [PATCH 11/14] x86/cqm: Add failure on open and read Vikas Shivappa
2016-12-23 11:58 ` David Carrillo-Cisneros
2016-12-16 23:13 ` [PATCH 12/14] perf/core,x86/cqm: Add read for Cgroup events,per pkg reads Vikas Shivappa
2016-12-16 23:13 ` [PATCH 13/14] perf/stat: fix bug in handling events in error state Vikas Shivappa
2016-12-16 23:13 ` Vikas Shivappa [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1481929988-31569-15-git-send-email-vikas.shivappa@linux.intel.com \
--to=vikas.shivappa@linux.intel.com \
--cc=andi.kleen@intel.com \
--cc=davidcc@google.com \
--cc=eranian@google.com \
--cc=fenghua.yu@intel.com \
--cc=hpa@zytor.com \
--cc=linux-kernel@vger.kernel.org \
--cc=peterz@infradead.org \
--cc=ravi.v.shankar@intel.com \
--cc=tglx@linutronix.de \
--cc=tony.luck@intel.com \
--cc=vikas.shivappa@intel.com \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).