All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH V2 0/2] measure SMI cost (user)
@ 2017-05-26 19:05 kan.liang
  2017-05-26 19:05 ` [PATCH V2 1/2] tools lib api fs: Add sysfs__write_int function kan.liang
                   ` (3 more replies)
  0 siblings, 4 replies; 15+ messages in thread
From: kan.liang @ 2017-05-26 19:05 UTC (permalink / raw)
  To: acme, tglx, mingo, linux-kernel
  Cc: peterz, eranian, jolsa, elliott, ak, Kan Liang

From: Kan Liang <Kan.liang@intel.com>

Currently, perf has no way to measure the time spent in System Management
Mode (SMM).

Intel perfmon supports the FREEZE_WHILE_SMM bit in IA32_DEBUGCTL. Once it is
set, the PMU core counters freeze while the SMI handler runs. It has no
effect on free-running counters such as APERF, so the cost of SMI can be
measured as (aperf - unhalted core cycles).

A new sysfs entry /sys/devices/cpu/freeze_on_smi is introduced to set the
FREEZE_WHILE_SMM bit in IA32_DEBUGCTL. (This is the kernel patch, which has
already been merged; the commit ID is 6089327f5424f227bb6a8cf92363c2617e054453.)

A new --smi-cost mode in perf stat is implemented to measure the SMI cost
by calculating unhalted core cycles and aperf results.

In practice, the percentage of SMI cycles is very useful for
performance-oriented analysis, so the output will be SMI cycles% and SMI#.
Users who want the actual values can pass --no-metric-only.

Here is an example of default output.

 Performance counter stats for 'sudo echo ':

SMI cycles%          SMI#
    0.1%              1

       0.010858678 seconds time elapsed

Changes since V1:
 - Check path array before trying to open that path. (Robert)
 - Refine change logs

Kan Liang (2):
  tools lib api fs: Add sysfs__write_int function
  perf stat: Add support to measure SMI cost

 tools/lib/api/fs/fs.c                  | 30 +++++++++++++++++++++
 tools/lib/api/fs/fs.h                  |  4 +++
 tools/perf/Documentation/perf-stat.txt | 14 ++++++++++
 tools/perf/builtin-stat.c              | 49 ++++++++++++++++++++++++++++++++++
 tools/perf/util/stat-shadow.c          | 33 +++++++++++++++++++++++
 tools/perf/util/stat.c                 |  2 ++
 tools/perf/util/stat.h                 |  2 ++
 7 files changed, 134 insertions(+)

-- 
2.7.4

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH V2 1/2] tools lib api fs: Add sysfs__write_int function
  2017-05-26 19:05 [PATCH V2 0/2] measure SMI cost (user) kan.liang
@ 2017-05-26 19:05 ` kan.liang
  2017-06-21 18:17   ` [tip:perf/core] " tip-bot for Kan Liang
  2017-05-26 19:05 ` [PATCH V2 2/2] perf stat: Add support to measure SMI cost kan.liang
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 15+ messages in thread
From: kan.liang @ 2017-05-26 19:05 UTC (permalink / raw)
  To: acme, tglx, mingo, linux-kernel
  Cc: peterz, eranian, jolsa, elliott, ak, Kan Liang

From: Kan Liang <Kan.liang@intel.com>

Adding sysfs__write_int function to ease up writing int to sysfs.
New interface is:

  int sysfs__write_int(const char *entry, int value);

Also, introducing filename__write_int which is useful for new helpers to
write sysctl values.

Signed-off-by: Kan Liang <Kan.liang@intel.com>
---
 tools/lib/api/fs/fs.c | 30 ++++++++++++++++++++++++++++++
 tools/lib/api/fs/fs.h |  4 ++++
 2 files changed, 34 insertions(+)

diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
index 809c772..a7ecf8f 100644
--- a/tools/lib/api/fs/fs.c
+++ b/tools/lib/api/fs/fs.c
@@ -387,6 +387,22 @@ int filename__read_str(const char *filename, char **buf, size_t *sizep)
 	return err;
 }
 
+int filename__write_int(const char *filename, int value)
+{
+	int fd = open(filename, O_WRONLY), err = -1;
+	char buf[64];
+
+	if (fd < 0)
+		return err;
+
+	sprintf(buf, "%d", value);
+	if (write(fd, buf, sizeof(buf)) == sizeof(buf))
+		err = 0;
+
+	close(fd);
+	return err;
+}
+
 int procfs__read_str(const char *entry, char **buf, size_t *sizep)
 {
 	char path[PATH_MAX];
@@ -480,3 +496,17 @@ int sysctl__read_int(const char *sysctl, int *value)
 
 	return filename__read_int(path, value);
 }
+
+int sysfs__write_int(const char *entry, int value)
+{
+	char path[PATH_MAX];
+	const char *sysfs = sysfs__mountpoint();
+
+	if (!sysfs)
+		return -1;
+
+	if (snprintf(path, sizeof(path), "%s/%s", sysfs, entry) >= PATH_MAX)
+		return -1;
+
+	return filename__write_int(path, value);
+}
diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h
index 956c211..4560534 100644
--- a/tools/lib/api/fs/fs.h
+++ b/tools/lib/api/fs/fs.h
@@ -31,6 +31,8 @@ int filename__read_int(const char *filename, int *value);
 int filename__read_ull(const char *filename, unsigned long long *value);
 int filename__read_str(const char *filename, char **buf, size_t *sizep);
 
+int filename__write_int(const char *filename, int value);
+
 int procfs__read_str(const char *entry, char **buf, size_t *sizep);
 
 int sysctl__read_int(const char *sysctl, int *value);
@@ -38,4 +40,6 @@ int sysfs__read_int(const char *entry, int *value);
 int sysfs__read_ull(const char *entry, unsigned long long *value);
 int sysfs__read_str(const char *entry, char **buf, size_t *sizep);
 int sysfs__read_bool(const char *entry, bool *value);
+
+int sysfs__write_int(const char *entry, int value);
 #endif /* __API_FS__ */
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH V2 2/2] perf stat: Add support to measure SMI cost
  2017-05-26 19:05 [PATCH V2 0/2] measure SMI cost (user) kan.liang
  2017-05-26 19:05 ` [PATCH V2 1/2] tools lib api fs: Add sysfs__write_int function kan.liang
@ 2017-05-26 19:05 ` kan.liang
  2017-06-21 18:18   ` [tip:perf/core] " tip-bot for Kan Liang
  2017-05-29 12:46 ` [PATCH V2 0/2] measure SMI cost (user) Jiri Olsa
  2017-06-20 21:43 ` Jiri Olsa
  3 siblings, 1 reply; 15+ messages in thread
From: kan.liang @ 2017-05-26 19:05 UTC (permalink / raw)
  To: acme, tglx, mingo, linux-kernel
  Cc: peterz, eranian, jolsa, elliott, ak, Kan Liang

From: Kan Liang <Kan.liang@intel.com>

Implement a new --smi-cost mode in perf stat to measure SMI cost.
During the measurement, /sys/devices/cpu/freeze_on_smi will be set.
The measurement can be done with one counter (unhalted core cycles)
and two free-running MSR counters (IA32_APERF and SMI_COUNT).

In practice, the percentages of SMI core cycles should be more useful
than absolute value. So the output will be the percentage of SMI core
cycles and SMI#. metric_only will be set by default.

SMI cycles% = (aperf - unhalted core cycles) / aperf

Here is an example output.

 Performance counter stats for 'sudo echo ':

SMI cycles%          SMI#
    0.1%              1

       0.010858678 seconds time elapsed

Users who want the actual values can additionally pass
--no-metric-only.

Signed-off-by: Kan Liang <Kan.liang@intel.com>
---
 tools/perf/Documentation/perf-stat.txt | 14 ++++++++++
 tools/perf/builtin-stat.c              | 49 ++++++++++++++++++++++++++++++++++
 tools/perf/util/stat-shadow.c          | 33 +++++++++++++++++++++++
 tools/perf/util/stat.c                 |  2 ++
 tools/perf/util/stat.h                 |  2 ++
 5 files changed, 100 insertions(+)

diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index bd0e441..151db03 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -239,6 +239,20 @@ taskset.
 --no-merge::
 Do not merge results from same PMUs.
 
+--smi-cost::
+Measure SMI cost if msr/aperf/ and msr/smi/ events are supported.
+
+During the measurement, /sys/devices/cpu/freeze_on_smi will be set to
+freeze core counters on SMI.
+The aperf counter will not be affected by the setting.
+The cost of SMI can be measured by (aperf - unhalted core cycles).
+
+In practice, the percentage of SMI cycles is very useful for performance
+oriented analysis. --metric-only will be applied by default.
+The output is SMI cycles%, which equals (aperf - unhalted core cycles) / aperf
+
+Users who want to get the actual value can apply --no-metric-only.
+
 EXAMPLES
 --------
 
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a935b50..7c1ec3d 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -86,6 +86,7 @@
 #define DEFAULT_SEPARATOR	" "
 #define CNTR_NOT_SUPPORTED	"<not supported>"
 #define CNTR_NOT_COUNTED	"<not counted>"
+#define FREEZE_ON_SMI_PATH	"devices/cpu/freeze_on_smi"
 
 static void print_counters(struct timespec *ts, int argc, const char **argv);
 
@@ -122,6 +123,14 @@ static const char * topdown_attrs[] = {
 	NULL,
 };
 
+static const char *smi_cost_attrs = {
+	"{"
+	"msr/aperf/,"
+	"msr/smi/,"
+	"cycles"
+	"}"
+};
+
 static struct perf_evlist	*evsel_list;
 
 static struct target target = {
@@ -137,6 +146,8 @@ static bool			null_run			=  false;
 static int			detailed_run			=  0;
 static bool			transaction_run;
 static bool			topdown_run			= false;
+static bool			smi_cost			= false;
+static bool			smi_reset			= false;
 static bool			big_num				=  true;
 static int			big_num_opt			=  -1;
 static const char		*csv_sep			= NULL;
@@ -1779,6 +1790,8 @@ static const struct option stat_options[] = {
 			"Only print computed metrics. No raw values", enable_metric_only),
 	OPT_BOOLEAN(0, "topdown", &topdown_run,
 			"measure topdown level 1 statistics"),
+	OPT_BOOLEAN(0, "smi-cost", &smi_cost,
+			"measure SMI cost"),
 	OPT_END()
 };
 
@@ -2157,6 +2170,39 @@ static int add_default_attributes(void)
 		return 0;
 	}
 
+	if (smi_cost) {
+		int smi;
+
+		if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) {
+			fprintf(stderr, "freeze_on_smi is not supported.\n");
+			return -1;
+		}
+
+		if (!smi) {
+			if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) {
+				fprintf(stderr, "Failed to set freeze_on_smi.\n");
+				return -1;
+			}
+			smi_reset = true;
+		}
+
+		if (pmu_have_event("msr", "aperf") &&
+		    pmu_have_event("msr", "smi")) {
+			if (!force_metric_only)
+				metric_only = true;
+			err = parse_events(evsel_list, smi_cost_attrs, NULL);
+		} else {
+			fprintf(stderr, "To measure SMI cost, it needs "
+				"msr/aperf/, msr/smi/ and cpu/cycles/ support\n");
+			return -1;
+		}
+		if (err) {
+			fprintf(stderr, "Cannot set up SMI cost events\n");
+			return -1;
+		}
+		return 0;
+	}
+
 	if (topdown_run) {
 		char *str = NULL;
 		bool warn = false;
@@ -2739,6 +2785,9 @@ int cmd_stat(int argc, const char **argv)
 	perf_stat__exit_aggr_mode();
 	perf_evlist__free_stats(evsel_list);
 out:
+	if (smi_cost && smi_reset)
+		sysfs__write_int(FREEZE_ON_SMI_PATH, 0);
+
 	perf_evlist__delete(evsel_list);
 	return status;
 }
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index ac10cc6..719d6cb 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -44,6 +44,8 @@ static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_smi_num_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_aperf_stats[NUM_CTX][MAX_NR_CPUS];
 static struct rblist runtime_saved_values;
 static bool have_frontend_stalled;
 
@@ -157,6 +159,8 @@ void perf_stat__reset_shadow_stats(void)
 	memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued));
 	memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles));
 	memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles));
+	memset(runtime_smi_num_stats, 0, sizeof(runtime_smi_num_stats));
+	memset(runtime_aperf_stats, 0, sizeof(runtime_aperf_stats));
 
 	next = rb_first(&runtime_saved_values.entries);
 	while (next) {
@@ -217,6 +221,10 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
 		update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
 		update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
+	else if (perf_stat_evsel__is(counter, SMI_NUM))
+		update_stats(&runtime_smi_num_stats[ctx][cpu], count[0]);
+	else if (perf_stat_evsel__is(counter, APERF))
+		update_stats(&runtime_aperf_stats[ctx][cpu], count[0]);
 
 	if (counter->collect_stat) {
 		struct saved_value *v = saved_value_lookup(counter, cpu, ctx,
@@ -592,6 +600,29 @@ static double td_be_bound(int ctx, int cpu)
 	return sanitize_val(1.0 - sum);
 }
 
+static void print_smi_cost(int cpu, struct perf_evsel *evsel,
+			   struct perf_stat_output_ctx *out)
+{
+	double smi_num, aperf, cycles, cost = 0.0;
+	int ctx = evsel_context(evsel);
+	const char *color = NULL;
+
+	smi_num = avg_stats(&runtime_smi_num_stats[ctx][cpu]);
+	aperf = avg_stats(&runtime_aperf_stats[ctx][cpu]);
+	cycles = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+
+	if ((cycles == 0) || (aperf == 0))
+		return;
+
+	if (smi_num)
+		cost = (aperf - cycles) / aperf * 100.00;
+
+	if (cost > 10)
+		color = PERF_COLOR_RED;
+	out->print_metric(out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
+	out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num);
+}
+
 void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 				   double avg, int cpu,
 				   struct perf_stat_output_ctx *out)
@@ -825,6 +856,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 		}
 		snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
 		print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
+	} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
+		print_smi_cost(cpu, evsel, out);
 	} else {
 		print_metric(ctxp, NULL, NULL, NULL, 0);
 	}
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index c581744..53b9a99 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -86,6 +86,8 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
 	ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
 	ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
 	ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
+	ID(SMI_NUM, msr/smi/),
+	ID(APERF, msr/aperf/),
 };
 #undef ID
 
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 0a65ae2..7522bf1 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -22,6 +22,8 @@ enum perf_stat_evsel_id {
 	PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED,
 	PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES,
 	PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES,
+	PERF_STAT_EVSEL_ID__SMI_NUM,
+	PERF_STAT_EVSEL_ID__APERF,
 	PERF_STAT_EVSEL_ID__MAX,
 };
 
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH V2 0/2] measure SMI cost (user)
  2017-05-26 19:05 [PATCH V2 0/2] measure SMI cost (user) kan.liang
  2017-05-26 19:05 ` [PATCH V2 1/2] tools lib api fs: Add sysfs__write_int function kan.liang
  2017-05-26 19:05 ` [PATCH V2 2/2] perf stat: Add support to measure SMI cost kan.liang
@ 2017-05-29 12:46 ` Jiri Olsa
  2017-05-29 12:52   ` Peter Zijlstra
  2017-06-20 21:43 ` Jiri Olsa
  3 siblings, 1 reply; 15+ messages in thread
From: Jiri Olsa @ 2017-05-29 12:46 UTC (permalink / raw)
  To: kan.liang
  Cc: acme, tglx, mingo, linux-kernel, peterz, eranian, jolsa, elliott, ak

On Fri, May 26, 2017 at 12:05:36PM -0700, kan.liang@intel.com wrote:
> From: Kan Liang <Kan.liang@intel.com>
> 
> Currently, there is no way to measure the time cost in System management
> mode (SMM) by perf.
> 
> Intel perfmon supports FREEZE_WHILE_SMM bit in IA32_DEBUGCTL. Once it sets,
> the PMU core counters will freeze on SMI handler. But it will not have an
> effect on free running counters. E.g. APERF counter.
> The cost of SMI can be measured by (aperf - unhalted core cycles).
> 
> A new sysfs entry /sys/device/cpu/freeze_on_smi is introduced to set
> FREEZE_WHILE_SMM bit in IA32_DEBUGCTL. (kernel patch, which has been merged.
> The commit ID is 6089327f5424f227bb6a8cf92363c2617e054453)
> 
> A new --smi-cost mode in perf stat is implemented to measure the SMI cost
> by calculating unhalted core cycles and aperf results.
> 
> In practice, the percentages of SMI cycles is very useful for performance
> oriented analysis. So the output will be SMI cycles% and SMI#.
> For users who wants to get the actual value, they can apply --no-metric-only.
> 
> Here is an example of default output.
> 
>  Performance counter stats for 'sudo echo ':
> 
> SMI cycles%          SMI#
>     0.1%              1
> 
>        0.010858678 seconds time elapsed

for some reason I can't get single SMI count generated,
is there a setup/bench that would provoke that?

other than that, the code looks ok

thanks,
jirka

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH V2 0/2] measure SMI cost (user)
  2017-05-29 12:46 ` [PATCH V2 0/2] measure SMI cost (user) Jiri Olsa
@ 2017-05-29 12:52   ` Peter Zijlstra
  2017-05-29 13:16     ` Jiri Olsa
  0 siblings, 1 reply; 15+ messages in thread
From: Peter Zijlstra @ 2017-05-29 12:52 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: kan.liang, acme, tglx, mingo, linux-kernel, eranian, jolsa, elliott, ak

On Mon, May 29, 2017 at 02:46:37PM +0200, Jiri Olsa wrote:

> for some reason I can't get single SMI count generated,
> is there a setup/bench that would provoke that?

Not having SMIs is a good thing ;-)

Not sure we can tickle them in a reliable way.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH V2 0/2] measure SMI cost (user)
  2017-05-29 12:52   ` Peter Zijlstra
@ 2017-05-29 13:16     ` Jiri Olsa
  2017-05-29 17:06       ` Liang, Kan
  0 siblings, 1 reply; 15+ messages in thread
From: Jiri Olsa @ 2017-05-29 13:16 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: kan.liang, acme, tglx, mingo, linux-kernel, eranian, jolsa, elliott, ak

On Mon, May 29, 2017 at 02:52:39PM +0200, Peter Zijlstra wrote:
> On Mon, May 29, 2017 at 02:46:37PM +0200, Jiri Olsa wrote:
> 
> > for some reason I can't get single SMI count generated,
> > is there a setup/bench that would provoke that?
> 
> Not having SMIs is a good thing ;-)
> 
> Not sure we can tickle them in a reliable way.

yea I saw some counts last time, now just zero
so I was wondering if it's working

jirka

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH V2 0/2] measure SMI cost (user)
  2017-05-29 13:16     ` Jiri Olsa
@ 2017-05-29 17:06       ` Liang, Kan
  2017-06-02 15:45         ` Liang, Kan
  0 siblings, 1 reply; 15+ messages in thread
From: Liang, Kan @ 2017-05-29 17:06 UTC (permalink / raw)
  To: Jiri Olsa, Peter Zijlstra
  Cc: acme, tglx, mingo, linux-kernel, eranian, jolsa, elliott, ak


> 
> On Mon, May 29, 2017 at 02:52:39PM +0200, Peter Zijlstra wrote:
> > On Mon, May 29, 2017 at 02:46:37PM +0200, Jiri Olsa wrote:
> >
> > > for some reason I can't get single SMI count generated, is there a
> > > setup/bench that would provoke that?
> >
> > Not having SMIs is a good thing ;-)
> >
> > Not sure we can tickle them in a reliable way.
> 
> yea I saw some counts last time, now just zero so I was wondering if it's
> working
> 

We have an internal test case which can generate SMIs, but I cannot publish
it. Sorry about that.

The example in the change log is from the real test (Only the SMI cycles%
number is fake). I believe it works.

Thanks,
Kan

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH V2 0/2] measure SMI cost (user)
  2017-05-29 17:06       ` Liang, Kan
@ 2017-06-02 15:45         ` Liang, Kan
  2017-06-02 18:27           ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 15+ messages in thread
From: Liang, Kan @ 2017-06-02 15:45 UTC (permalink / raw)
  To: 'Jiri Olsa', 'Peter Zijlstra'
  Cc: 'acme@kernel.org', 'tglx@linutronix.de',
	'mingo@redhat.com',
	'linux-kernel@vger.kernel.org',
	'eranian@google.com', 'jolsa@kernel.org',
	'elliott@hpe.com', 'ak@linux.intel.com'



> >
> > On Mon, May 29, 2017 at 02:52:39PM +0200, Peter Zijlstra wrote:
> > > On Mon, May 29, 2017 at 02:46:37PM +0200, Jiri Olsa wrote:
> > >
> > > > for some reason I can't get single SMI count generated, is there a
> > > > setup/bench that would provoke that?
> > >
> > > Not having SMIs is a good thing ;-)
> > >
> > > Not sure we can tickle them in a reliable way.
> >
> > yea I saw some counts last time, now just zero so I was wondering if
> > it's working
> >
> 
> We have internal test case which can generate SMI, but I cannot publish the
> test case. Sorry about that.
> 

APM_CNT (0xB2) could be used to trigger SMI#.

It's documented in PCH datasheet.
https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/9-series-chipset-pch-datasheet.pdf

APM_CNT-Advanced Power Management Control Port Register 
I/O Address: B2h 
Attribute: R/W 
Default Value: 00h 
Size: 8 bits
Lockable: No 
Usage: Legacy Only
Power Well: Core
Bit Description
7:0 Used to pass an APM command between the OS and the SMI handler. 
Writes to this port not only store data in the APMC register, 
but also generates an SMI# when the APMC_EN bit is set.

You can write a byte to port 0xB2 to trigger an SMI#

Thanks,
Kan

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH V2 0/2] measure SMI cost (user)
  2017-06-02 15:45         ` Liang, Kan
@ 2017-06-02 18:27           ` Arnaldo Carvalho de Melo
  2017-06-14 17:50             ` Liang, Kan
  0 siblings, 1 reply; 15+ messages in thread
From: Arnaldo Carvalho de Melo @ 2017-06-02 18:27 UTC (permalink / raw)
  To: Liang, Kan
  Cc: Jiri Olsa, Peter Zijlstra, Thomas Gleixner, Ingo Molnar,
	linux-kernel, Stephane Eranian, Jiri Olsa, elliott, Andi Kleen

Em Fri, Jun 02, 2017 at 03:45:11PM +0000, Liang, Kan escreveu:
> > > On Mon, May 29, 2017 at 02:52:39PM +0200, Peter Zijlstra wrote:
> > > > On Mon, May 29, 2017 at 02:46:37PM +0200, Jiri Olsa wrote:
> > > > > for some reason I can't get single SMI count generated, is there a
> > > > > setup/bench that would provoke that?

> > > > Not having SMIs is a good thing ;-)
> > > > Not sure we can tickle them in a reliable way.

> > > yea I saw some counts last time, now just zero so I was wondering
> > > if it's working

> > We have internal test case which can generate SMI, but I cannot publish the
> > test case. Sorry about that.
 
> APM_CNT (0xB2) could be used to trigger SMI#.

Here, if I run the following 'perf stat' command and press the mute
button (the one sharing F1 on a ThinkPad T450s), it triggers SMIs; toggle
it in quick succession and it generates more, etc.:

[root@jouet ~]# perf stat -I 1000 -e msr/smi/
#           time             counts unit events
     1.000103173                  0      msr/smi/                                                    
     2.000278816                  4      msr/smi/                                                    
     3.000472630                  4      msr/smi/                                                    
     4.000743916                  0      msr/smi/                                                    
     5.001369358                  4      msr/smi/                                                    
     6.001668033                  0      msr/smi/                                                    
     7.001852603                  4      msr/smi/                                                    
     8.002108269                 12      msr/smi/                                                    
     9.002367312                  0      msr/smi/                                                    
^C     9.961897866                  0      msr/smi/                                                    

[root@jouet ~]#

- Arnaldo
 
> It's documented in PCH datasheet.
> https://www.intel.com/content/dam/www/public/us/en/
> documents/datasheets/9-series-chipset-pch-datasheet.pdf
> 
> APM_CNT-Advanced Power Management Control Port Register 
> I/O Address: B2h 
> Attribute: R/W 
> Default Value: 00h 
> Size: 8 bits
> Lockable: No 
> Usage: Legacy Only
> Power Well: Core
> Bit Description
> 7:0 Used to pass an APM command between the OS and the SMI handler. 
> Writes to this port not only store data in the APMC register, 
> but also generates an SMI# when the APMC_EN bit is set.
> 
> You can write a byte to port 0xB2 to trigger an SMI#
> 
> Thanks,
> Kan

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH V2 0/2] measure SMI cost (user)
  2017-06-02 18:27           ` Arnaldo Carvalho de Melo
@ 2017-06-14 17:50             ` Liang, Kan
  2017-06-20 13:43               ` Liang, Kan
  0 siblings, 1 reply; 15+ messages in thread
From: Liang, Kan @ 2017-06-14 17:50 UTC (permalink / raw)
  To: Jiri Olsa (jolsa@kernel.org), Arnaldo Carvalho de Melo
  Cc: Jiri Olsa, Peter Zijlstra, Thomas Gleixner, Ingo Molnar,
	linux-kernel, Stephane Eranian, Jiri Olsa, elliott, Andi Kleen

Hi Jirka,

Have you got a chance to try the code?
Are you OK with the patch?

Thanks,
Kan

> 
> Em Fri, Jun 02, 2017 at 03:45:11PM +0000, Liang, Kan escreveu:
> > > > On Mon, May 29, 2017 at 02:52:39PM +0200, Peter Zijlstra wrote:
> > > > > On Mon, May 29, 2017 at 02:46:37PM +0200, Jiri Olsa wrote:
> > > > > > for some reason I can't get single SMI count generated, is
> > > > > > there a setup/bench that would provoke that?
> 
> > > > > Not having SMIs is a good thing ;-) Not sure we can tickle them
> > > > > in a reliable way.
> 
> > > > yea I saw some counts last time, now just zero so I was wondering
> > > > if it's working
> 
> > > We have internal test case which can generate SMI, but I cannot
> > > publish the test case. Sorry about that.
> 
> > APM_CNT (0xB2) could be used to trigger SMI#.
> 
> Here if I run the following 'perf stat' command and press the mute button
> (the one sharing F1 in a thinkpad t450s it triggers SMIs, toggle it in quick
> sucession and it generates more, etc:
> 
> [root@jouet ~]# perf stat -I 1000 -e msr/smi/
> #           time             counts unit events
>      1.000103173                  0      msr/smi/
>      2.000278816                  4      msr/smi/
>      3.000472630                  4      msr/smi/
>      4.000743916                  0      msr/smi/
>      5.001369358                  4      msr/smi/
>      6.001668033                  0      msr/smi/
>      7.001852603                  4      msr/smi/
>      8.002108269                 12      msr/smi/
>      9.002367312                  0      msr/smi/
> ^C     9.961897866                  0      msr/smi/
> 
> [root@jouet ~]#
> 
> - Arnaldo
> 
> > It's documented in PCH datasheet.
> > https://www.intel.com/content/dam/www/public/us/en/
> > documents/datasheets/9-series-chipset-pch-datasheet.pdf
> >
> > APM_CNT-Advanced Power Management Control Port Register I/O Address:
> > B2h
> > Attribute: R/W
> > Default Value: 00h
> > Size: 8 bits
> > Lockable: No
> > Usage: Legacy Only
> > Power Well: Core
> > Bit Description
> > 7:0 Used to pass an APM command between the OS and the SMI handler.
> > Writes to this port not only store data in the APMC register, but also
> > generates an SMI# when the APMC_EN bit is set.
> >
> > You can write a byte to port 0xB2 to trigger an SMI#
> >
> > Thanks,
> > Kan

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH V2 0/2] measure SMI cost (user)
  2017-06-14 17:50             ` Liang, Kan
@ 2017-06-20 13:43               ` Liang, Kan
  2017-06-20 20:29                 ` 'Arnaldo Carvalho de Melo'
  0 siblings, 1 reply; 15+ messages in thread
From: Liang, Kan @ 2017-06-20 13:43 UTC (permalink / raw)
  To: Jiri Olsa (jolsa@kernel.org), 'Arnaldo Carvalho de Melo'
  Cc: 'Jiri Olsa', 'Peter Zijlstra',
	'Thomas Gleixner', 'Ingo Molnar',
	'linux-kernel@vger.kernel.org',
	'Stephane Eranian', 'Jiri Olsa',
	'elliott@hpe.com', 'Andi Kleen'

Hi Arnaldo and Jirka,

Ping.
Any comments for the patch?

Thanks,
Kan 

> Subject: RE: [PATCH V2 0/2] measure SMI cost (user)
> 
> Hi Jirka,
> 
> Have you got a chance to try the code?
> Are you OK with the patch?
> 
> Thanks,
> Kan
> 
> >
> > Em Fri, Jun 02, 2017 at 03:45:11PM +0000, Liang, Kan escreveu:
> > > > > On Mon, May 29, 2017 at 02:52:39PM +0200, Peter Zijlstra wrote:
> > > > > > On Mon, May 29, 2017 at 02:46:37PM +0200, Jiri Olsa wrote:
> > > > > > > for some reason I can't get single SMI count generated, is
> > > > > > > there a setup/bench that would provoke that?
> >
> > > > > > Not having SMIs is a good thing ;-) Not sure we can tickle
> > > > > > them in a reliable way.
> >
> > > > > yea I saw some counts last time, now just zero so I was
> > > > > wondering if it's working
> >
> > > > We have internal test case which can generate SMI, but I cannot
> > > > publish the test case. Sorry about that.
> >
> > > APM_CNT (0xB2) could be used to trigger SMI#.
> >
> > Here if I run the following 'perf stat' command and press the mute
> > button (the one sharing F1 in a thinkpad t450s it triggers SMIs,
> > toggle it in quick sucession and it generates more, etc:
> >
> > [root@jouet ~]# perf stat -I 1000 -e msr/smi/
> > #           time             counts unit events
> >      1.000103173                  0      msr/smi/
> >      2.000278816                  4      msr/smi/
> >      3.000472630                  4      msr/smi/
> >      4.000743916                  0      msr/smi/
> >      5.001369358                  4      msr/smi/
> >      6.001668033                  0      msr/smi/
> >      7.001852603                  4      msr/smi/
> >      8.002108269                 12      msr/smi/
> >      9.002367312                  0      msr/smi/
> > ^C     9.961897866                  0      msr/smi/
> >
> > [root@jouet ~]#
> >
> > - Arnaldo
> >
> > > It's documented in PCH datasheet.
> > > https://www.intel.com/content/dam/www/public/us/en/
> > > documents/datasheets/9-series-chipset-pch-datasheet.pdf
> > >
> > > APM_CNT-Advanced Power Management Control Port Register I/O
> Address:
> > > B2h
> > > Attribute: R/W
> > > Default Value: 00h
> > > Size: 8 bits
> > > Lockable: No
> > > Usage: Legacy Only
> > > Power Well: Core
> > > Bit Description
> > > 7:0 Used to pass an APM command between the OS and the SMI handler.
> > > Writes to this port not only store data in the APMC register, but
> > > also generates an SMI# when the APMC_EN bit is set.
> > >
> > > You can write a byte to port 0xB2 to trigger an SMI#
> > >
> > > Thanks,
> > > Kan

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH V2 0/2] measure SMI cost (user)
  2017-06-20 13:43               ` Liang, Kan
@ 2017-06-20 20:29                 ` 'Arnaldo Carvalho de Melo'
  0 siblings, 0 replies; 15+ messages in thread
From: 'Arnaldo Carvalho de Melo' @ 2017-06-20 20:29 UTC (permalink / raw)
  To: Liang, Kan
  Cc: Jiri Olsa (jolsa@kernel.org), 'Jiri Olsa',
	'Peter Zijlstra', 'Thomas Gleixner',
	'Ingo Molnar', 'linux-kernel@vger.kernel.org',
	'Stephane Eranian', 'elliott@hpe.com',
	'Andi Kleen'

Em Tue, Jun 20, 2017 at 01:43:56PM +0000, Liang, Kan escreveu:
> Hi Arnaldo and Jirka,
> 
> Ping.
> Any comments for the patch?

I thought there was a kernel part still outstanding, now I see it was
already merged, will try it and provide comments.

- Arnaldo
 
> Thanks,
> Kan 
> 
> > Subject: RE: [PATCH V2 0/2] measure SMI cost (user)
> > 
> > Hi Jirka,
> > 
> > Have you got a chance to try the code?
> > Are you OK with the patch?
> > 
> > Thanks,
> > Kan
> > 
> > >
> > > Em Fri, Jun 02, 2017 at 03:45:11PM +0000, Liang, Kan escreveu:
> > > > > > On Mon, May 29, 2017 at 02:52:39PM +0200, Peter Zijlstra wrote:
> > > > > > > On Mon, May 29, 2017 at 02:46:37PM +0200, Jiri Olsa wrote:
> > > > > > > > for some reason I can't get single SMI count generated, is
> > > > > > > > there a setup/bench that would provoke that?
> > >
> > > > > > > Not having SMIs is a good thing ;-) Not sure we can tickle
> > > > > > > them in a reliable way.
> > >
> > > > > > yea I saw some counts last time, now just zero so I was
> > > > > > wondering if it's working
> > >
> > > > > We have internal test case which can generate SMI, but I cannot
> > > > > publish the test case. Sorry about that.
> > >
> > > > APM_CNT (0xB2) could be used to trigger SMI#.
> > >
> > > Here if I run the following 'perf stat' command and press the mute
> > > > button (the one sharing F1 in a thinkpad t450s) it triggers SMIs,
> > > > toggle it in quick succession and it generates more, etc:
> > >
> > > [root@jouet ~]# perf stat -I 1000 -e msr/smi/
> > > #           time             counts unit events
> > >      1.000103173                  0      msr/smi/
> > >      2.000278816                  4      msr/smi/
> > >      3.000472630                  4      msr/smi/
> > >      4.000743916                  0      msr/smi/
> > >      5.001369358                  4      msr/smi/
> > >      6.001668033                  0      msr/smi/
> > >      7.001852603                  4      msr/smi/
> > >      8.002108269                 12      msr/smi/
> > >      9.002367312                  0      msr/smi/
> > > ^C     9.961897866                  0      msr/smi/
> > >
> > > [root@jouet ~]#
> > >
> > > - Arnaldo
> > >
> > > > It's documented in PCH datasheet.
> > > > https://www.intel.com/content/dam/www/public/us/en/
> > > > documents/datasheets/9-series-chipset-pch-datasheet.pdf
> > > >
> > > > > APM_CNT-Advanced Power Management Control Port Register
> > > > > I/O Address: B2h
> > > > Attribute: R/W
> > > > Default Value: 00h
> > > > Size: 8 bits
> > > > Lockable: No
> > > > Usage: Legacy Only
> > > > Power Well: Core
> > > > Bit Description
> > > > 7:0 Used to pass an APM command between the OS and the SMI handler.
> > > > Writes to this port not only store data in the APMC register, but
> > > > also generates an SMI# when the APMC_EN bit is set.
> > > >
> > > > You can write a byte to port 0xB2 to trigger an SMI#
> > > >
> > > > Thanks,
> > > > Kan

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH V2 0/2] measure SMI cost (user)
  2017-05-26 19:05 [PATCH V2 0/2] measure SMI cost (user) kan.liang
                   ` (2 preceding siblings ...)
  2017-05-29 12:46 ` [PATCH V2 0/2] measure SMI cost (user) Jiri Olsa
@ 2017-06-20 21:43 ` Jiri Olsa
  3 siblings, 0 replies; 15+ messages in thread
From: Jiri Olsa @ 2017-06-20 21:43 UTC (permalink / raw)
  To: kan.liang
  Cc: acme, tglx, mingo, linux-kernel, peterz, eranian, jolsa, elliott, ak

On Fri, May 26, 2017 at 12:05:36PM -0700, kan.liang@intel.com wrote:
> From: Kan Liang <Kan.liang@intel.com>
> 
> Currently, there is no way to measure the time cost in System management
> mode (SMM) by perf.
> 
> Intel perfmon supports FREEZE_WHILE_SMM bit in IA32_DEBUGCTL. Once it sets,
> the PMU core counters will freeze on SMI handler. But it will not have an
> effect on free running counters. E.g. APERF counter.
> The cost of SMI can be measured by (aperf - unhalted core cycles).
> 
> A new sysfs entry /sys/devices/cpu/freeze_on_smi is introduced to set
> FREEZE_WHILE_SMM bit in IA32_DEBUGCTL. (kernel patch, which has been merged.
> The commit ID is 6089327f5424f227bb6a8cf92363c2617e054453)
> 
> A new --smi-cost mode in perf stat is implemented to measure the SMI cost
> by calculating unhalted core cycles and aperf results.
> 
> In practice, the percentage of SMI cycles is very useful for performance
> oriented analysis. So the output will be SMI cycles% and SMI#.
> Users who want to get the actual value can apply --no-metric-only.
> 
> Here is an example of default output.
> 
>  Performance counter stats for 'sudo echo ':
> 
> SMI cycles%          SMI#
>     0.1%              1
> 
>        0.010858678 seconds time elapsed
> 
> Changes since V1:
>  - Check path array before trying to open that path. (Robert)
>  - Refine change logs
> 
> Kan Liang (2):
>   tools lib api fs: Add sysfs__write_int function
>   perf stat: Add support to measure SMI cost

Acked-by: Jiri Olsa <jolsa@kernel.org>

thanks,
jirka

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [tip:perf/core] tools lib api fs: Add sysfs__write_int function
  2017-05-26 19:05 ` [PATCH V2 1/2] tools lib api fs: Add sysfs__write_int function kan.liang
@ 2017-06-21 18:17   ` tip-bot for Kan Liang
  0 siblings, 0 replies; 15+ messages in thread
From: tip-bot for Kan Liang @ 2017-06-21 18:17 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: kan.liang, peterz, acme, jolsa, hpa, ak, elliott, linux-kernel,
	Kan.liang, tglx, eranian, mingo

Commit-ID:  3b00ea938653d136c8e4bcbe9722d954e128ce2e
Gitweb:     http://git.kernel.org/tip/3b00ea938653d136c8e4bcbe9722d954e128ce2e
Author:     Kan Liang <Kan.liang@intel.com>
AuthorDate: Fri, 26 May 2017 12:05:37 -0700
Committer:  Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Wed, 21 Jun 2017 11:35:27 -0300

tools lib api fs: Add sysfs__write_int function

Add sysfs__write_int() to ease up writing int to sysfs.  New interface
is:

  int sysfs__write_int(const char *entry, int value);

Also, introducing filename__write_int() which is useful for new helpers
to write sysctl values.

Signed-off-by: Kan Liang <Kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Elliott <elliott@hpe.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1495825538-5230-2-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/api/fs/fs.c | 30 ++++++++++++++++++++++++++++++
 tools/lib/api/fs/fs.h |  4 ++++
 2 files changed, 34 insertions(+)

diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
index 809c772..a7ecf8f 100644
--- a/tools/lib/api/fs/fs.c
+++ b/tools/lib/api/fs/fs.c
@@ -387,6 +387,22 @@ int filename__read_str(const char *filename, char **buf, size_t *sizep)
 	return err;
 }
 
+int filename__write_int(const char *filename, int value)
+{
+	int fd = open(filename, O_WRONLY), err = -1;
+	char buf[64];
+
+	if (fd < 0)
+		return err;
+
+	sprintf(buf, "%d", value);
+	if (write(fd, buf, sizeof(buf)) == sizeof(buf))
+		err = 0;
+
+	close(fd);
+	return err;
+}
+
 int procfs__read_str(const char *entry, char **buf, size_t *sizep)
 {
 	char path[PATH_MAX];
@@ -480,3 +496,17 @@ int sysctl__read_int(const char *sysctl, int *value)
 
 	return filename__read_int(path, value);
 }
+
+int sysfs__write_int(const char *entry, int value)
+{
+	char path[PATH_MAX];
+	const char *sysfs = sysfs__mountpoint();
+
+	if (!sysfs)
+		return -1;
+
+	if (snprintf(path, sizeof(path), "%s/%s", sysfs, entry) >= PATH_MAX)
+		return -1;
+
+	return filename__write_int(path, value);
+}
diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h
index 956c211..4560534 100644
--- a/tools/lib/api/fs/fs.h
+++ b/tools/lib/api/fs/fs.h
@@ -31,6 +31,8 @@ int filename__read_int(const char *filename, int *value);
 int filename__read_ull(const char *filename, unsigned long long *value);
 int filename__read_str(const char *filename, char **buf, size_t *sizep);
 
+int filename__write_int(const char *filename, int value);
+
 int procfs__read_str(const char *entry, char **buf, size_t *sizep);
 
 int sysctl__read_int(const char *sysctl, int *value);
@@ -38,4 +40,6 @@ int sysfs__read_int(const char *entry, int *value);
 int sysfs__read_ull(const char *entry, unsigned long long *value);
 int sysfs__read_str(const char *entry, char **buf, size_t *sizep);
 int sysfs__read_bool(const char *entry, bool *value);
+
+int sysfs__write_int(const char *entry, int value);
 #endif /* __API_FS__ */

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [tip:perf/core] perf stat: Add support to measure SMI cost
  2017-05-26 19:05 ` [PATCH V2 2/2] perf stat: Add support to measure SMI cost kan.liang
@ 2017-06-21 18:18   ` tip-bot for Kan Liang
  0 siblings, 0 replies; 15+ messages in thread
From: tip-bot for Kan Liang @ 2017-06-21 18:18 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: acme, tglx, kan.liang, Kan.liang, linux-kernel, eranian, mingo,
	elliott, jolsa, hpa, peterz, ak

Commit-ID:  daefd0bc0bd28cea2e6b2f3e1a9da005cd4f58fc
Gitweb:     http://git.kernel.org/tip/daefd0bc0bd28cea2e6b2f3e1a9da005cd4f58fc
Author:     Kan Liang <Kan.liang@intel.com>
AuthorDate: Fri, 26 May 2017 12:05:38 -0700
Committer:  Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Wed, 21 Jun 2017 11:35:35 -0300

perf stat: Add support to measure SMI cost

Implementing a new --smi-cost mode in perf stat to measure SMI cost.

During the measurement, /sys/devices/cpu/freeze_on_smi will be set.

The measurement can be done with one counter (unhalted core cycles), and
two free running MSR counters (IA32_APERF and SMI_COUNT).

In practice, the percentages of SMI core cycles should be more useful
than absolute value. So the output will be the percentage of SMI core
cycles and SMI#. metric_only will be set by default.

SMI cycles% = (aperf - unhalted core cycles) / aperf

Here is an example output.

 Performance counter stats for 'sudo echo ':

SMI cycles%          SMI#
    0.1%              1

       0.010858678 seconds time elapsed

Users who want to get the actual value can additionally apply
--no-metric-only.

Signed-off-by: Kan Liang <Kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Elliott <elliott@hpe.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1495825538-5230-3-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-stat.txt | 14 ++++++++++
 tools/perf/builtin-stat.c              | 49 ++++++++++++++++++++++++++++++++++
 tools/perf/util/stat-shadow.c          | 33 +++++++++++++++++++++++
 tools/perf/util/stat.c                 |  2 ++
 tools/perf/util/stat.h                 |  2 ++
 5 files changed, 100 insertions(+)

diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index bd0e441..6980763 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -239,6 +239,20 @@ taskset.
 --no-merge::
 Do not merge results from same PMUs.
 
+--smi-cost::
+Measure SMI cost if msr/aperf/ and msr/smi/ events are supported.
+
+During the measurement, /sys/devices/cpu/freeze_on_smi will be set to
+freeze core counters on SMI.
+The aperf counter will not be affected by the setting.
+The cost of SMI can be measured by (aperf - unhalted core cycles).
+
+In practice, the percentage of SMI cycles is very useful for performance
+oriented analysis. --metric-only will be applied by default.
+The output is SMI cycles%, which equals (aperf - unhalted core cycles) / aperf
+
+Users who want to get the actual value can apply --no-metric-only.
+
 EXAMPLES
 --------
 
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index ad9324d1..3243630 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -86,6 +86,7 @@
 #define DEFAULT_SEPARATOR	" "
 #define CNTR_NOT_SUPPORTED	"<not supported>"
 #define CNTR_NOT_COUNTED	"<not counted>"
+#define FREEZE_ON_SMI_PATH	"devices/cpu/freeze_on_smi"
 
 static void print_counters(struct timespec *ts, int argc, const char **argv);
 
@@ -122,6 +123,14 @@ static const char * topdown_attrs[] = {
 	NULL,
 };
 
+static const char *smi_cost_attrs = {
+	"{"
+	"msr/aperf/,"
+	"msr/smi/,"
+	"cycles"
+	"}"
+};
+
 static struct perf_evlist	*evsel_list;
 
 static struct target target = {
@@ -137,6 +146,8 @@ static bool			null_run			=  false;
 static int			detailed_run			=  0;
 static bool			transaction_run;
 static bool			topdown_run			= false;
+static bool			smi_cost			= false;
+static bool			smi_reset			= false;
 static bool			big_num				=  true;
 static int			big_num_opt			=  -1;
 static const char		*csv_sep			= NULL;
@@ -1782,6 +1793,8 @@ static const struct option stat_options[] = {
 			"Only print computed metrics. No raw values", enable_metric_only),
 	OPT_BOOLEAN(0, "topdown", &topdown_run,
 			"measure topdown level 1 statistics"),
+	OPT_BOOLEAN(0, "smi-cost", &smi_cost,
+			"measure SMI cost"),
 	OPT_END()
 };
 
@@ -2160,6 +2173,39 @@ static int add_default_attributes(void)
 		return 0;
 	}
 
+	if (smi_cost) {
+		int smi;
+
+		if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) {
+			fprintf(stderr, "freeze_on_smi is not supported.\n");
+			return -1;
+		}
+
+		if (!smi) {
+			if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) {
+				fprintf(stderr, "Failed to set freeze_on_smi.\n");
+				return -1;
+			}
+			smi_reset = true;
+		}
+
+		if (pmu_have_event("msr", "aperf") &&
+		    pmu_have_event("msr", "smi")) {
+			if (!force_metric_only)
+				metric_only = true;
+			err = parse_events(evsel_list, smi_cost_attrs, NULL);
+		} else {
+			fprintf(stderr, "To measure SMI cost, it needs "
+				"msr/aperf/, msr/smi/ and cpu/cycles/ support\n");
+			return -1;
+		}
+		if (err) {
+			fprintf(stderr, "Cannot set up SMI cost events\n");
+			return -1;
+		}
+		return 0;
+	}
+
 	if (topdown_run) {
 		char *str = NULL;
 		bool warn = false;
@@ -2742,6 +2788,9 @@ int cmd_stat(int argc, const char **argv)
 	perf_stat__exit_aggr_mode();
 	perf_evlist__free_stats(evsel_list);
 out:
+	if (smi_cost && smi_reset)
+		sysfs__write_int(FREEZE_ON_SMI_PATH, 0);
+
 	perf_evlist__delete(evsel_list);
 	return status;
 }
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index ac10cc6..719d6cb 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -44,6 +44,8 @@ static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_smi_num_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_aperf_stats[NUM_CTX][MAX_NR_CPUS];
 static struct rblist runtime_saved_values;
 static bool have_frontend_stalled;
 
@@ -157,6 +159,8 @@ void perf_stat__reset_shadow_stats(void)
 	memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued));
 	memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles));
 	memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles));
+	memset(runtime_smi_num_stats, 0, sizeof(runtime_smi_num_stats));
+	memset(runtime_aperf_stats, 0, sizeof(runtime_aperf_stats));
 
 	next = rb_first(&runtime_saved_values.entries);
 	while (next) {
@@ -217,6 +221,10 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
 		update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
 		update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
+	else if (perf_stat_evsel__is(counter, SMI_NUM))
+		update_stats(&runtime_smi_num_stats[ctx][cpu], count[0]);
+	else if (perf_stat_evsel__is(counter, APERF))
+		update_stats(&runtime_aperf_stats[ctx][cpu], count[0]);
 
 	if (counter->collect_stat) {
 		struct saved_value *v = saved_value_lookup(counter, cpu, ctx,
@@ -592,6 +600,29 @@ static double td_be_bound(int ctx, int cpu)
 	return sanitize_val(1.0 - sum);
 }
 
+static void print_smi_cost(int cpu, struct perf_evsel *evsel,
+			   struct perf_stat_output_ctx *out)
+{
+	double smi_num, aperf, cycles, cost = 0.0;
+	int ctx = evsel_context(evsel);
+	const char *color = NULL;
+
+	smi_num = avg_stats(&runtime_smi_num_stats[ctx][cpu]);
+	aperf = avg_stats(&runtime_aperf_stats[ctx][cpu]);
+	cycles = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+
+	if ((cycles == 0) || (aperf == 0))
+		return;
+
+	if (smi_num)
+		cost = (aperf - cycles) / aperf * 100.00;
+
+	if (cost > 10)
+		color = PERF_COLOR_RED;
+	out->print_metric(out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
+	out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num);
+}
+
 void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 				   double avg, int cpu,
 				   struct perf_stat_output_ctx *out)
@@ -825,6 +856,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 		}
 		snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
 		print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
+	} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
+		print_smi_cost(cpu, evsel, out);
 	} else {
 		print_metric(ctxp, NULL, NULL, NULL, 0);
 	}
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index c581744..53b9a99 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -86,6 +86,8 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
 	ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
 	ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
 	ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
+	ID(SMI_NUM, msr/smi/),
+	ID(APERF, msr/aperf/),
 };
 #undef ID
 
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 0a65ae2..7522bf1 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -22,6 +22,8 @@ enum perf_stat_evsel_id {
 	PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED,
 	PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES,
 	PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES,
+	PERF_STAT_EVSEL_ID__SMI_NUM,
+	PERF_STAT_EVSEL_ID__APERF,
 	PERF_STAT_EVSEL_ID__MAX,
 };
 

^ permalink raw reply related	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2017-06-21 18:22 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-05-26 19:05 [PATCH V2 0/2] measure SMI cost (user) kan.liang
2017-05-26 19:05 ` [PATCH V2 1/2] tools lib api fs: Add sysfs__write_int function kan.liang
2017-06-21 18:17   ` [tip:perf/core] " tip-bot for Kan Liang
2017-05-26 19:05 ` [PATCH V2 2/2] perf stat: Add support to measure SMI cost kan.liang
2017-06-21 18:18   ` [tip:perf/core] " tip-bot for Kan Liang
2017-05-29 12:46 ` [PATCH V2 0/2] measure SMI cost (user) Jiri Olsa
2017-05-29 12:52   ` Peter Zijlstra
2017-05-29 13:16     ` Jiri Olsa
2017-05-29 17:06       ` Liang, Kan
2017-06-02 15:45         ` Liang, Kan
2017-06-02 18:27           ` Arnaldo Carvalho de Melo
2017-06-14 17:50             ` Liang, Kan
2017-06-20 13:43               ` Liang, Kan
2017-06-20 20:29                 ` 'Arnaldo Carvalho de Melo'
2017-06-20 21:43 ` Jiri Olsa

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.