From: Andi Kleen <andi@firstfloor.org>
To: speck@linutronix.de
Cc: Andi Kleen <ak@linux.intel.com>
Subject: [MODERATED] [PATCH v3 2/6] PERFv3
Date: Thu,  7 Feb 2019 15:41:04 -0800	[thread overview]
Message-ID: <3dd5d6e2bc9ac53f826c251c68ce84fcc79a6872.1549582769.git.ak@linux.intel.com> (raw)
In-Reply-To: <cover.1549582769.git.ak@linux.intel.com>

From: Andi Kleen <ak@linux.intel.com>
Subject:  x86/pmu/intel: Handle TSX with counter 3 on Skylake

Most of the code is from Peter Zijlstra at this point,
based on earlier code from AK.

On Skylake with recent microcode updates, due to erratum XXX,
perfmon general-purpose counter 3 can be corrupted when RTM
transactions are executed.

The microcode provides a new MSR to force disable RTM
(make all RTM transactions abort).
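
For illustration only (not part of this patch), forcing and
re-allowing RTM through the new MSR boils down to the following
sketch, using the constants added to msr-index.h below:

	/* Sketch: force all RTM transactions on this CPU to abort */
	wrmsrl(MSR_TSX_FORCE_ABORT, MSR_TFA_RTM_FORCE_ABORT);

	/* Sketch: allow RTM transactions again */
	wrmsrl(MSR_TSX_FORCE_ABORT, 0);

The patch itself caches the last written value in a per-cpu
tfa_shadow field to avoid redundant MSR writes.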

This patch adds the low-level code to manage this MSR.
Depending on a global flag (/sys/devices/cpu/enable_all_counters),
events are either scheduled or not scheduled on generic counter 3.

When the flag is set, and an event uses counter 3, disable TSX
while the event is active.

This patch assumes that the kernel is using
RETPOLINE (or IBRS), otherwise speculative execution could
still corrupt counter 3 in very unlikely cases.

The enable_all_counters flag default is set to zero in this
patch. This default could be changed.
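
It can also be flipped at runtime through the sysfs attribute
added by this patch, for example:

	echo 1 > /sys/devices/cpu/enable_all_counters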

The trade-offs for setting the option default are:

    Using 4 (or 8 with HT off) events in perf versus
    allowing RTM usage while perf is active.

    - Existing programs that use perf groups with 4 counters
    may not retrieve perfmon data anymore (see the example
    after this list). Perf usages that use fewer than four
    (or eight with HT off) counters are not impacted. Perf
    usages that don't use groups will still work, but will
    see increased multiplexing.

    - TSX programs should not functionally break from
    forcing RTM to abort because they always need a valid
    fallback path. However, they will see significantly
    lower performance if they rely on TSX for performance
    (all RTM transactions will run and only abort at the end),
    potentially slowing them down so much that it is
    equivalent to functional breakage.
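
For example (illustrative usage, not from this patch), a group of
four events that all need generic counters, such as

	perf stat -e '{branches,branch-misses,cache-references,cache-misses}' -a sleep 1

can only be co-scheduled (with HT on) when counter 3 is usable,
i.e. with enable_all_counters set to 1; with the default of 0 the
group does not fit on three counters and will not be counted.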

Signed-off-by: Andi Kleen <ak@linux.intel.com>

---
v2:
Use u8 instead of bool
Rename force_rtm_abort_active.
v3:
Use correct patch version that actually compiles.
v4:
Switch to Peter's implementation with some updates by AK.
Now the TFA state is checked in enable_all,
and the extra mask is handled by get_constraint.
Use a temporary constraint instead of modifying the globals.
---
 arch/x86/events/core.c             |  6 ++-
 arch/x86/events/intel/core.c       | 64 +++++++++++++++++++++++++++++-
 arch/x86/events/perf_event.h       | 10 ++++-
 arch/x86/include/asm/cpufeatures.h |  1 +
 arch/x86/include/asm/msr-index.h   |  5 +++
 5 files changed, 83 insertions(+), 3 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 58e659bfc2d9..f5d1435c6071 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2252,7 +2252,11 @@ static ssize_t num_counter_show(struct device *cdev,
 				  struct device_attribute *attr,
 				  char *buf)
 {
-	return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu.num_counters);
+	int num = x86_pmu.num_counters;
+	if (boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT) &&
+		!perf_enable_all_counters && num > 0)
+		num--;
+	return snprintf(buf, PAGE_SIZE, "%d\n", num);
 }
 
 static DEVICE_ATTR_RO(num_counter);
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index daafb893449b..b4162b4b0899 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -1999,6 +1999,30 @@ static void intel_pmu_nhm_enable_all(int added)
 	intel_pmu_enable_all(added);
 }
 
+static void intel_skl_pmu_enable_all(int added)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	u64 val;
+
+	/*
+	 * The perf code is not expected to execute RTM instructions
+	 * (and also cannot misspeculate into them due to RETPOLINE
+	 * use), so PMC3 should be 'stable'; IOW the values we
+	 * just potentially programmed into it, should still be there.
+	 *
+	 * If we programmed PMC3, make sure to set TFA before we make
+	 * things go and possibly encounter RTM instructions.
+	 * Similarly, if PMC3 got unused, make sure to clear TFA.
+	 */
+	val = MSR_TFA_RTM_FORCE_ABORT * test_bit(3, cpuc->active_mask);
+	if (cpuc->tfa_shadow != val) {
+		cpuc->tfa_shadow = val;
+		wrmsrl(MSR_TSX_FORCE_ABORT, val);
+	}
+
+	intel_pmu_enable_all(added);
+}
+
 static void enable_counter_freeze(void)
 {
 	update_debugctlmsr(get_debugctlmsr() |
@@ -3345,6 +3369,34 @@ glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 	return c;
 }
 
+bool perf_enable_all_counters __read_mostly;
+
+/*
+ * On Skylake counter 3 may get corrupted when RTM is used.
+ * Either avoid counter 3, or disable RTM when counter 3 used.
+ */
+
+static struct event_constraint *
+skl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+			  struct perf_event *event)
+{
+	struct event_constraint *c;
+
+	c = hsw_get_event_constraints(cpuc, idx, event);
+
+	if (!perf_enable_all_counters) {
+		cpuc->counter3_constraint = *c;
+		c = &cpuc->counter3_constraint;
+
+		/*
+		 * Without TFA we must not use PMC3.
+		 */
+		__clear_bit(3, c->idxmsk);
+	}
+
+	return c;
+}
+
 /*
  * Broadwell:
  *
@@ -4061,8 +4113,11 @@ static struct attribute *intel_pmu_caps_attrs[] = {
        NULL
 };
 
+DEVICE_BOOL_ATTR(enable_all_counters, 0644, perf_enable_all_counters);
+
 static struct attribute *intel_pmu_attrs[] = {
 	&dev_attr_freeze_on_smi.attr,
+	NULL,	/* May be overridden with enable_all_counters */
 	NULL,
 };
 
@@ -4543,9 +4598,16 @@ __init int intel_pmu_init(void)
 		/* all extra regs are per-cpu when HT is on */
 		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
 		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
+		if (boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) {
+			x86_pmu.enable_all = intel_skl_pmu_enable_all;
+			intel_pmu_attrs[1] = &dev_attr_enable_all_counters.attr.attr;
+			x86_pmu.get_event_constraints = skl_get_event_constraints;
+			/* Could add checking&warning for !RETPOLINE here */
+		} else {
+			x86_pmu.get_event_constraints = hsw_get_event_constraints;
+		}
 
 		x86_pmu.hw_config = hsw_hw_config;
-		x86_pmu.get_event_constraints = hsw_get_event_constraints;
 		extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
 			hsw_format_attr : nhm_format_attr;
 		extra_attr = merge_attr(extra_attr, skl_format_attr);
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 78d7b7031bfc..2474ebfad961 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -70,7 +70,7 @@ struct event_constraint {
 #define PERF_X86_EVENT_EXCL_ACCT	0x0200 /* accounted EXCL event */
 #define PERF_X86_EVENT_AUTO_RELOAD	0x0400 /* use PEBS auto-reload */
 #define PERF_X86_EVENT_LARGE_PEBS	0x0800 /* use large PEBS */
-
+#define PERF_X86_EVENT_ABORT_TSX	0x1000 /* force abort TSX */
 
 struct amd_nb {
 	int nb_id;  /* NorthBridge id */
@@ -242,6 +242,12 @@ struct cpu_hw_events {
 	struct intel_excl_cntrs		*excl_cntrs;
 	int excl_thread_id; /* 0 or 1 */
 
+	/*
+	 * Manage using counter 3 on Skylake with TSX.
+	 */
+	u64				tfa_shadow;
+	struct event_constraint		counter3_constraint;
+
 	/*
 	 * AMD specific bits
 	 */
@@ -998,6 +1004,8 @@ static inline int is_ht_workaround_enabled(void)
 	return !!(x86_pmu.flags & PMU_FL_EXCL_ENABLED);
 }
 
+extern bool perf_enable_all_counters;
+
 #else /* CONFIG_CPU_SUP_INTEL */
 
 static inline void reserve_ds_buffers(void)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 6d6122524711..981ff9479648 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -344,6 +344,7 @@
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
 #define X86_FEATURE_AVX512_4VNNIW	(18*32+ 2) /* AVX-512 Neural Network Instructions */
 #define X86_FEATURE_AVX512_4FMAPS	(18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_TSX_FORCE_ABORT	(18*32+13) /* "" TSX_FORCE_ABORT */
 #define X86_FEATURE_PCONFIG		(18*32+18) /* Intel PCONFIG */
 #define X86_FEATURE_SPEC_CTRL		(18*32+26) /* "" Speculation Control (IBRS + IBPB) */
 #define X86_FEATURE_INTEL_STIBP		(18*32+27) /* "" Single Thread Indirect Branch Predictors */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 8e40c2446fd1..492b18720dba 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -666,6 +666,11 @@
 
 #define MSR_IA32_TSC_DEADLINE		0x000006E0
 
+#define MSR_TSX_FORCE_ABORT		0x0000010F
+
+#define MSR_TFA_RTM_FORCE_ABORT_BIT	0
+#define MSR_TFA_RTM_FORCE_ABORT		BIT_ULL(MSR_TFA_RTM_FORCE_ABORT_BIT)
+
 /* P4/Xeon+ specific */
 #define MSR_IA32_MCG_EAX		0x00000180
 #define MSR_IA32_MCG_EBX		0x00000181
-- 
2.17.2

Thread overview: 33+ messages
2019-02-07 23:41 [MODERATED] [PATCH v3 0/6] PERFv3 Andi Kleen
2019-02-07 23:41 ` [MODERATED] [PATCH v3 1/6] PERFv3 Andi Kleen
2019-02-08  8:45   ` [MODERATED] " Peter Zijlstra
2019-02-07 23:41 ` Andi Kleen [this message]
2019-02-08  0:51   ` [MODERATED] Re: [SUSPECTED SPAM][PATCH v3 2/6] PERFv3 Andrew Cooper
2019-02-08  9:01     ` Peter Zijlstra
2019-02-08  9:31       ` [MODERATED] Re: [PATCH " Andrew Cooper
2019-02-08  9:39       ` [MODERATED] Re: [SUSPECTED SPAM][PATCH " Peter Zijlstra
2019-02-08 10:53         ` [MODERATED] [RFC][PATCH] performance walnuts Peter Zijlstra
2019-02-08 18:07           ` [MODERATED] " Andi Kleen
2019-02-11 10:40             ` Peter Zijlstra
2019-02-11 14:06               ` Thomas Gleixner
2019-02-11 20:17                 ` [MODERATED] " Konrad Rzeszutek Wilk
2019-02-11 23:39                   ` Thomas Gleixner
2019-02-09  0:28           ` [MODERATED] " Linus Torvalds
2019-02-09  4:34             ` Andi Kleen
2019-02-09  8:57             ` Peter Zijlstra
2019-02-13  2:56           ` mark gross
2019-02-15 17:32             ` mark gross
2019-02-15 17:44               ` Peter Zijlstra
2019-02-15 20:47                 ` mark gross
2019-02-15 21:33                   ` Thomas Gleixner
2019-02-19 13:35               ` [MODERATED] [RFC][PATCH v2] " Peter Zijlstra
2019-02-15 23:45           ` [MODERATED] Encrypted Message Jon Masters
2019-02-08  8:50   ` [MODERATED] Re: [PATCH v3 2/6] PERFv3 Peter Zijlstra
2019-02-08 17:26     ` Andi Kleen
2019-02-07 23:41 ` [MODERATED] [PATCH v3 3/6] PERFv3 Andi Kleen
2019-02-08  9:02   ` [MODERATED] " Peter Zijlstra
2019-02-07 23:41 ` [MODERATED] [PATCH v3 4/6] PERFv3 Andi Kleen
2019-02-07 23:41 ` [MODERATED] [PATCH v3 5/6] PERFv3 Andi Kleen
2019-02-08  0:54   ` [MODERATED] " Andrew Cooper
2019-02-07 23:41 ` [MODERATED] [PATCH v3 6/6] PERFv3 Andi Kleen
2019-02-08  9:07   ` [MODERATED] " Peter Zijlstra
