historical-speck.lore.kernel.org archive mirror
 help / color / mirror / Atom feed
* [MODERATED] [PATCH 1/2] v3 more sampling fun 1
  2020-03-11 15:39 [MODERATED] [PATCH 0/2] v3 more sampling fun 0 mark gross
@ 2020-01-16 22:16 ` mark gross
  2020-01-30 19:12 ` [MODERATED] [PATCH 2/2] v3 more sampling fun 2 mark gross
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 19+ messages in thread
From: mark gross @ 2020-01-16 22:16 UTC (permalink / raw)
  To: speck

From: mark gross <mgross@linux.intel.com>
Subject: [PATCH 1/2] x86/speculation: Special Register Buffer Data Sampling
 (SRBDS) mitigation control.

SRBDS is an MDS-like speculative side channel that can leak bits from
the RNG across cores and threads. New microcode serializes the processor
access during the execution of RDRAND and RDSEED ensures that the shared
buffer is overwritten before it is released for reuse.

This vulnerability comes in two classes:
Processor models that are vulnerable and models with TSX enumerating
MDS_NO which are only vulnerable when TSX is enabled.

The mitigation is activated by default on affected processors and it
increases latency for RDRAND and RDSEED instructions.  Among other
effects this will reduce throughput from /dev/urandom.

This patch:
* enables administrator to configure the mitigation off when desired
  using either mitigations=off or srbds=off.
* exports vulnerability status via sysfs

Signed-off-by: Mark Gross <mgross@linux.intel.com>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Tested-by: Neelima Krishnan <neelima.krishnan@intel.com>
---
 .../admin-guide/kernel-parameters.txt         |  11 ++
 arch/x86/include/asm/cpu_device_id.h          |  12 ++
 arch/x86/include/asm/cpufeatures.h            |   2 +
 arch/x86/include/asm/msr-index.h              |   4 +
 arch/x86/kernel/cpu/bugs.c                    | 110 ++++++++++++++++++
 arch/x86/kernel/cpu/common.c                  |  41 +++++++
 arch/x86/kernel/cpu/cpu.h                     |  13 +++
 arch/x86/kernel/cpu/intel.c                   |   2 +
 arch/x86/kernel/cpu/match.c                   |  26 +++++
 drivers/base/cpu.c                            |   8 ++
 10 files changed, 229 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index c07815d230bc..71cf7e7eae45 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4659,6 +4659,17 @@
 	spia_pedr=
 	spia_peddr=
 
+	srbds=		[x86]
+			Special Register Buffer Data Sampling mitigation
+			control.
+
+			On CPUs vulnerable to this issue and users impacted by
+			the mitigation slowing down RDRAND and RDSEED can
+			disable by setting this:
+
+			off:	disable mitigation and remove performance
+				impact to rdrand and rdseed
+
 	srcutree.counter_wrap_check [KNL]
 			Specifies how frequently to check for
 			grace-period sequence counter wrap for the
diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h
index 31c379c1da41..1128e211cdb6 100644
--- a/arch/x86/include/asm/cpu_device_id.h
+++ b/arch/x86/include/asm/cpu_device_id.h
@@ -35,7 +35,19 @@ struct x86_cpu_desc {
 	.x86_microcode_rev	= (revision),			\
 }
 
+/*
+ * Match a range of steppings
+ */
+
+struct x86_cpu_id_ext {
+	struct x86_cpu_id id;
+	__u16 steppings; /* bit map of steppings to match against */
+};
+
+#define X86_STEPPING_ANY GENMASK(15, 0)
+
 extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match);
+const struct x86_cpu_id_ext *x86_match_cpu_ext(const struct x86_cpu_id_ext *match);
 extern bool x86_cpu_has_min_microcode_rev(const struct x86_cpu_desc *table);
 
 #endif /* _ASM_X86_CPU_DEVICE_ID */
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index f3327cb56edf..69f7dcb1fa5c 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -360,6 +360,7 @@
 #define X86_FEATURE_AVX512_4FMAPS	(18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
 #define X86_FEATURE_FSRM		(18*32+ 4) /* Fast Short Rep Mov */
 #define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */
+#define X86_FEATURE_SRBDS_CTRL		(18*32+ 9) /* "" SRBDS mitigation MSR available */
 #define X86_FEATURE_MD_CLEAR		(18*32+10) /* VERW clears CPU buffers */
 #define X86_FEATURE_TSX_FORCE_ABORT	(18*32+13) /* "" TSX_FORCE_ABORT */
 #define X86_FEATURE_PCONFIG		(18*32+18) /* Intel PCONFIG */
@@ -404,5 +405,6 @@
 #define X86_BUG_SWAPGS			X86_BUG(21) /* CPU is affected by speculation through SWAPGS */
 #define X86_BUG_TAA			X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */
 #define X86_BUG_ITLB_MULTIHIT		X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
+#define X86_BUG_SRBDS			X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
 
 #endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index d5e517d1c3dd..af64c8e80ff4 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -119,6 +119,10 @@
 #define TSX_CTRL_RTM_DISABLE		BIT(0)	/* Disable RTM feature */
 #define TSX_CTRL_CPUID_CLEAR		BIT(1)	/* Disable TSX enumeration */
 
+/* SRBDS support */
+#define MSR_IA32_MCU_OPT_CTRL		0x00000123
+#define RNGDS_MITG_DIS			BIT(0)
+
 #define MSR_IA32_SYSENTER_CS		0x00000174
 #define MSR_IA32_SYSENTER_ESP		0x00000175
 #define MSR_IA32_SYSENTER_EIP		0x00000176
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index ed54b3b21c39..48fdc01eb95c 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -41,6 +41,7 @@ static void __init l1tf_select_mitigation(void);
 static void __init mds_select_mitigation(void);
 static void __init mds_print_mitigation(void);
 static void __init taa_select_mitigation(void);
+static void __init srbds_select_mitigation(void);
 
 /* The base value of the SPEC_CTRL MSR that always has to be preserved. */
 u64 x86_spec_ctrl_base;
@@ -108,6 +109,7 @@ void __init check_bugs(void)
 	l1tf_select_mitigation();
 	mds_select_mitigation();
 	taa_select_mitigation();
+	srbds_select_mitigation();
 
 	/*
 	 * As MDS and TAA mitigations are inter-related, print MDS
@@ -397,6 +399,101 @@ static int __init tsx_async_abort_parse_cmdline(char *str)
 }
 early_param("tsx_async_abort", tsx_async_abort_parse_cmdline);
 
+#undef pr_fmt
+#define pr_fmt(fmt)	"SRBDS: " fmt
+
+enum srbds_mitigations srbds_mitigation __ro_after_init = SRBDS_MITIGATION_FULL;
+static const char * const srbds_strings[] = {
+	[SRBDS_NOT_AFFECTED]		= "Not affected",
+	[SRBDS_MITIGATION_OFF]		= "Vulnerable",
+	[SRBDS_MITIGATION_UCODE_NEEDED]	= "Vulnerable: no microcode",
+	[SRBDS_MITIGATION_FULL]		= "Mitigated",
+	[SRBDS_TSX_NOT_AFFECTED]	= "Not affected (TSX disabled)",
+	[SRBDS_HYPERVISOR]		= "Unknown",
+};
+
+static bool srbds_off;
+
+void srbds_configure_mitigation(void)
+{
+	u64 mcu_ctrl;
+
+	if (srbds_mitigation == SRBDS_NOT_AFFECTED)
+		return;
+
+	if (srbds_mitigation == SRBDS_HYPERVISOR)
+		return;
+
+	if (srbds_mitigation == SRBDS_MITIGATION_UCODE_NEEDED)
+		return;
+
+	rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
+
+	switch (srbds_mitigation) {
+	case SRBDS_MITIGATION_OFF:
+	case SRBDS_TSX_NOT_AFFECTED:
+		mcu_ctrl |= RNGDS_MITG_DIS;
+		break;
+	case SRBDS_MITIGATION_FULL:
+		mcu_ctrl &= ~RNGDS_MITG_DIS;
+		break;
+	default:
+		break;
+	}
+
+	wrmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
+}
+
+static void __init srbds_select_mitigation(void)
+{
+	u64 ia32_cap;
+
+	if (!boot_cpu_has_bug(X86_BUG_SRBDS)) {
+		srbds_mitigation = SRBDS_NOT_AFFECTED;
+		return;
+	}
+
+	if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
+		srbds_mitigation = SRBDS_HYPERVISOR;
+		return;
+	}
+
+	if (!boot_cpu_has(X86_FEATURE_SRBDS_CTRL)) {
+		srbds_mitigation = SRBDS_MITIGATION_UCODE_NEEDED;
+		return;
+	}
+
+	if (boot_cpu_has_bug(X86_BUG_SRBDS)) {
+		srbds_mitigation = SRBDS_MITIGATION_FULL;
+
+		ia32_cap = x86_read_arch_cap_msr();
+		if (ia32_cap & ARCH_CAP_MDS_NO) {
+			if (!boot_cpu_has(X86_FEATURE_RTM))
+				srbds_mitigation = SRBDS_TSX_NOT_AFFECTED;
+		}
+	}
+
+	if (cpu_mitigations_off() || srbds_off) {
+		if (srbds_mitigation != SRBDS_TSX_NOT_AFFECTED)
+			srbds_mitigation = SRBDS_MITIGATION_OFF;
+	}
+
+	srbds_configure_mitigation();
+}
+
+static int __init srbds_parse_cmdline(char *str)
+{
+	if (!str)
+		return -EINVAL;
+
+	if (!strcmp(str, "off"))
+		srbds_off = true;
+
+	return 0;
+}
+
+early_param("srbds", srbds_parse_cmdline);
+
 #undef pr_fmt
 #define pr_fmt(fmt)     "Spectre V1 : " fmt
 
@@ -1528,6 +1625,11 @@ static char *ibpb_state(void)
 	return "";
 }
 
+static ssize_t srbds_show_state(char *buf)
+{
+	return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]);
+}
+
 static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
 			       char *buf, unsigned int bug)
 {
@@ -1572,6 +1674,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
 	case X86_BUG_ITLB_MULTIHIT:
 		return itlb_multihit_show_state(buf);
 
+	case X86_BUG_SRBDS:
+		return srbds_show_state(buf);
+
 	default:
 		break;
 	}
@@ -1618,4 +1723,9 @@ ssize_t cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr
 {
 	return cpu_show_common(dev, attr, buf, X86_BUG_ITLB_MULTIHIT);
 }
+
+ssize_t cpu_show_special_register_data_sampling(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return cpu_show_common(dev, attr, buf, X86_BUG_SRBDS);
+}
 #endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4cdb123ff66a..055ccbe6b364 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1007,6 +1007,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
 #define NO_SWAPGS		BIT(6)
 #define NO_ITLB_MULTIHIT	BIT(7)
 #define NO_SPECTRE_V2		BIT(8)
+#define SRBDS			BIT(9)
 
 #define VULNWL(_vendor, _family, _model, _whitelist)	\
 	{ X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist }
@@ -1020,6 +1021,15 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
 #define VULNWL_HYGON(family, whitelist)		\
 	VULNWL(HYGON, family, X86_MODEL_ANY, whitelist)
 
+#define VULNWL_EXT(_vendor, _family, _model, _steppings, _whitelist)	\
+	{ VULNWL(_vendor, _family, _model, _whitelist), _steppings }
+
+#define VULNWL_INTEL_EXT(model,  whitelist)		\
+	VULNWL_EXT(INTEL, 6, INTEL_FAM6_##model, X86_STEPPING_ANY, whitelist)
+
+#define VULNWL_INTEL_STEPPING(model, stepping, whitelist)		\
+	VULNWL_EXT(INTEL, 6, INTEL_FAM6_##model, stepping, whitelist)
+
 static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
 	VULNWL(ANY,	4, X86_MODEL_ANY,	NO_SPECULATION),
 	VULNWL(CENTAUR,	5, X86_MODEL_ANY,	NO_SPECULATION),
@@ -1075,6 +1085,27 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
 	{}
 };
 
+/*
+ * to avoide corrupting the whiltelist with blacklist items lets create a list
+ * of affected processors for issues that cannot be enumerated other than by
+ * family/model/stepping
+ */
+static const struct x86_cpu_id_ext affected_cpus[] __initconst = {
+	VULNWL_INTEL_EXT(IVYBRIDGE,		SRBDS),
+	VULNWL_INTEL_EXT(HASWELL,		SRBDS),
+	VULNWL_INTEL_EXT(HASWELL_L,		SRBDS),
+	VULNWL_INTEL_EXT(HASWELL_G,		SRBDS),
+	VULNWL_INTEL_EXT(BROADWELL_G,		SRBDS),
+	VULNWL_INTEL_EXT(BROADWELL,		SRBDS),
+	VULNWL_INTEL_EXT(SKYLAKE_L,		SRBDS),
+	VULNWL_INTEL_EXT(SKYLAKE,		SRBDS),
+	VULNWL_INTEL_STEPPING(KABYLAKE_L, GENMASK(0xA, 0),	SRBDS), /*06_8E steppings <=A*/
+	VULNWL_INTEL_STEPPING(KABYLAKE_L, GENMASK(0xC, 0xB),	SRBDS), /*06_8E stepping = 0xB|0xC if TSX enabled*/
+	VULNWL_INTEL_STEPPING(KABYLAKE, GENMASK(0xB, 0),	SRBDS), /*06_9E steppings <=B*/
+	VULNWL_INTEL_STEPPING(KABYLAKE, GENMASK(0xD, 0xC),	SRBDS), /*06_9E stepping = 0xC if TSX enabled*/
+	{}
+};
+
 static bool __init cpu_matches(unsigned long which)
 {
 	const struct x86_cpu_id *m = x86_match_cpu(cpu_vuln_whitelist);
@@ -1082,6 +1113,13 @@ static bool __init cpu_matches(unsigned long which)
 	return m && !!(m->driver_data & which);
 }
 
+static bool __init cpu_affected(unsigned long which)
+{
+	const struct x86_cpu_id_ext *m = x86_match_cpu_ext(affected_cpus);
+
+	return m && !!(m->id.driver_data & which);
+}
+
 u64 x86_read_arch_cap_msr(void)
 {
 	u64 ia32_cap = 0;
@@ -1124,6 +1162,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
 	if (!cpu_matches(NO_SWAPGS))
 		setup_force_cpu_bug(X86_BUG_SWAPGS);
 
+	if (cpu_affected(SRBDS))
+		setup_force_cpu_bug(X86_BUG_SRBDS);
+
 	/*
 	 * When the CPU is not mitigated for TAA (TAA_NO=0) set TAA bug when:
 	 *	- TSX is supported or
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 37fdefd14f28..22d419080fd6 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -44,7 +44,20 @@ struct _tlb_table {
 extern const struct cpu_dev *const __x86_cpu_dev_start[],
 			    *const __x86_cpu_dev_end[];
 
+enum srbds_mitigations {
+	SRBDS_NOT_AFFECTED,
+	SRBDS_MITIGATION_OFF,
+	SRBDS_MITIGATION_UCODE_NEEDED,
+	SRBDS_MITIGATION_FULL,
+	SRBDS_TSX_NOT_AFFECTED,
+	SRBDS_HYPERVISOR,
+};
+
+extern __ro_after_init enum srbds_mitigations srbds_mitigation;
+void srbds_configure_mitigation(void);
+
 #ifdef CONFIG_CPU_SUP_INTEL
+
 enum tsx_ctrl_states {
 	TSX_CTRL_ENABLE,
 	TSX_CTRL_DISABLE,
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index be82cd5841c3..1b083a2a415b 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -684,6 +684,8 @@ static void init_intel(struct cpuinfo_x86 *c)
 		tsx_enable();
 	if (tsx_ctrl_state == TSX_CTRL_DISABLE)
 		tsx_disable();
+
+	srbds_configure_mitigation();
 }
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
index 6dd78d8235e4..118c503b1c36 100644
--- a/arch/x86/kernel/cpu/match.c
+++ b/arch/x86/kernel/cpu/match.c
@@ -49,6 +49,32 @@ const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match)
 }
 EXPORT_SYMBOL(x86_match_cpu);
 
+/*
+ * Extend x86_match_cpu to support matching a range of steppings.
+ */
+const struct x86_cpu_id_ext *x86_match_cpu_ext(const struct x86_cpu_id_ext *match)
+{
+	const struct x86_cpu_id_ext *m;
+	struct cpuinfo_x86 *c = &boot_cpu_data;
+
+	for (m = match; m->id.vendor | m->id.family | m->id.model | m->id.feature; m++) {
+		if (m->id.vendor != X86_VENDOR_ANY && c->x86_vendor != m->id.vendor)
+			continue;
+		if (m->id.family != X86_FAMILY_ANY && c->x86 != m->id.family)
+			continue;
+		if (m->id.model != X86_MODEL_ANY && c->x86_model != m->id.model)
+			continue;
+		if (m->steppings != X86_STEPPING_ANY &&
+		    !(BIT(c->x86_stepping) & m->steppings))
+			continue;
+		if (m->id.feature != X86_FEATURE_ANY && !cpu_has(c, m->id.feature))
+			continue;
+		return m;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(x86_match_cpu_ext);
+
 static const struct x86_cpu_desc *
 x86_match_cpu_with_stepping(const struct x86_cpu_desc *match)
 {
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 6265871a4af2..d69e094e790c 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -567,6 +567,12 @@ ssize_t __weak cpu_show_itlb_multihit(struct device *dev,
 	return sprintf(buf, "Not affected\n");
 }
 
+ssize_t __weak cpu_show_special_register_data_sampling(struct device *dev,
+						       struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "Not affected\n");
+}
+
 static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
 static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
 static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
@@ -575,6 +581,7 @@ static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL);
 static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL);
 static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL);
 static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL);
+static DEVICE_ATTR(special_register_data_sampling, 0444, cpu_show_special_register_data_sampling, NULL);
 
 static struct attribute *cpu_root_vulnerabilities_attrs[] = {
 	&dev_attr_meltdown.attr,
@@ -585,6 +592,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
 	&dev_attr_mds.attr,
 	&dev_attr_tsx_async_abort.attr,
 	&dev_attr_itlb_multihit.attr,
+	&dev_attr_special_register_data_sampling.attr,
 	NULL
 };
 
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [MODERATED] [PATCH 2/2] v3 more sampling fun 2
  2020-03-11 15:39 [MODERATED] [PATCH 0/2] v3 more sampling fun 0 mark gross
  2020-01-16 22:16 ` [MODERATED] [PATCH 1/2] v3 more sampling fun 1 mark gross
@ 2020-01-30 19:12 ` mark gross
       [not found] ` <5e690bea.1c69fb81.16d6d.4b78SMTPIN_ADDED_BROKEN@mx.google.com>
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 19+ messages in thread
From: mark gross @ 2020-01-30 19:12 UTC (permalink / raw)
  To: speck

From: mark gross <mgross@linux.intel.com>
Subject: [PATCH 2/2] x86/speculation: SRBDS vulnerability and mitigation
 documentation

Adds documentation for the SRBDS vulnerability and its mitigation.

Reviewed-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Mark Gross <mgross@linux.intel.com>
---
 Documentation/admin-guide/hw-vuln/index.rst   |   2 +
 .../special-register-buffer-data-sampling.rst | 145 ++++++++++++++++++
 2 files changed, 147 insertions(+)
 create mode 100644 Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst

diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst
index 0795e3c2643f..99d5b3244ef9 100644
--- a/Documentation/admin-guide/hw-vuln/index.rst
+++ b/Documentation/admin-guide/hw-vuln/index.rst
@@ -14,3 +14,5 @@ are configurable at compile, boot or run time.
    mds
    tsx_async_abort
    multihit.rst
+   special-register-buffer-data-sampling.rst
+
diff --git a/Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst b/Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst
new file mode 100644
index 000000000000..5acc7748f8e9
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst
@@ -0,0 +1,145 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+SRBDS - Special Register Buffer Data Sampling
+======================================
+
+SRBDS is a hardware vulnerability that allows MDS techniques to infer values
+returned from special register accesses.  Special register accesses are
+accesses to off core registers.  According to Intels evaluation, the special
+register reads that have a security expectation of privacy are:
+RDRAND, RDSEED and SGX EGETKEY.
+
+When RDRAND and RDSEED instructions are used the data is moved to the core
+through the special register mechanism.
+
+Affected processors
+--------------------
+Core models (desktop, mobile, Xeon-E3) that implement RDRAND and/or RDSEED and
+are vulnerable to MFBDS (Micro architectural Fill Buffer Data Sampling) variant
+of MDS (Micro architectural Data Sampling) or to TAA (TSX Asynchronous Abort)
+when TSX is enabled,
+
+  =============  ============  ========
+  common name    Family_Model  Stepping
+  =============  ============  ========
+  Ivybridge      06_3AH        All
+
+  Haswell        06_3CH        All
+  Haswell_L      06_45H        All
+  Haswell_G      06_46H        All
+
+  Broadwell_G    06_47H        All
+  Broadwell      06_3DH        All
+
+  Skylake_L      06_4EH        All
+  Skylake        06_5EH        All
+
+  Kabylake_L     06_8EH        <=A
+  Kabylake_L     06_8EH        0xB only if TSX is enabled
+  Kabylake_L     06_8EH        0xC only if TSX is enabled
+
+  Kabylake       06_9EH        <=B
+  Kabylake       06_9EH        0xC only if TSX is enabled
+  Kabylake       06_9EH        0xD only if TSX is enabled
+  =============  ============  ========
+
+Related CVEs
+------------
+
+The following CVE entry is related to this SRBDS issue:
+
+    ==============  =====  ===================================================
+    CVE-2020--0543
+    ==============  =====  ===================================================
+
+Attack scenarios
+---------------
+An unprivileged user can extract returned values from RDRAND and RDSEED
+executed on another core or sibling thread using MDS techniques.
+
+
+Mitigtion mechanism
+-------------------
+Intel will release microcode updates that modify the RDRAND, RDSEED, and
+EGETKEY instructions to overwrite secret special register data in the shared
+staging buffer before the secret data can be accessed by another logical
+processor.
+
+During execution of the RDRAND, RDSEED, or EGETKEY instruction, off-core
+accesses from other logical processors will be delayed until the special
+register read is complete and the secret data in the shared staging buffer is
+overwritten.
+
+This has three effects on performance:
+1 RDRAND, RDSEED, or EGETKEY instruction have higher latency.
+2 Executing RDRAND at the same time on multiple logical processors will be
+  serialized, resulting in an overall reduction in the maximum RDRAND bandwidth.
+3 Executing RDRAND, RDSEED or EGETKEY will delay memory accesses from other
+  logical processors that miss their core caches, with an impact similar to
+  legacy locked cache-line-split accesses.
+
+Because of the performance impact of the mitigation, the microcode updates also
+provide an opt-out mechanism (RNGDS_MITG_DIS) to disable the mitigation for
+RDRAND and RDSEED instructions executed outside of Intel Software Guard
+Extensions (Intel SGX) enclaves. On logical processors that disable the
+mitigation using this opt-out mechanism, RDRAND and RDSEED do not take longer
+to execute and do not impact performance of sibling logical processors memory
+accesses. The opt-out mechanism does not affect Intel SGX enclaves (including
+execution of RDRAND or RDSEED inside of an enclave, as well as EGETKEY
+execution).
+
+IA32_MCU_OPT_CTRL MSR Definition
+--------------------------------
+Along with the mitigation for this issue, Intel added a new thread-scope
+IA32_MCU_OPT_CTRL MSR, (address 0x123). The presence of this MSR and
+RNGDS_MITG_DIS (bit 0) is enumerated by CPUID.(EAX=07H,ECX=0).EDX[SRBDS_CTRL =
+9]==1. This MSR is introduced through the microcode update.
+
+Setting IA32_MCU_OPT_CTRL[0] (RNGDS_MITG_DIS) to 1 for a logical processor
+disables the mitigation for RDRAND and RDSEED executed outside of an Intel SGX
+enclave on that logical processor. Opting out of the mitigation for a
+particular logical processor does not affect the RDRAND and RDSEED mitigations
+for other logical processors.
+
+Note that inside of an Intel SGX enclave, the mitigation is applied regardless
+of the value of RNGDS_MITG_DS.
+
+Mitigation control on the kernel command line
+---------------------------------------------
+The kernel command line allows for control over the SRBDS mitigation at boot
+time with the option "srbds=".  The option for this is:
+
+  ============= ============================================================
+  off           This option disables SRBS mitigation for RDRAND and RDSEED on
+                affected platforms.
+  ============= ============================================================
+
+SRBS System Information
+-----------------------
+The Linux kernel provides vulnerability status information through sysfs.  For
+SRBDS this can be accessed by the following sysfs file:
+/sys/devices/system/cpu/vulnerabilities/special_register_data_sampling
+
+The possible values contained in this file are:
+
+ ============================== ===========================================
+ Not affected                   Processor not vulnerable
+ Vulnerable                     Processor vulnerable and mitigation disabled
+ Vulnerable: no microcode       Processor vulnerable and microcode is missing
+                                mitigation
+ Mitigated                      Processor is vulnerable and mitigation is in
+                                effect.
+ Not affected (TSX disabled)    Processor is only vulnerable when TSX is
+                                enabled while this system was booted with TSX
+                                disabled.
+ Unknown                        Running on virtual guest processor that is
+                                affected but with no way to know if host
+                                processor is mitigated or vulnerable.
+ ============================== ===========================================
+
+Default mitigations
+-------------------
+This new microcode serializes processor access during execution of RDRAND,
+RDSEED ensures that the shared buffer is overwritten before it is released for
+reuse.
+
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [MODERATED] [PATCH 0/2] v3 more sampling fun 0
@ 2020-03-11 15:39 mark gross
  2020-01-16 22:16 ` [MODERATED] [PATCH 1/2] v3 more sampling fun 1 mark gross
                   ` (5 more replies)
  0 siblings, 6 replies; 19+ messages in thread
From: mark gross @ 2020-03-11 15:39 UTC (permalink / raw)
  To: speck

From: mark gross <mgross@linux.intel.com>
Subject: [PATCH 0/2] V3: more sampling funn

This is a significantly reworked version of the mitigation control   I think
I've addressed all the high quality feedback give on the first 2 versions.

This version of the mitigation no longer needs to change the structure of
cpu_vuln_whitelist as it creates a similar "affected_processors" list.

--- 

Special Register Buffer Data Sampling is a sampling type of vulnerability that
leaks data across cores sharing the HW-RNG for vulnerable processors.

This leak is fixed by a microcode update and is enabled by default.

This new microcode serializes processor access during execution of RDRAND or
RDSEED. It ensures that the shared buffer is overwritten before it is released
for reuse.

The mitigation impacts the throughput of the RDRAND and RDSEED instructions and
latency of RT processing running on the socket while executing RDRAND or
RDSEED.  The micro benchmark of calling RDRAND many times shows a slowdown.

This patch set enables kernel command line control of this mitigation and
exports vulnerability and mitigation status.

This patch set includes 2 patches:
The fist patch enables the command line control of the mitigation as well as
the sysfs export of vulnerability status.

The second patch has the Documentation/admin-guide/hw-vuln documentation for the
issue and the control over the mitigation.

The microcode defaults to enabling the mitigation.


mark gross (2):
  x86/speculation: Special Register Buffer Data Sampling (SRBDS)
    mitigation control.
  x86/speculation: SRBDS vulnerability and mitigation documentation

 Documentation/admin-guide/hw-vuln/index.rst   |   2 +
 .../special-register-buffer-data-sampling.rst | 145 ++++++++++++++++++
 .../admin-guide/kernel-parameters.txt         |  11 ++
 arch/x86/include/asm/cpu_device_id.h          |  12 ++
 arch/x86/include/asm/cpufeatures.h            |   2 +
 arch/x86/include/asm/msr-index.h              |   4 +
 arch/x86/kernel/cpu/bugs.c                    | 110 +++++++++++++
 arch/x86/kernel/cpu/common.c                  |  41 +++++
 arch/x86/kernel/cpu/cpu.h                     |  13 ++
 arch/x86/kernel/cpu/intel.c                   |   2 +
 arch/x86/kernel/cpu/match.c                   |  26 ++++
 drivers/base/cpu.c                            |   8 +
 12 files changed, 376 insertions(+)
 create mode 100644 Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst

-- 
2.17.1

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [MODERATED] Re: [PATCH 1/2] v3 more sampling fun 1
       [not found] ` <5e690bea.1c69fb81.16d6d.4b78SMTPIN_ADDED_BROKEN@mx.google.com>
@ 2020-03-11 17:21   ` Greg KH
  2020-03-11 23:09     ` mark gross
  0 siblings, 1 reply; 19+ messages in thread
From: Greg KH @ 2020-03-11 17:21 UTC (permalink / raw)
  To: speck

On Thu, Jan 16, 2020 at 02:16:07PM -0800, speck for mark gross wrote:
> --- a/drivers/base/cpu.c
> +++ b/drivers/base/cpu.c
> @@ -567,6 +567,12 @@ ssize_t __weak cpu_show_itlb_multihit(struct device *dev,
>  	return sprintf(buf, "Not affected\n");
>  }
>  
> +ssize_t __weak cpu_show_special_register_data_sampling(struct device *dev,
> +						       struct device_attribute *attr, char *buf)
> +{
> +	return sprintf(buf, "Not affected\n");
> +}
> +
>  static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
>  static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
>  static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
> @@ -575,6 +581,7 @@ static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL);
>  static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL);
>  static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL);
>  static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL);
> +static DEVICE_ATTR(special_register_data_sampling, 0444, cpu_show_special_register_data_sampling, NULL);
>  
>  static struct attribute *cpu_root_vulnerabilities_attrs[] = {
>  	&dev_attr_meltdown.attr,
> @@ -585,6 +592,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
>  	&dev_attr_mds.attr,
>  	&dev_attr_tsx_async_abort.attr,
>  	&dev_attr_itlb_multihit.attr,
> +	&dev_attr_special_register_data_sampling.attr,
>  	NULL
>  };

You forgot a Documentation/ABI/ entry for this new sysfs file like I
pointed out previously :(

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/2] v3 more sampling fun 1
  2020-03-11 15:39 [MODERATED] [PATCH 0/2] v3 more sampling fun 0 mark gross
                   ` (2 preceding siblings ...)
       [not found] ` <5e690bea.1c69fb81.16d6d.4b78SMTPIN_ADDED_BROKEN@mx.google.com>
@ 2020-03-11 20:02 ` Thomas Gleixner
  2020-03-17 18:56   ` [MODERATED] " mark gross
  2020-03-11 20:26 ` [PATCH 2/2] v3 more sampling fun 2 Thomas Gleixner
  2020-03-11 20:28 ` [MODERATED] Re: [PATCH 1/2] v3 more sampling fun 1 Andrew Cooper
  5 siblings, 1 reply; 19+ messages in thread
From: Thomas Gleixner @ 2020-03-11 20:02 UTC (permalink / raw)
  To: speck

Mark,

speck for mark gross <speck@linutronix.de> writes:
> From: mark gross <mgross@linux.intel.com>
>
> This patch:

git grep 'This patch' Documentation/process/

> * enables administrator to configure the mitigation off when desired
>   using either mitigations=off or srbds=off.
> * exports vulnerability status via sysfs
>  
> +/*
> + * Match a range of steppings
> + */
> +
> +struct x86_cpu_id_ext {
> +	struct x86_cpu_id id;
> +	__u16 steppings; /* bit map of steppings to match against */

IIRC, we asked for adding the stepping to the existing data structure,
but I can't find any rationale somewhere why this is still separate.

If you really think hard about it then this is not needed at all. See
below.

> +static bool srbds_off;
> +
> +void srbds_configure_mitigation(void)
> +{
> +	u64 mcu_ctrl;
> +
> +	if (srbds_mitigation == SRBDS_NOT_AFFECTED)
> +		return;
> +
> +	if (srbds_mitigation == SRBDS_HYPERVISOR)
> +		return;
> +
> +	if (srbds_mitigation == SRBDS_MITIGATION_UCODE_NEEDED)
> +		return;
> +
> +	rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
> +
> +	switch (srbds_mitigation) {
> +	case SRBDS_MITIGATION_OFF:
> +	case SRBDS_TSX_NOT_AFFECTED:

This mitigation state confuses the hell out of me. The text says:

 +	[SRBDS_TSX_NOT_AFFECTED]	= "Not affected (TSX disabled)",

But the enum value reads to me: TSX is not affected....

    SRBDS_NOT_AFFECTED_TSX_OFF

is a bit more intuitive. Hmm?

> +		mcu_ctrl |= RNGDS_MITG_DIS;
> +		break;
> +	case SRBDS_MITIGATION_FULL:
> +		mcu_ctrl &= ~RNGDS_MITG_DIS;
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	wrmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
> +}
> +
> +static void __init srbds_select_mitigation(void)
> +{
> +	u64 ia32_cap;
> +
> +	if (!boot_cpu_has_bug(X86_BUG_SRBDS)) {
> +		srbds_mitigation = SRBDS_NOT_AFFECTED;
> +		return;
> +	}
> +
> +	if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
> +		srbds_mitigation = SRBDS_HYPERVISOR;
> +		return;
> +	}
> +
> +	if (!boot_cpu_has(X86_FEATURE_SRBDS_CTRL)) {
> +		srbds_mitigation = SRBDS_MITIGATION_UCODE_NEEDED;
> +		return;
> +	}
> +
> +	if (boot_cpu_has_bug(X86_BUG_SRBDS)) {
> +		srbds_mitigation = SRBDS_MITIGATION_FULL;
> +
> +		ia32_cap = x86_read_arch_cap_msr();
> +		if (ia32_cap & ARCH_CAP_MDS_NO) {
> +			if (!boot_cpu_has(X86_FEATURE_RTM))
> +				srbds_mitigation = SRBDS_TSX_NOT_AFFECTED;

This logic comes with an awesome amount of comments...

> +		}
> +	}
> +
> +	if (cpu_mitigations_off() || srbds_off) {
> +		if (srbds_mitigation != SRBDS_TSX_NOT_AFFECTED)
> +			srbds_mitigation = SRBDS_MITIGATION_OFF;
> +	}
> +
> +	srbds_configure_mitigation();
> +}
> +
> +static int __init srbds_parse_cmdline(char *str)
> +{
> +	if (!str)
> +		return -EINVAL;
> +
> +	if (!strcmp(str, "off"))
> +		srbds_off = true;
> +
> +	return 0;
> +}
> +

stray newline

> +early_param("srbds", srbds_parse_cmdline);
> +

>  #define VULNWL(_vendor, _family, _model, _whitelist)	\
>  	{ X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist }
> @@ -1020,6 +1021,15 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
>  #define VULNWL_HYGON(family, whitelist)		\
>  	VULNWL(HYGON, family, X86_MODEL_ANY, whitelist)
>  
> +#define VULNWL_EXT(_vendor, _family, _model, _steppings, _whitelist)	\
> +	{ VULNWL(_vendor, _family, _model, _whitelist), _steppings }
> +

And because this is used for a blacklist the prefix VULNWL, aka
VULNerability White List, and the last argument make a lot of sense,
right?

> +#define VULNWL_INTEL_EXT(model,  whitelist)		\
> +	VULNWL_EXT(INTEL, 6, INTEL_FAM6_##model, X86_STEPPING_ANY, whitelist)
> +
> +#define VULNWL_INTEL_STEPPING(model, stepping, whitelist)		\
> +	VULNWL_EXT(INTEL, 6, INTEL_FAM6_##model, stepping, whitelist)
> +
>  static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
>  	VULNWL(ANY,	4, X86_MODEL_ANY,	NO_SPECULATION),
>  	VULNWL(CENTAUR,	5, X86_MODEL_ANY,	NO_SPECULATION),
> @@ -1075,6 +1085,27 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
>  	{}
>  };
>  
> +/*
> + * to avoide corrupting the whiltelist with blacklist items lets create a list

Sentences start with uppercase letters and spell checking is available
in most editors. Now for the content:

There is nothing to corrupt. Blacklists and whitelists do not mix.

Also what means 'lets create' here? This is a comment describing what
the following array is used for. Facts please.

> + * of affected processors for issues that cannot be enumerated other than by
> + * family/model/stepping
> + */
> +static const struct x86_cpu_id_ext affected_cpus[] __initconst = {
> +	VULNWL_INTEL_EXT(IVYBRIDGE,		SRBDS),


> +	VULNWL_INTEL_EXT(HASWELL,		SRBDS),
> +	VULNWL_INTEL_EXT(HASWELL_L,		SRBDS),
> +	VULNWL_INTEL_EXT(HASWELL_G,		SRBDS),
> +	VULNWL_INTEL_EXT(BROADWELL_G,		SRBDS),
> +	VULNWL_INTEL_EXT(BROADWELL,		SRBDS),
> +	VULNWL_INTEL_EXT(SKYLAKE_L,		SRBDS),
> +	VULNWL_INTEL_EXT(SKYLAKE,		SRBDS),
> +	VULNWL_INTEL_STEPPING(KABYLAKE_L, GENMASK(0xA, 0),	SRBDS), /*06_8E steppings <=A*/
> +	VULNWL_INTEL_STEPPING(KABYLAKE_L, GENMASK(0xC, 0xB),	SRBDS),
> /*06_8E stepping = 0xB|0xC if TSX enabled*/

This is beyond confusing because this should either be expressed in the
vulnerability itself, i.e. SRBDS_TSX_ONLY, or just commented along with
the comment in srbds_select_mitigation()

> +	VULNWL_INTEL_STEPPING(KABYLAKE, GENMASK(0xB, 0),	SRBDS), /*06_9E steppings <=B*/
> +	VULNWL_INTEL_STEPPING(KABYLAKE, GENMASK(0xD, 0xC),	SRBDS), /*06_9E stepping = 0xC if TSX enabled*/

        Comment and code do not match.

Aside of this whole thing is utter garbage, really.

#define X86_STEPPING_MAX	15
#define STEPSHIFT		16

#define ISVULN(_vendor, family, model, minstep, maxstep, vulns)		\
	{ X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY,	\
          GENMASK(minstep, maxstep) << STEPSHIFT | vulns }

#define ISVULN_INTEL(model, minstep, maxstep, vulns)			\
	ISVULN(INTEL, 6, INTEL_FAM6_##model, minstep, maxstep, vulns)

....

/* List of affected CPUs identified by model and stepping range. */
static const struct x86_cpu_id affected_cpus[] __initconst = {
	ISVULN_INTEL(HASWELL,		0, X86_STEPPING_MAX,	SRBDS),
	ISVULN_INTEL(BROADWELL_G,	0, X86_STEPPING_MAX,	SRBDS),

	/* Kabylake L steppings 0xB, 0xC only affected when TSX in on */
	ISVULN_INTEL(KABYLAKE_L,	0, 0xC,			SRBDS),

        /* Kabylake steppings 0xC, 0xD only affected when TSX in on */
	ISVULN_INTEL(KABYLAKE,		0, 0xD,			SRBDS),
        {}
};

Now:

static bool __init cpu_matches(unsigned long which)
{
  	const struct x86_cpu_id *m = x86_match_cpu(cpu_vuln_whitelist);

-	return m && !!(m->driver_data & which);
+  	return m && (m->driver_data & which) == which;
}
  
> +static bool __init cpu_affected(unsigned long which)
> +{
> +	const struct x86_cpu_id_ext *m = x86_match_cpu_ext(affected_cpus);
> +
> +	return m && !!(m->id.driver_data & which);
> +}

Which makes this go away

>  u64 x86_read_arch_cap_msr(void)
>  {
>  	u64 ia32_cap = 0;
> @@ -1124,6 +1162,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
>  	if (!cpu_matches(NO_SWAPGS))
>  		setup_force_cpu_bug(X86_BUG_SWAPGS);
>  
> +	if (cpu_affected(SRBDS))
> +		setup_force_cpu_bug(X86_BUG_SRBDS);

and this becomes:

+    	if (cpu_matches(BIT(boot_cpu_data.x86_stepping + STEPSHIFT) | which))
+		setup_force_cpu_bug(X86_BUG_SRBDS);

Too much code reuse, right?

>  	/*
>  	 * When the CPU is not mitigated for TAA (TAA_NO=0) set TAA bug when:
>  	 *	- TSX is supported or
> diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
> index 37fdefd14f28..22d419080fd6 100644
> --- a/arch/x86/kernel/cpu/cpu.h
> +++ b/arch/x86/kernel/cpu/cpu.h
> @@ -44,7 +44,20 @@ struct _tlb_table {
>  extern const struct cpu_dev *const __x86_cpu_dev_start[],
>  			    *const __x86_cpu_dev_end[];
>  
> +enum srbds_mitigations {
> +	SRBDS_NOT_AFFECTED,
> +	SRBDS_MITIGATION_OFF,
> +	SRBDS_MITIGATION_UCODE_NEEDED,
> +	SRBDS_MITIGATION_FULL,
> +	SRBDS_TSX_NOT_AFFECTED,
> +	SRBDS_HYPERVISOR,
> +};
> +
> +extern __ro_after_init enum srbds_mitigations srbds_mitigation;

And this needs to be public because the only user is in bugs.c, right?

> +void srbds_configure_mitigation(void);
> +
>  #ifdef CONFIG_CPU_SUP_INTEL
> +
>  enum tsx_ctrl_states {
>  	TSX_CTRL_ENABLE,
>  	TSX_CTRL_DISABLE,
> diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
> index be82cd5841c3..1b083a2a415b 100644
> --- a/arch/x86/kernel/cpu/intel.c
> +++ b/arch/x86/kernel/cpu/intel.c
> @@ -684,6 +684,8 @@ static void init_intel(struct cpuinfo_x86 *c)
>  		tsx_enable();
>  	if (tsx_ctrl_state == TSX_CTRL_DISABLE)
>  		tsx_disable();
> +
> +	srbds_configure_mitigation();
>  }
>  
>  #ifdef CONFIG_X86_32
> diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
> index 6dd78d8235e4..118c503b1c36 100644
> --- a/arch/x86/kernel/cpu/match.c
> +++ b/arch/x86/kernel/cpu/match.c
> @@ -49,6 +49,32 @@ const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match)
>  }
>  EXPORT_SYMBOL(x86_match_cpu);
>  
> +/*
> + * Extend x86_match_cpu to support matching a range of steppings.
> + */
> +const struct x86_cpu_id_ext *x86_match_cpu_ext(const struct x86_cpu_id_ext *match)
> +{
> +	const struct x86_cpu_id_ext *m;
> +	struct cpuinfo_x86 *c = &boot_cpu_data;
> +
> +	for (m = match; m->id.vendor | m->id.family | m->id.model | m->id.feature; m++) {
> +		if (m->id.vendor != X86_VENDOR_ANY && c->x86_vendor != m->id.vendor)
> +			continue;
> +		if (m->id.family != X86_FAMILY_ANY && c->x86 != m->id.family)
> +			continue;
> +		if (m->id.model != X86_MODEL_ANY && c->x86_model != m->id.model)
> +			continue;
> +		if (m->steppings != X86_STEPPING_ANY &&
> +		    !(BIT(c->x86_stepping) & m->steppings))
> +			continue;
> +		if (m->id.feature != X86_FEATURE_ANY && !cpu_has(c, m->id.feature))
> +			continue;
> +		return m;
> +	}
> +	return NULL;
> +}
> +EXPORT_SYMBOL(x86_match_cpu_ext);

Sigh, aside of being pointless duplicated code:

If we'd really need this then it can share most of the code with
x86_match_cpu(), but copy and paste is more fancy, right? You even
copied the export just in case ...

>  static const struct x86_cpu_desc *
>  x86_match_cpu_with_stepping(const struct x86_cpu_desc *match)
>  {
> diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
> index 6265871a4af2..d69e094e790c 100644
> --- a/drivers/base/cpu.c
> +++ b/drivers/base/cpu.c
> @@ -567,6 +567,12 @@ ssize_t __weak cpu_show_itlb_multihit(struct device *dev,
>  	return sprintf(buf, "Not affected\n");
>  }
>  
> +ssize_t __weak cpu_show_special_register_data_sampling(struct device *dev,
> +						       struct device_attribute *attr, char *buf)
> +{
> +	return sprintf(buf, "Not affected\n");
> +}
> +
>  static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
>  static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
>  static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
> @@ -575,6 +581,7 @@ static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL);
>  static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL);
>  static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL);
>  static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL);
> +static DEVICE_ATTR(special_register_data_sampling, 0444, cpu_show_special_register_data_sampling, NULL);
> 
>  static struct attribute *cpu_root_vulnerabilities_attrs[] = {
>  	&dev_attr_meltdown.attr,
> @@ -585,6 +592,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
>  	&dev_attr_mds.attr,
>  	&dev_attr_tsx_async_abort.attr,
>  	&dev_attr_itlb_multihit.attr,
> +	&dev_attr_special_register_data_sampling.attr,

This still lacks an entry in:

  Documentation/ABI/testing/sysfs-devices-system-cpu

as requested by Greg several times.

Thanks,

        tglx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/2] v3 more sampling fun 2
  2020-03-11 15:39 [MODERATED] [PATCH 0/2] v3 more sampling fun 0 mark gross
                   ` (3 preceding siblings ...)
  2020-03-11 20:02 ` Thomas Gleixner
@ 2020-03-11 20:26 ` Thomas Gleixner
  2020-03-11 20:38   ` [MODERATED] " Andrew Cooper
                     ` (2 more replies)
  2020-03-11 20:28 ` [MODERATED] Re: [PATCH 1/2] v3 more sampling fun 1 Andrew Cooper
  5 siblings, 3 replies; 19+ messages in thread
From: Thomas Gleixner @ 2020-03-11 20:26 UTC (permalink / raw)
  To: speck

speck for mark gross <speck@linutronix.de> writes:
> diff --git a/Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst b/Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst
> new file mode 100644
> index 000000000000..5acc7748f8e9
> --- /dev/null
> +++ b/Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst
> @@ -0,0 +1,145 @@
> +.. SPDX-License-Identifier: GPL-2.0
> +
> +SRBDS - Special Register Buffer Data Sampling
> +======================================

I doubt that this builds w/o warnings. And right:

Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst:4: WARNING: Title underline too short.

SRBDS - Special Register Buffer Data Sampling
======================================^

Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst:56: WARNING: Title underline too short.

Attack scenarios
---------------^

Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst:76: WARNING: Unexpected indentation.
Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst:77: WARNING: Block quote ends without a blank line; unexpected unindent.

Sigh...

> +SRBDS is a hardware vulnerability that allows MDS techniques to infer values

lacks a link to the MDS documentation

> +returned from special register accesses.  Special register accesses are
> +accesses to off core registers.  According to Intels evaluation, the special
> +register reads that have a security expectation of privacy are:
> +RDRAND, RDSEED and SGX EGETKEY.

And what 

> +
> +When RDRAND and RDSEED instructions are used the data is moved to the core
> +through the special register mechanism.

This sentence is not providing any new information and can be packed in
the above where you mention RDRAND, RDSEED already. EGETKEY is no instruction?

> +Affected processors
> +--------------------
> +Core models (desktop, mobile, Xeon-E3) that implement RDRAND and/or RDSEED and
> +are vulnerable to MFBDS (Micro architectural Fill Buffer Data
> Sampling) variant

s/to/to the/

> +of MDS (Micro architectural Data Sampling) or to TAA (TSX Asynchronous Abort)
> +when TSX is enabled,
> +
> +  =============  ============  ========
> +  common name    Family_Model  Stepping
> +  =============  ============  ========
> +  Ivybridge      06_3AH        All
> +
> +  Haswell        06_3CH        All
> +  Haswell_L      06_45H        All
> +  Haswell_G      06_46H        All
> +
> +  Broadwell_G    06_47H        All
> +  Broadwell      06_3DH        All
> +
> +  Skylake_L      06_4EH        All
> +  Skylake        06_5EH        All
> +
> +  Kabylake_L     06_8EH        <=A
> +  Kabylake_L     06_8EH        0xB only if TSX is enabled
> +  Kabylake_L     06_8EH        0xC only if TSX is enabled
> +
> +  Kabylake       06_9EH        <=B
> +  Kabylake       06_9EH        0xC only if TSX is enabled
> +  Kabylake       06_9EH        0xD only if TSX is enabled
> +  =============  ============  ========

> +  =============  ============  ===========================

ditto at the top of the table.

> +
> +Related CVEs
> +------------
> +
> +The following CVE entry is related to this SRBDS issue:
> +
> +    ==============  =====  ===================================================
> +    CVE-2020--0543
> +    ==============  =====  ===================================================

Is the void filled at some point?

> +Attack scenarios
> +---------------
> +An unprivileged user can extract returned values from RDRAND and RDSEED
> +executed on another core or sibling thread using MDS techniques.

Lacks EGETKEY again.

> +
> +
> +Mitigtion mechanism
> +-------------------
> +Intel will release microcode updates that modify the RDRAND, RDSEED, and
> +EGETKEY instructions to overwrite secret special register data in the shared
> +staging buffer before the secret data can be accessed by another logical
> +processor.
> +
> +During execution of the RDRAND, RDSEED, or EGETKEY instruction, off-core
> +accesses from other logical processors will be delayed until the special
> +register read is complete and the secret data in the shared staging buffer is
> +overwritten.
> +
> +This has three effects on performance:
> +1 RDRAND, RDSEED, or EGETKEY instruction have higher latency.
> +2 Executing RDRAND at the same time on multiple logical processors will be
> +  serialized, resulting in an overall reduction in the maximum RDRAND bandwidth.
> +3 Executing RDRAND, RDSEED or EGETKEY will delay memory accesses from other
> +  logical processors that miss their core caches, with an impact similar to
> +  legacy locked cache-line-split accesses.

I seriously doubt that you ever looked at the build output of this.

> +
> +Because of the performance impact of the mitigation, the microcode updates also
> +provide an opt-out mechanism (RNGDS_MITG_DIS) to disable the mitigation for
> +RDRAND and RDSEED instructions executed outside of Intel Software Guard
> +Extensions (Intel SGX) enclaves. On logical processors that disable the
> +mitigation using this opt-out mechanism, RDRAND and RDSEED do not take longer
> +to execute and do not impact performance of sibling logical processors memory
> +accesses. The opt-out mechanism does not affect Intel SGX enclaves (including
> +execution of RDRAND or RDSEED inside of an enclave, as well as EGETKEY
> +execution).
> +
> +IA32_MCU_OPT_CTRL MSR Definition
> +--------------------------------
> +Along with the mitigation for this issue, Intel added a new thread-scope
> +IA32_MCU_OPT_CTRL MSR, (address 0x123). The presence of this MSR and
> +RNGDS_MITG_DIS (bit 0) is enumerated by CPUID.(EAX=07H,ECX=0).EDX[SRBDS_CTRL =
> +9]==1. This MSR is introduced through the microcode update.
> +
> +Setting IA32_MCU_OPT_CTRL[0] (RNGDS_MITG_DIS) to 1 for a logical processor
> +disables the mitigation for RDRAND and RDSEED executed outside of an Intel SGX
> +enclave on that logical processor. Opting out of the mitigation for a
> +particular logical processor does not affect the RDRAND and RDSEED mitigations
> +for other logical processors.
> +
> +Note that inside of an Intel SGX enclave, the mitigation is applied regardless
> +of the value of RNGDS_MITG_DS.
> +
> +Mitigation control on the kernel command line
> +---------------------------------------------
> +The kernel command line allows for control over the SRBDS mitigation
> at boot

allows control (methinks)

> +time with the option "srbds=".  The option for this is:
> +
> +  ============= ============================================================
> +  off           This option disables SRBS mitigation for RDRAND and RDSEED on
> +                affected platforms.
> +  ============= ============================================================
> +
> +SRBS System Information
> +-----------------------
> +The Linux kernel provides vulnerability status information through sysfs.  For
> +SRBDS this can be accessed by the following sysfs file:
> +/sys/devices/system/cpu/vulnerabilities/special_register_data_sampling
> +
> +The possible values contained in this file are:
> +
> + ============================== ===========================================
> + Not affected                   Processor not vulnerable
> + Vulnerable                     Processor vulnerable and mitigation disabled
> + Vulnerable: no microcode       Processor vulnerable and microcode is missing
> +                                mitigation
> + Mitigated                      Processor is vulnerable and mitigation is in
> +                                effect.
> + Not affected (TSX disabled)    Processor is only vulnerable when TSX is
> +                                enabled while this system was booted with TSX
> +                                disabled.
> + Unknown                        Running on virtual guest processor that is
> +                                affected but with no way to know if host
> +                                processor is mitigated or vulnerable.
> + ============================== ===========================================
> +
> +Default mitigations
> +-------------------
> +This new microcode serializes processor access during execution of RDRAND,
> +RDSEED ensures that the shared buffer is overwritten before it is released for
> +reuse.

Errm. What has this to do with the default chosen by the kernel?

Thanks,

        tglx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [MODERATED] Re: [PATCH 1/2] v3 more sampling fun 1
  2020-03-11 15:39 [MODERATED] [PATCH 0/2] v3 more sampling fun 0 mark gross
                   ` (4 preceding siblings ...)
  2020-03-11 20:26 ` [PATCH 2/2] v3 more sampling fun 2 Thomas Gleixner
@ 2020-03-11 20:28 ` Andrew Cooper
  2020-03-11 23:18   ` mark gross
  5 siblings, 1 reply; 19+ messages in thread
From: Andrew Cooper @ 2020-03-11 20:28 UTC (permalink / raw)
  To: speck

[-- Attachment #1: Type: text/plain, Size: 589 bytes --]

On 16/01/2020 22:16, speck for mark gross wrote:
> +static void __init srbds_select_mitigation(void)
> +{
> +	u64 ia32_cap;
> +
> +	if (!boot_cpu_has_bug(X86_BUG_SRBDS)) {
> +		srbds_mitigation = SRBDS_NOT_AFFECTED;
> +		return;
> +	}
> +
> +	if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
> +		srbds_mitigation = SRBDS_HYPERVISOR;
> +		return;
> +	}

These two ought to be reversing (and with a suitable adjustment to the
docs in patch 2).

If you're running as a guest, you can't even trust the model number used
to divine X86_BUG_SRBDS in the first place.

~Andrew


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [MODERATED] Re: [PATCH 2/2] v3 more sampling fun 2
  2020-03-11 20:26 ` [PATCH 2/2] v3 more sampling fun 2 Thomas Gleixner
@ 2020-03-11 20:38   ` Andrew Cooper
  2020-03-11 23:23   ` mark gross
  2020-03-12 22:04   ` mark gross
  2 siblings, 0 replies; 19+ messages in thread
From: Andrew Cooper @ 2020-03-11 20:38 UTC (permalink / raw)
  To: speck

[-- Attachment #1: Type: text/plain, Size: 492 bytes --]

On 11/03/2020 20:26, speck for Thomas Gleixner wrote:
>> +Note that inside of an Intel SGX enclave, the mitigation is applied regardless
>> +of the value of RNGDS_MITG_DS.
>> +
>> +Mitigation control on the kernel command line
>> +---------------------------------------------
>> +The kernel command line allows for control over the SRBDS mitigation
>> at boot
> allows control (methinks)

Both are fine from a grammar point of view.  It's simply down to preference.

~Andrew


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [MODERATED] Re: [PATCH 1/2] v3 more sampling fun 1
  2020-03-11 17:21   ` [MODERATED] Re: [PATCH 1/2] v3 more sampling fun 1 Greg KH
@ 2020-03-11 23:09     ` mark gross
  0 siblings, 0 replies; 19+ messages in thread
From: mark gross @ 2020-03-11 23:09 UTC (permalink / raw)
  To: speck

On Wed, Mar 11, 2020 at 06:21:28PM +0100, speck for Greg KH wrote:
> On Thu, Jan 16, 2020 at 02:16:07PM -0800, speck for mark gross wrote:
> > --- a/drivers/base/cpu.c
> > +++ b/drivers/base/cpu.c
> > @@ -567,6 +567,12 @@ ssize_t __weak cpu_show_itlb_multihit(struct device *dev,
> >  	return sprintf(buf, "Not affected\n");
> >  }
> >  
> > +ssize_t __weak cpu_show_special_register_data_sampling(struct device *dev,
> > +						       struct device_attribute *attr, char *buf)
> > +{
> > +	return sprintf(buf, "Not affected\n");
> > +}
> > +
> >  static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
> >  static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
> >  static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
> > @@ -575,6 +581,7 @@ static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL);
> >  static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL);
> >  static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL);
> >  static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL);
> > +static DEVICE_ATTR(special_register_data_sampling, 0444, cpu_show_special_register_data_sampling, NULL);
> >  
> >  static struct attribute *cpu_root_vulnerabilities_attrs[] = {
> >  	&dev_attr_meltdown.attr,
> > @@ -585,6 +592,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
> >  	&dev_attr_mds.attr,
> >  	&dev_attr_tsx_async_abort.attr,
> >  	&dev_attr_itlb_multihit.attr,
> > +	&dev_attr_special_register_data_sampling.attr,
> >  	NULL
> >  };
> 
> You forgot a Documentation/ABI/ entry for this new sysfs file like I
> pointed out previously :(

I thought you where talking about the command line arguments not being
documented.

I'll add this.

--mark

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [MODERATED] Re: [PATCH 1/2] v3 more sampling fun 1
  2020-03-11 20:28 ` [MODERATED] Re: [PATCH 1/2] v3 more sampling fun 1 Andrew Cooper
@ 2020-03-11 23:18   ` mark gross
  2020-03-12  0:25     ` Luck, Tony
  0 siblings, 1 reply; 19+ messages in thread
From: mark gross @ 2020-03-11 23:18 UTC (permalink / raw)
  To: speck

On Wed, Mar 11, 2020 at 08:28:36PM +0000, speck for Andrew Cooper wrote:
> On 16/01/2020 22:16, speck for mark gross wrote:
> > +static void __init srbds_select_mitigation(void)
> > +{
> > +	u64 ia32_cap;
> > +
> > +	if (!boot_cpu_has_bug(X86_BUG_SRBDS)) {
> > +		srbds_mitigation = SRBDS_NOT_AFFECTED;
> > +		return;
> > +	}
> > +
> > +	if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
> > +		srbds_mitigation = SRBDS_HYPERVISOR;
> > +		return;
> > +	}
> 
> These two ought to be reversing (and with a suitable adjustment to the
> docs in patch 2).
> 
> If you're running as a guest, you can't even trust the model number used
> to divine X86_BUG_SRBDS in the first place.
> 

I'll change this for the next version.

--mark

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [MODERATED] Re: [PATCH 2/2] v3 more sampling fun 2
  2020-03-11 20:26 ` [PATCH 2/2] v3 more sampling fun 2 Thomas Gleixner
  2020-03-11 20:38   ` [MODERATED] " Andrew Cooper
@ 2020-03-11 23:23   ` mark gross
  2020-03-12 22:04   ` mark gross
  2 siblings, 0 replies; 19+ messages in thread
From: mark gross @ 2020-03-11 23:23 UTC (permalink / raw)
  To: speck

Thanks for the valueble feedback.  I'll go over it in more detail tomorrow.
But, one thing I want to point out is that EGETKEY is always mitigate and only
RDRAND and RDSEED can have the mitigation disabled.

This is the first time I've worked with the Documentation.  I did build it but
I only looked to see if it completed :(

--mark

On Wed, Mar 11, 2020 at 09:26:18PM +0100, speck for Thomas Gleixner wrote:
> speck for mark gross <speck@linutronix.de> writes:
> > diff --git a/Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst b/Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst
> > new file mode 100644
> > index 000000000000..5acc7748f8e9
> > --- /dev/null
> > +++ b/Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst
> > @@ -0,0 +1,145 @@
> > +.. SPDX-License-Identifier: GPL-2.0
> > +
> > +SRBDS - Special Register Buffer Data Sampling
> > +======================================
> 
> I doubt that this builds w/o warnings. And right:
> 
> Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst:4: WARNING: Title underline too short.
> 
> SRBDS - Special Register Buffer Data Sampling
> ======================================^
> 
> Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst:56: WARNING: Title underline too short.
> 
> Attack scenarios
> ---------------^
> 
> Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst:76: WARNING: Unexpected indentation.
> Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst:77: WARNING: Block quote ends without a blank line; unexpected unindent.
> 
> Sigh...
> 
> > +SRBDS is a hardware vulnerability that allows MDS techniques to infer values
> 
> lacks a link to the MDS documentation
> 
> > +returned from special register accesses.  Special register accesses are
> > +accesses to off core registers.  According to Intels evaluation, the special
> > +register reads that have a security expectation of privacy are:
> > +RDRAND, RDSEED and SGX EGETKEY.
> 
> And what 
> 
> > +
> > +When RDRAND and RDSEED instructions are used the data is moved to the core
> > +through the special register mechanism.
> 
> This sentence is not providing any new information and can be packed in
> the above where you mention RDRAND, RDSEED already. EGETKEY is no instruction?
> 
> > +Affected processors
> > +--------------------
> > +Core models (desktop, mobile, Xeon-E3) that implement RDRAND and/or RDSEED and
> > +are vulnerable to MFBDS (Micro architectural Fill Buffer Data
> > Sampling) variant
> 
> s/to/to the/
> 
> > +of MDS (Micro architectural Data Sampling) or to TAA (TSX Asynchronous Abort)
> > +when TSX is enabled,
> > +
> > +  =============  ============  ========
> > +  common name    Family_Model  Stepping
> > +  =============  ============  ========
> > +  Ivybridge      06_3AH        All
> > +
> > +  Haswell        06_3CH        All
> > +  Haswell_L      06_45H        All
> > +  Haswell_G      06_46H        All
> > +
> > +  Broadwell_G    06_47H        All
> > +  Broadwell      06_3DH        All
> > +
> > +  Skylake_L      06_4EH        All
> > +  Skylake        06_5EH        All
> > +
> > +  Kabylake_L     06_8EH        <=A
> > +  Kabylake_L     06_8EH        0xB only if TSX is enabled
> > +  Kabylake_L     06_8EH        0xC only if TSX is enabled
> > +
> > +  Kabylake       06_9EH        <=B
> > +  Kabylake       06_9EH        0xC only if TSX is enabled
> > +  Kabylake       06_9EH        0xD only if TSX is enabled
> > +  =============  ============  ========
> 
> > +  =============  ============  ===========================
> 
> ditto at the top of the table.
> 
> > +
> > +Related CVEs
> > +------------
> > +
> > +The following CVE entry is related to this SRBDS issue:
> > +
> > +    ==============  =====  ===================================================
> > +    CVE-2020--0543
> > +    ==============  =====  ===================================================
> 
> Is the void filled at some point?
> 
> > +Attack scenarios
> > +---------------
> > +An unprivileged user can extract returned values from RDRAND and RDSEED
> > +executed on another core or sibling thread using MDS techniques.
> 
> Lacks EGETKEY again.
> 
> > +
> > +
> > +Mitigtion mechanism
> > +-------------------
> > +Intel will release microcode updates that modify the RDRAND, RDSEED, and
> > +EGETKEY instructions to overwrite secret special register data in the shared
> > +staging buffer before the secret data can be accessed by another logical
> > +processor.
> > +
> > +During execution of the RDRAND, RDSEED, or EGETKEY instruction, off-core
> > +accesses from other logical processors will be delayed until the special
> > +register read is complete and the secret data in the shared staging buffer is
> > +overwritten.
> > +
> > +This has three effects on performance:
> > +1 RDRAND, RDSEED, or EGETKEY instruction have higher latency.
> > +2 Executing RDRAND at the same time on multiple logical processors will be
> > +  serialized, resulting in an overall reduction in the maximum RDRAND bandwidth.
> > +3 Executing RDRAND, RDSEED or EGETKEY will delay memory accesses from other
> > +  logical processors that miss their core caches, with an impact similar to
> > +  legacy locked cache-line-split accesses.
> 
> I seriously doubt that you ever looked at the build output of this.
> 
> > +
> > +Because of the performance impact of the mitigation, the microcode updates also
> > +provide an opt-out mechanism (RNGDS_MITG_DIS) to disable the mitigation for
> > +RDRAND and RDSEED instructions executed outside of Intel Software Guard
> > +Extensions (Intel SGX) enclaves. On logical processors that disable the
> > +mitigation using this opt-out mechanism, RDRAND and RDSEED do not take longer
> > +to execute and do not impact performance of sibling logical processors memory
> > +accesses. The opt-out mechanism does not affect Intel SGX enclaves (including
> > +execution of RDRAND or RDSEED inside of an enclave, as well as EGETKEY
> > +execution).
> > +
> > +IA32_MCU_OPT_CTRL MSR Definition
> > +--------------------------------
> > +Along with the mitigation for this issue, Intel added a new thread-scope
> > +IA32_MCU_OPT_CTRL MSR, (address 0x123). The presence of this MSR and
> > +RNGDS_MITG_DIS (bit 0) is enumerated by CPUID.(EAX=07H,ECX=0).EDX[SRBDS_CTRL =
> > +9]==1. This MSR is introduced through the microcode update.
> > +
> > +Setting IA32_MCU_OPT_CTRL[0] (RNGDS_MITG_DIS) to 1 for a logical processor
> > +disables the mitigation for RDRAND and RDSEED executed outside of an Intel SGX
> > +enclave on that logical processor. Opting out of the mitigation for a
> > +particular logical processor does not affect the RDRAND and RDSEED mitigations
> > +for other logical processors.
> > +
> > +Note that inside of an Intel SGX enclave, the mitigation is applied regardless
> > +of the value of RNGDS_MITG_DS.
> > +
> > +Mitigation control on the kernel command line
> > +---------------------------------------------
> > +The kernel command line allows for control over the SRBDS mitigation
> > at boot
> 
> allows control (methinks)
> 
> > +time with the option "srbds=".  The option for this is:
> > +
> > +  ============= ============================================================
> > +  off           This option disables SRBS mitigation for RDRAND and RDSEED on
> > +                affected platforms.
> > +  ============= ============================================================
> > +
> > +SRBS System Information
> > +-----------------------
> > +The Linux kernel provides vulnerability status information through sysfs.  For
> > +SRBDS this can be accessed by the following sysfs file:
> > +/sys/devices/system/cpu/vulnerabilities/special_register_data_sampling
> > +
> > +The possible values contained in this file are:
> > +
> > + ============================== ===========================================
> > + Not affected                   Processor not vulnerable
> > + Vulnerable                     Processor vulnerable and mitigation disabled
> > + Vulnerable: no microcode       Processor vulnerable and microcode is missing
> > +                                mitigation
> > + Mitigated                      Processor is vulnerable and mitigation is in
> > +                                effect.
> > + Not affected (TSX disabled)    Processor is only vulnerable when TSX is
> > +                                enabled while this system was booted with TSX
> > +                                disabled.
> > + Unknown                        Running on virtual guest processor that is
> > +                                affected but with no way to know if host
> > +                                processor is mitigated or vulnerable.
> > + ============================== ===========================================
> > +
> > +Default mitigations
> > +-------------------
> > +This new microcode serializes processor access during execution of RDRAND,
> > +RDSEED ensures that the shared buffer is overwritten before it is released for
> > +reuse.
> 
> Errm. What has this to do with the default chosen by the kernel?
> 
> Thanks,
> 
>         tglx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [MODERATED] Re: [PATCH 1/2] v3 more sampling fun 1
  2020-03-11 23:18   ` mark gross
@ 2020-03-12  0:25     ` Luck, Tony
  2020-03-12  1:34       ` Andrew Cooper
  0 siblings, 1 reply; 19+ messages in thread
From: Luck, Tony @ 2020-03-12  0:25 UTC (permalink / raw)
  To: speck

On Wed, Mar 11, 2020 at 04:18:19PM -0700, speck for mark gross wrote:
> On Wed, Mar 11, 2020 at 08:28:36PM +0000, speck for Andrew Cooper wrote:
> > On 16/01/2020 22:16, speck for mark gross wrote:
> > > +static void __init srbds_select_mitigation(void)
> > > +{
> > > +	u64 ia32_cap;
> > > +
> > > +	if (!boot_cpu_has_bug(X86_BUG_SRBDS)) {
> > > +		srbds_mitigation = SRBDS_NOT_AFFECTED;
> > > +		return;
> > > +	}
> > > +
> > > +	if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
> > > +		srbds_mitigation = SRBDS_HYPERVISOR;
> > > +		return;
> > > +	}
> > 
> > These two ought to be reversing (and with a suitable adjustment to the
> > docs in patch 2).
> > 
> > If you're running as a guest, you can't even trust the model number used
> > to divine X86_BUG_SRBDS in the first place.
> > 
> 
> I'll change this for the next version.

Andrew: Are you really sure that you want that? It would
mean that all guests running on any server level Xeon
would report mitigation status as unknown. Actually
all those server Xeons are not affected by SRBDS.

Technically that is entirely the right thing to do. But
how much do hypervisors mess with the CPU model between
different classes on processors in practice? I thought
that commonly people put a bunch of Haswell/Broadwell/Skylakes
into a "pool" and pretended they were all Haswell.

Do you want to consider the tradeoff between being
absolutely accurate against all the support calls
you will get because guests report "unknown" mitigation
status?

-Tony

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [MODERATED] Re: [PATCH 1/2] v3 more sampling fun 1
  2020-03-12  0:25     ` Luck, Tony
@ 2020-03-12  1:34       ` Andrew Cooper
  2020-03-12 15:25         ` Luck, Tony
  0 siblings, 1 reply; 19+ messages in thread
From: Andrew Cooper @ 2020-03-12  1:34 UTC (permalink / raw)
  To: speck

[-- Attachment #1: Type: text/plain, Size: 2998 bytes --]

On 12/03/2020 00:25, speck for Luck, Tony wrote:
> On Wed, Mar 11, 2020 at 04:18:19PM -0700, speck for mark gross wrote:
>> On Wed, Mar 11, 2020 at 08:28:36PM +0000, speck for Andrew Cooper wrote:
>>> On 16/01/2020 22:16, speck for mark gross wrote:
>>>> +static void __init srbds_select_mitigation(void)
>>>> +{
>>>> +	u64 ia32_cap;
>>>> +
>>>> +	if (!boot_cpu_has_bug(X86_BUG_SRBDS)) {
>>>> +		srbds_mitigation = SRBDS_NOT_AFFECTED;
>>>> +		return;
>>>> +	}
>>>> +
>>>> +	if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
>>>> +		srbds_mitigation = SRBDS_HYPERVISOR;
>>>> +		return;
>>>> +	}
>>> These two ought to be reversing (and with a suitable adjustment to the
>>> docs in patch 2).
>>>
>>> If you're running as a guest, you can't even trust the model number used
>>> to divine X86_BUG_SRBDS in the first place.
>>>
>> I'll change this for the next version.
> Andrew: Are you really sure that you want that?

I thought I was...

> It would
> mean that all guests running on any server level Xeon
> would report mitigation status as unknown. Actually
> all those server Xeons are not affected by SRBDS.

There are Xeon E3's to be found, in server form factors, in clouds.

> Technically that is entirely the right thing to do. But
> how much do hypervisors mess with the CPU model between
> different classes on processors in practice? I thought
> that commonly people put a bunch of Haswell/Broadwell/Skylakes
> into a "pool" and pretended they were all Haswell.

I actually made this point for KVM's benefit, which does (to the best of
my knowledge) behave like that.

With Xen, a VM sees the real F/M/S of the processor it first started on,
even if the reported features is way off from what a native version of
that CPU would look like.

I can't speak to other hypervisors, but it would be a reasonable
question to ask the keybase group.

> Do you want to consider the tradeoff between being
> absolutely accurate against all the support calls
> you will get because guests report "unknown" mitigation
> status?

vs the warm, fuzzy, and sinking feeling where it says safe, but isn't
actually?

Also, consider the case of a mix of E3's and E5's in a single migration
pool, which is disappointingly common at least at the enterprise level.

I accept this is a complicated problem, but personally, I view providing
potentially-wrong information worse than saying "ask the person who runs
your hypervisor".

Particularly with this issue where it seems that no hypervisor is
interested in offering the knob to guest kernels (so could at least
infer based on SRBDS_CTRL), nor is there an ARCH_CAPS_$FOO_NO bit which
could be filled in by a hypervisor on unaffected or mitigated hardware.

After all, the question of "are my random numbers known elsewhere?" also
depends on "is my hypervisor intercepting the RDRAND/RDSEED
instructions?" and possibly "has someone set the DETERMINISTIC_RNG bit
behind my back?".

~Andrew


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [MODERATED] Re: [PATCH 1/2] v3 more sampling fun 1
  2020-03-12  1:34       ` Andrew Cooper
@ 2020-03-12 15:25         ` Luck, Tony
  2020-03-12 16:02           ` Luck, Tony
  0 siblings, 1 reply; 19+ messages in thread
From: Luck, Tony @ 2020-03-12 15:25 UTC (permalink / raw)
  To: speck

On Thu, Mar 12, 2020 at 01:34:50AM +0000, speck for Andrew Cooper wrote:
> Particularly with this issue where it seems that no hypervisor is
> interested in offering the knob to guest kernels (so could at least
> infer based on SRBDS_CTRL), nor is there an ARCH_CAPS_$FOO_NO bit which
> could be filled in by a hypervisor on unaffected or mitigated hardware.

Maybe that is the solution?  We allocated a s/w (VMM) bit in
ARCH_CAPABILITIES for the TLB DoS issue for the nested hypervisor case.
Should we do that again for this issue?

If you think so, then we can take this discussion to Keybase so
that the other VMM vendors who hang out there can chime in.

[Note that it isn't my call to allocate a bit in ARCH_CAPABILITIES,
but I can campaign for it if folks want it].

-Tony

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [MODERATED] Re: [PATCH 1/2] v3 more sampling fun 1
  2020-03-12 15:25         ` Luck, Tony
@ 2020-03-12 16:02           ` Luck, Tony
  2020-03-12 16:45             ` Andrew Cooper
  0 siblings, 1 reply; 19+ messages in thread
From: Luck, Tony @ 2020-03-12 16:02 UTC (permalink / raw)
  To: speck

On Thu, Mar 12, 2020 at 08:25:01AM -0700, speck for Luck, Tony wrote:
> On Thu, Mar 12, 2020 at 01:34:50AM +0000, speck for Andrew Cooper wrote:
> > Particularly with this issue where it seems that no hypervisor is
> > interested in offering the knob to guest kernels (so could at least
> > infer based on SRBDS_CTRL), nor is there an ARCH_CAPS_$FOO_NO bit which
> > could be filled in by a hypervisor on unaffected or mitigated hardware.
> 
> Maybe that is the solution?  We allocated a s/w (VMM) bit in
> ARCH_CAPABILITIES for the TLB DoS issue for the nested hypervisor case.
> Should we do that again for this issue?

Bah, memory fail. I was actually thinking of bit 3 SKIP_L1DFL_VMENTRY
that told a nested VMM it didn't need to flush L1D again for L1TF as
an outer VMM was already handling that.

> If you think so, then we can take this discussion to Keybase so
> that the other VMM vendors who hang out there can chime in.
> 
> [Note that it isn't my call to allocate a bit in ARCH_CAPABILITIES,
> but I can campaign for it if folks want it].
> 
> -Tony

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [MODERATED] Re: [PATCH 1/2] v3 more sampling fun 1
  2020-03-12 16:02           ` Luck, Tony
@ 2020-03-12 16:45             ` Andrew Cooper
  0 siblings, 0 replies; 19+ messages in thread
From: Andrew Cooper @ 2020-03-12 16:45 UTC (permalink / raw)
  To: speck

[-- Attachment #1: Type: text/plain, Size: 1197 bytes --]

On 12/03/2020 16:02, speck for Luck, Tony wrote:
> On Thu, Mar 12, 2020 at 08:25:01AM -0700, speck for Luck, Tony wrote:
>> On Thu, Mar 12, 2020 at 01:34:50AM +0000, speck for Andrew Cooper wrote:
>>> Particularly with this issue where it seems that no hypervisor is
>>> interested in offering the knob to guest kernels (so could at least
>>> infer based on SRBDS_CTRL), nor is there an ARCH_CAPS_$FOO_NO bit which
>>> could be filled in by a hypervisor on unaffected or mitigated hardware.
>> Maybe that is the solution?  We allocated a s/w (VMM) bit in
>> ARCH_CAPABILITIES for the TLB DoS issue for the nested hypervisor case.
>> Should we do that again for this issue?
> Bah, memory fail. I was actually thinking of bit 3 SKIP_L1DFL_VMENTRY
> that told a nested VMM it didn't need to flush L1D again for L1TF as
> an outer VMM was already handling that.
>
>> If you think so, then we can take this discussion to Keybase so
>> that the other VMM vendors who hang out there can chime in.
>>
>> [Note that it isn't my call to allocate a bit in ARCH_CAPABILITIES,
>> but I can campaign for it if folks want it].

I think it would certainly be worth while asking.

~Andrew


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [MODERATED] Re: [PATCH 2/2] v3 more sampling fun 2
  2020-03-11 20:26 ` [PATCH 2/2] v3 more sampling fun 2 Thomas Gleixner
  2020-03-11 20:38   ` [MODERATED] " Andrew Cooper
  2020-03-11 23:23   ` mark gross
@ 2020-03-12 22:04   ` mark gross
  2020-03-13 15:21     ` Thomas Gleixner
  2 siblings, 1 reply; 19+ messages in thread
From: mark gross @ 2020-03-12 22:04 UTC (permalink / raw)
  To: speck

On Wed, Mar 11, 2020 at 09:26:18PM +0100, speck for Thomas Gleixner wrote:
> speck for mark gross <speck@linutronix.de> writes:
> > diff --git a/Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst b/Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst
> > new file mode 100644
> > index 000000000000..5acc7748f8e9
> > --- /dev/null
> > +++ b/Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst
> > @@ -0,0 +1,145 @@
> > +.. SPDX-License-Identifier: GPL-2.0
> > +
> > +SRBDS - Special Register Buffer Data Sampling
> > +======================================
> 
> I doubt that this builds w/o warnings. And right:

It does now.

> 
> Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst:4: WARNING: Title underline too short.
> 
> SRBDS - Special Register Buffer Data Sampling
> ======================================^
fixed.
> 
> Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst:56: WARNING: Title underline too short.
> 
> Attack scenarios
> ---------------^
fixed.
> 
> Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst:76: WARNING: Unexpected indentation.
> Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst:77: WARNING: Block quote ends without a blank line; unexpected unindent.
fixed.
> 
> Sigh...
> 
> > +SRBDS is a hardware vulnerability that allows MDS techniques to infer values
> 
> lacks a link to the MDS documentation

will fix.

> 
> > +returned from special register accesses.  Special register accesses are
> > +accesses to off core registers.  According to Intels evaluation, the special
> > +register reads that have a security expectation of privacy are:
> > +RDRAND, RDSEED and SGX EGETKEY.
> 
> And what 
> 
> > +
> > +When RDRAND and RDSEED instructions are used the data is moved to the core
> > +through the special register mechanism.
> 
> This sentence is not providing any new information and can be packed in
> the above where you mention RDRAND, RDSEED already. EGETKEY is no instruction?
added " that is susceptible to MDS attacks."

> 
> > +Affected processors
> > +--------------------
> > +Core models (desktop, mobile, Xeon-E3) that implement RDRAND and/or RDSEED and
> > +are vulnerable to MFBDS (Micro architectural Fill Buffer Data
> > Sampling) variant
> 
> s/to/to the/
done.

> 
> > +of MDS (Micro architectural Data Sampling) or to TAA (TSX Asynchronous Abort)
> > +when TSX is enabled,
> > +
> > +  =============  ============  ========
> > +  common name    Family_Model  Stepping
> > +  =============  ============  ========
> > +  Ivybridge      06_3AH        All
> > +
> > +  Haswell        06_3CH        All
> > +  Haswell_L      06_45H        All
> > +  Haswell_G      06_46H        All
> > +
> > +  Broadwell_G    06_47H        All
> > +  Broadwell      06_3DH        All
> > +
> > +  Skylake_L      06_4EH        All
> > +  Skylake        06_5EH        All
> > +
> > +  Kabylake_L     06_8EH        <=A
> > +  Kabylake_L     06_8EH        0xB only if TSX is enabled
> > +  Kabylake_L     06_8EH        0xC only if TSX is enabled
> > +
> > +  Kabylake       06_9EH        <=B
> > +  Kabylake       06_9EH        0xC only if TSX is enabled
> > +  Kabylake       06_9EH        0xD only if TSX is enabled
> > +  =============  ============  ========
> 
> > +  =============  ============  ===========================
> 
> ditto at the top of the table.
I don't understand this feedback.


> 
> > +
> > +Related CVEs
> > +------------
> > +
> > +The following CVE entry is related to this SRBDS issue:
> > +
> > +    ==============  =====  ===================================================
> > +    CVE-2020--0543
> > +    ==============  =====  ===================================================
> 
> Is the void filled at some point?
Working on it.

> 
> > +Attack scenarios
> > +---------------
> > +An unprivileged user can extract returned values from RDRAND and RDSEED
> > +executed on another core or sibling thread using MDS techniques.
> 
> Lacks EGETKEY again.
no, egetkey is not an instruction for use outside an SGX enclave.

Also, the mitigation only alows MSR control of the mitigation for RDRAND and
RDSEED.  EGETKEY is always mitigated.

> 
> > +
> > +
> > +Mitigtion mechanism
> > +-------------------
> > +Intel will release microcode updates that modify the RDRAND, RDSEED, and
> > +EGETKEY instructions to overwrite secret special register data in the shared
> > +staging buffer before the secret data can be accessed by another logical
> > +processor.
> > +
> > +During execution of the RDRAND, RDSEED, or EGETKEY instruction, off-core
> > +accesses from other logical processors will be delayed until the special
> > +register read is complete and the secret data in the shared staging buffer is
> > +overwritten.
> > +
> > +This has three effects on performance:
> > +1 RDRAND, RDSEED, or EGETKEY instruction have higher latency.
> > +2 Executing RDRAND at the same time on multiple logical processors will be
> > +  serialized, resulting in an overall reduction in the maximum RDRAND bandwidth.
> > +3 Executing RDRAND, RDSEED or EGETKEY will delay memory accesses from other
> > +  logical processors that miss their core caches, with an impact similar to
> > +  legacy locked cache-line-split accesses.
> 
> I seriously doubt that you ever looked at the build output of this.
I can see why you doubt I ever looked at the output.  FWIW I used gitlab to
view the *.rst formatting and I was focused on fighting the table below to the
point I missed this section.

this list is fixed now.


> > +
> > +Because of the performance impact of the mitigation, the microcode updates also
> > +provide an opt-out mechanism (RNGDS_MITG_DIS) to disable the mitigation for
> > +RDRAND and RDSEED instructions executed outside of Intel Software Guard
> > +Extensions (Intel SGX) enclaves. On logical processors that disable the
> > +mitigation using this opt-out mechanism, RDRAND and RDSEED do not take longer
> > +to execute and do not impact performance of sibling logical processors memory
> > +accesses. The opt-out mechanism does not affect Intel SGX enclaves (including
> > +execution of RDRAND or RDSEED inside of an enclave, as well as EGETKEY
> > +execution).
> > +
> > +IA32_MCU_OPT_CTRL MSR Definition
> > +--------------------------------
> > +Along with the mitigation for this issue, Intel added a new thread-scope
> > +IA32_MCU_OPT_CTRL MSR, (address 0x123). The presence of this MSR and
> > +RNGDS_MITG_DIS (bit 0) is enumerated by CPUID.(EAX=07H,ECX=0).EDX[SRBDS_CTRL =
> > +9]==1. This MSR is introduced through the microcode update.
> > +
> > +Setting IA32_MCU_OPT_CTRL[0] (RNGDS_MITG_DIS) to 1 for a logical processor
> > +disables the mitigation for RDRAND and RDSEED executed outside of an Intel SGX
> > +enclave on that logical processor. Opting out of the mitigation for a
> > +particular logical processor does not affect the RDRAND and RDSEED mitigations
> > +for other logical processors.
> > +
> > +Note that inside of an Intel SGX enclave, the mitigation is applied regardless
> > +of the value of RNGDS_MITG_DS.
> > +
> > +Mitigation control on the kernel command line
> > +---------------------------------------------
> > +The kernel command line allows for control over the SRBDS mitigation
> > at boot
> 
> allows control (methinks)
ok.

> 
> > +time with the option "srbds=".  The option for this is:
> > +
> > +  ============= ============================================================
> > +  off           This option disables SRBS mitigation for RDRAND and RDSEED on
> > +                affected platforms.
> > +  ============= ============================================================
> > +
> > +SRBS System Information
> > +-----------------------
> > +The Linux kernel provides vulnerability status information through sysfs.  For
> > +SRBDS this can be accessed by the following sysfs file:
> > +/sys/devices/system/cpu/vulnerabilities/special_register_data_sampling
> > +
> > +The possible values contained in this file are:
> > +
> > + ============================== ===========================================
> > + Not affected                   Processor not vulnerable
> > + Vulnerable                     Processor vulnerable and mitigation disabled
> > + Vulnerable: no microcode       Processor vulnerable and microcode is missing
> > +                                mitigation
> > + Mitigated                      Processor is vulnerable and mitigation is in
> > +                                effect.
> > + Not affected (TSX disabled)    Processor is only vulnerable when TSX is
> > +                                enabled while this system was booted with TSX
> > +                                disabled.
> > + Unknown                        Running on virtual guest processor that is
> > +                                affected but with no way to know if host
> > +                                processor is mitigated or vulnerable.
> > + ============================== ===========================================
> > +
> > +Default mitigations
> > +-------------------
> > +This new microcode serializes processor access during execution of RDRAND,
> > +RDSEED ensures that the shared buffer is overwritten before it is released for
> > +reuse.
> 
> Errm. What has this to do with the default chosen by the kernel?
its a statement that if the kernel does nothing then the mitigation is in
effect.


thanks for the feedback.  Please respond about the comment I didn't understand.

--mark

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/2] v3 more sampling fun 2
  2020-03-12 22:04   ` mark gross
@ 2020-03-13 15:21     ` Thomas Gleixner
  0 siblings, 0 replies; 19+ messages in thread
From: Thomas Gleixner @ 2020-03-13 15:21 UTC (permalink / raw)
  To: speck

Mark,

speck for mark gross <speck@linutronix.de> writes:
> On Wed, Mar 11, 2020 at 09:26:18PM +0100, speck for Thomas Gleixner wrote:
>> speck for mark gross <speck@linutronix.de> writes:
>> > +  =============  ============  ========
>> > +  common name    Family_Model  Stepping
>> > +  =============  ============  ========
>> > +  Ivybridge      06_3AH        All
>> > +
>> > +  Haswell        06_3CH        All
>> > +  Haswell_L      06_45H        All
>> > +  Haswell_G      06_46H        All
>> > +
>> > +  Broadwell_G    06_47H        All
>> > +  Broadwell      06_3DH        All
>> > +
>> > +  Skylake_L      06_4EH        All
>> > +  Skylake        06_5EH        All
>> > +
>> > +  Kabylake_L     06_8EH        <=A
>> > +  Kabylake_L     06_8EH        0xB only if TSX is enabled
>> > +  Kabylake_L     06_8EH        0xC only if TSX is enabled
>> > +
>> > +  Kabylake       06_9EH        <=B
>> > +  Kabylake       06_9EH        0xC only if TSX is enabled
>> > +  Kabylake       06_9EH        0xD only if TSX is enabled
>> > +  =============  ============  ========
>> 
>> > +  =============  ============  ===========================
>> 
>> ditto at the top of the table.
> I don't understand this feedback.

The length of '===' must be at least as long as the text in the rows. At
least that was a requirement in the past. Maybe it got relaxed, but even
then it looks more consistent:

  =============  ============  ========
  Kabylake       06_9EH        0xD only if TSX is enabled
  =============  ============  ========

vs.

  =============  ============  ===========================
  Kabylake       06_9EH        0xD only if TSX is enabled
  =============  ============  ===========================

>> > +Attack scenarios
>> > +---------------
>> > +An unprivileged user can extract returned values from RDRAND and RDSEED
>> > +executed on another core or sibling thread using MDS techniques.
>> 
>> Lacks EGETKEY again.
> no, egetkey is not an instruction for use outside an SGX enclave.
>
> Also, the mitigation only alows MSR control of the mitigation for RDRAND and
> RDSEED.  EGETKEY is always mitigated.

This is talking about attack scenarios not about mitigations.

>> > +Default mitigations
>> > +-------------------
>> > +This new microcode serializes processor access during execution of RDRAND,
>> > +RDSEED ensures that the shared buffer is overwritten before it is released for
>> > +reuse.
>> 
>> Errm. What has this to do with the default chosen by the kernel?
> its a statement that if the kernel does nothing then the mitigation is in
> effect.

Then please write it in a way which makes it clear what the kernel does
by default.

  If updated microcode is available, the microcode mitigation which
  overwrites the shared buffer is enabled by default.

Or something to that effect.

Thanks,

        tglx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [MODERATED] Re: [PATCH 1/2] v3 more sampling fun 1
  2020-03-11 20:02 ` Thomas Gleixner
@ 2020-03-17 18:56   ` mark gross
  0 siblings, 0 replies; 19+ messages in thread
From: mark gross @ 2020-03-17 18:56 UTC (permalink / raw)
  To: speck

On Wed, Mar 11, 2020 at 09:02:52PM +0100, speck for Thomas Gleixner wrote:
> Mark,
> 
> speck for mark gross <speck@linutronix.de> writes:
> > From: mark gross <mgross@linux.intel.com>
> >
> > This patch:
> 
> git grep 'This patch' Documentation/process/

Interpretive mood / give orders to the codebase to change its behavior.

ok.

> 
> > * enables administrator to configure the mitigation off when desired
> >   using either mitigations=off or srbds=off.
> > * exports vulnerability status via sysfs
> >  
> > +/*
> > + * Match a range of steppings
> > + */
> > +
> > +struct x86_cpu_id_ext {
> > +	struct x86_cpu_id id;
> > +	__u16 steppings; /* bit map of steppings to match against */
> 
> IIRC, we asked for adding the stepping to the existing data structure,
> but I can't find any rationale somewhere why this is still separate.

Changed to change the x86_cpu_id to append a "steppings" member at the end.

> 
> If you really think hard about it then this is not needed at all. See
> below.
> 
> > +static bool srbds_off;
> > +
> > +void srbds_configure_mitigation(void)
> > +{
> > +	u64 mcu_ctrl;
> > +
> > +	if (srbds_mitigation == SRBDS_NOT_AFFECTED)
> > +		return;
> > +
> > +	if (srbds_mitigation == SRBDS_HYPERVISOR)
> > +		return;
> > +
> > +	if (srbds_mitigation == SRBDS_MITIGATION_UCODE_NEEDED)
> > +		return;
> > +
> > +	rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
> > +
> > +	switch (srbds_mitigation) {
> > +	case SRBDS_MITIGATION_OFF:
> > +	case SRBDS_TSX_NOT_AFFECTED:
> 
> This mitigation state confuses the hell out of me. The text says:
> 
>  +	[SRBDS_TSX_NOT_AFFECTED]	= "Not affected (TSX disabled)",
> 
> But the enum value reads to me: TSX is not affected....
> 
>     SRBDS_NOT_AFFECTED_TSX_OFF
> 
> is a bit more intuitive. Hmm?
changed to SRBDS_NOT_AFFECTED_TSX_OFF.

> 
> > +		mcu_ctrl |= RNGDS_MITG_DIS;
> > +		break;
> > +	case SRBDS_MITIGATION_FULL:
> > +		mcu_ctrl &= ~RNGDS_MITG_DIS;
> > +		break;
> > +	default:
> > +		break;
> > +	}
> > +
> > +	wrmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
> > +}
> > +
> > +static void __init srbds_select_mitigation(void)
> > +{
> > +	u64 ia32_cap;
> > +
> > +	if (!boot_cpu_has_bug(X86_BUG_SRBDS)) {
> > +		srbds_mitigation = SRBDS_NOT_AFFECTED;
> > +		return;
> > +	}
> > +
> > +	if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
> > +		srbds_mitigation = SRBDS_HYPERVISOR;
> > +		return;
> > +	}
> > +
> > +	if (!boot_cpu_has(X86_FEATURE_SRBDS_CTRL)) {
> > +		srbds_mitigation = SRBDS_MITIGATION_UCODE_NEEDED;
> > +		return;
> > +	}
> > +
> > +	if (boot_cpu_has_bug(X86_BUG_SRBDS)) {
> > +		srbds_mitigation = SRBDS_MITIGATION_FULL;
> > +
> > +		ia32_cap = x86_read_arch_cap_msr();
> > +		if (ia32_cap & ARCH_CAP_MDS_NO) {
> > +			if (!boot_cpu_has(X86_FEATURE_RTM))
> > +				srbds_mitigation = SRBDS_TSX_NOT_AFFECTED;
> 
> This logic comes with an awesome amount of comments...
Added comment about checking to see if this is one of the MDS_NO systems that
supports TSX where they are only vulnerable if TSX is enabled.

> 
> > +		}
> > +	}
> > +
> > +	if (cpu_mitigations_off() || srbds_off) {
> > +		if (srbds_mitigation != SRBDS_TSX_NOT_AFFECTED)
> > +			srbds_mitigation = SRBDS_MITIGATION_OFF;
> > +	}
> > +
> > +	srbds_configure_mitigation();
> > +}
> > +
> > +static int __init srbds_parse_cmdline(char *str)
> > +{
> > +	if (!str)
> > +		return -EINVAL;
> > +
> > +	if (!strcmp(str, "off"))
> > +		srbds_off = true;
> > +
> > +	return 0;
> > +}
> > +
> 
> stray newline
> 
> > +early_param("srbds", srbds_parse_cmdline);
> > +
> 
> >  #define VULNWL(_vendor, _family, _model, _whitelist)	\
> >  	{ X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist }
> > @@ -1020,6 +1021,15 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
> >  #define VULNWL_HYGON(family, whitelist)		\
> >  	VULNWL(HYGON, family, X86_MODEL_ANY, whitelist)
> >  
> > +#define VULNWL_EXT(_vendor, _family, _model, _steppings, _whitelist)	\
> > +	{ VULNWL(_vendor, _family, _model, _whitelist), _steppings }
> > +
> 
> And because this is used for a blacklist the prefix VULNWL, aka
> VULNerability White List, and the last argument make a lot of sense,
> right?

right.  Would it be ok to s/whitelist/issues or issue_mask for all these?  That
structure is a bitmask and not a list anyway.

> 
> > +#define VULNWL_INTEL_EXT(model,  whitelist)		\
> > +	VULNWL_EXT(INTEL, 6, INTEL_FAM6_##model, X86_STEPPING_ANY, whitelist)
> > +
> > +#define VULNWL_INTEL_STEPPING(model, stepping, whitelist)		\
> > +	VULNWL_EXT(INTEL, 6, INTEL_FAM6_##model, stepping, whitelist)
> > +
> >  static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
> >  	VULNWL(ANY,	4, X86_MODEL_ANY,	NO_SPECULATION),
> >  	VULNWL(CENTAUR,	5, X86_MODEL_ANY,	NO_SPECULATION),
> > @@ -1075,6 +1085,27 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
> >  	{}
> >  };
> >  
> > +/*
> > + * to avoide corrupting the whiltelist with blacklist items lets create a list
> 
> Sentences start with uppercase letters and spell checking is available
> in most editors. Now for the content:
> 
> There is nothing to corrupt. Blacklists and whitelists do not mix.
> 
> Also what means 'lets create' here? This is a comment describing what
> the following array is used for. Facts please.
Changed / reworded and spell checked.

> 
> > + * of affected processors for issues that cannot be enumerated other than by
> > + * family/model/stepping
> > + */
> > +static const struct x86_cpu_id_ext affected_cpus[] __initconst = {
> > +	VULNWL_INTEL_EXT(IVYBRIDGE,		SRBDS),
> 
> 
> > +	VULNWL_INTEL_EXT(HASWELL,		SRBDS),
> > +	VULNWL_INTEL_EXT(HASWELL_L,		SRBDS),
> > +	VULNWL_INTEL_EXT(HASWELL_G,		SRBDS),
> > +	VULNWL_INTEL_EXT(BROADWELL_G,		SRBDS),
> > +	VULNWL_INTEL_EXT(BROADWELL,		SRBDS),
> > +	VULNWL_INTEL_EXT(SKYLAKE_L,		SRBDS),
> > +	VULNWL_INTEL_EXT(SKYLAKE,		SRBDS),
> > +	VULNWL_INTEL_STEPPING(KABYLAKE_L, GENMASK(0xA, 0),	SRBDS), /*06_8E steppings <=A*/
> > +	VULNWL_INTEL_STEPPING(KABYLAKE_L, GENMASK(0xC, 0xB),	SRBDS),
> > /*06_8E stepping = 0xB|0xC if TSX enabled*/
> 
> This is beyond confusing because this should either be expressed in the
> vulnerability itself, i.e. SRBDS_TSX_ONLY, or just commented along with
> the comment in srbds_select_mitigation()

reduced by 2 entries and removed talk of TSX enabled

> 
> > +	VULNWL_INTEL_STEPPING(KABYLAKE, GENMASK(0xB, 0),	SRBDS), /*06_9E steppings <=B*/
> > +	VULNWL_INTEL_STEPPING(KABYLAKE, GENMASK(0xD, 0xC),	SRBDS), /*06_9E stepping = 0xC if TSX enabled*/
> 
>         Comment and code do not match.
Fixed.

> 
> Aside of this whole thing is utter garbage, really.
> 
> #define X86_STEPPING_MAX	15
> #define STEPSHIFT		16
> 
> #define ISVULN(_vendor, family, model, minstep, maxstep, vulns)		\
> 	{ X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY,	\
>           GENMASK(minstep, maxstep) << STEPSHIFT | vulns }
> 
> #define ISVULN_INTEL(model, minstep, maxstep, vulns)			\
> 	ISVULN(INTEL, 6, INTEL_FAM6_##model, minstep, maxstep, vulns)
> 
> ....
> 
> /* List of affected CPUs identified by model and stepping range. */
> static const struct x86_cpu_id affected_cpus[] __initconst = {
> 	ISVULN_INTEL(HASWELL,		0, X86_STEPPING_MAX,	SRBDS),
> 	ISVULN_INTEL(BROADWELL_G,	0, X86_STEPPING_MAX,	SRBDS),
> 
> 	/* Kabylake L steppings 0xB, 0xC only affected when TSX in on */
> 	ISVULN_INTEL(KABYLAKE_L,	0, 0xC,			SRBDS),
> 
>         /* Kabylake steppings 0xC, 0xD only affected when TSX in on */
> 	ISVULN_INTEL(KABYLAKE,		0, 0xD,			SRBDS),
>         {}
> };
> 
> Now:
> 
> static bool __init cpu_matches(unsigned long which)
> {
>   	const struct x86_cpu_id *m = x86_match_cpu(cpu_vuln_whitelist);
> 
> -	return m && !!(m->driver_data & which);
> +  	return m && (m->driver_data & which) == which;
> }
>   
Yes, this is a clever use of encoding steppings into 16 bits of the driver_data
member but, its not very straight forward compared to adding a steppings data
member to the end of the existing structure.


> > +static bool __init cpu_affected(unsigned long which)
> > +{
> > +	const struct x86_cpu_id_ext *m = x86_match_cpu_ext(affected_cpus);
> > +
> > +	return m && !!(m->id.driver_data & which);
> > +}
> 
> Which makes this go away
> 
> >  u64 x86_read_arch_cap_msr(void)
> >  {
> >  	u64 ia32_cap = 0;
> > @@ -1124,6 +1162,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
> >  	if (!cpu_matches(NO_SWAPGS))
> >  		setup_force_cpu_bug(X86_BUG_SWAPGS);
> >  
> > +	if (cpu_affected(SRBDS))
> > +		setup_force_cpu_bug(X86_BUG_SRBDS);
> 
> and this becomes:
> 
> +    	if (cpu_matches(BIT(boot_cpu_data.x86_stepping + STEPSHIFT) | which))
> +		setup_force_cpu_bug(X86_BUG_SRBDS);
> 
> Too much code reuse, right?
Maybe.

> 
> >  	/*
> >  	 * When the CPU is not mitigated for TAA (TAA_NO=0) set TAA bug when:
> >  	 *	- TSX is supported or
> > diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
> > index 37fdefd14f28..22d419080fd6 100644
> > --- a/arch/x86/kernel/cpu/cpu.h
> > +++ b/arch/x86/kernel/cpu/cpu.h
> > @@ -44,7 +44,20 @@ struct _tlb_table {
> >  extern const struct cpu_dev *const __x86_cpu_dev_start[],
> >  			    *const __x86_cpu_dev_end[];
> >  
> > +enum srbds_mitigations {
> > +	SRBDS_NOT_AFFECTED,
> > +	SRBDS_MITIGATION_OFF,
> > +	SRBDS_MITIGATION_UCODE_NEEDED,
> > +	SRBDS_MITIGATION_FULL,
> > +	SRBDS_TSX_NOT_AFFECTED,
> > +	SRBDS_HYPERVISOR,
> > +};
> > +
> > +extern __ro_after_init enum srbds_mitigations srbds_mitigation;
> 
> And this needs to be public because the only user is in bugs.c, right?
right.  this is now gone.

> 
> > +void srbds_configure_mitigation(void);
> > +
> >  #ifdef CONFIG_CPU_SUP_INTEL
> > +
> >  enum tsx_ctrl_states {
> >  	TSX_CTRL_ENABLE,
> >  	TSX_CTRL_DISABLE,
> > diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
> > index be82cd5841c3..1b083a2a415b 100644
> > --- a/arch/x86/kernel/cpu/intel.c
> > +++ b/arch/x86/kernel/cpu/intel.c
> > @@ -684,6 +684,8 @@ static void init_intel(struct cpuinfo_x86 *c)
> >  		tsx_enable();
> >  	if (tsx_ctrl_state == TSX_CTRL_DISABLE)
> >  		tsx_disable();
> > +
> > +	srbds_configure_mitigation();
> >  }
> >  
> >  #ifdef CONFIG_X86_32
> > diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
> > index 6dd78d8235e4..118c503b1c36 100644
> > --- a/arch/x86/kernel/cpu/match.c
> > +++ b/arch/x86/kernel/cpu/match.c
> > @@ -49,6 +49,32 @@ const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match)
> >  }
> >  EXPORT_SYMBOL(x86_match_cpu);
> >  
> > +/*
> > + * Extend x86_match_cpu to support matching a range of steppings.
> > + */
> > +const struct x86_cpu_id_ext *x86_match_cpu_ext(const struct x86_cpu_id_ext *match)
> > +{
> > +	const struct x86_cpu_id_ext *m;
> > +	struct cpuinfo_x86 *c = &boot_cpu_data;
> > +
> > +	for (m = match; m->id.vendor | m->id.family | m->id.model | m->id.feature; m++) {
> > +		if (m->id.vendor != X86_VENDOR_ANY && c->x86_vendor != m->id.vendor)
> > +			continue;
> > +		if (m->id.family != X86_FAMILY_ANY && c->x86 != m->id.family)
> > +			continue;
> > +		if (m->id.model != X86_MODEL_ANY && c->x86_model != m->id.model)
> > +			continue;
> > +		if (m->steppings != X86_STEPPING_ANY &&
> > +		    !(BIT(c->x86_stepping) & m->steppings))
> > +			continue;
> > +		if (m->id.feature != X86_FEATURE_ANY && !cpu_has(c, m->id.feature))
> > +			continue;
> > +		return m;
> > +	}
> > +	return NULL;
> > +}
> > +EXPORT_SYMBOL(x86_match_cpu_ext);
> 
> Sigh, aside of being pointless duplicated code:
> 
> If we'd really need this then it can share most of the code with
> x86_match_cpu(), but copy and paste is more fancy, right? You even
> copied the export just in case ...

duplication is removed. function refactored.

> 
> >  static const struct x86_cpu_desc *
> >  x86_match_cpu_with_stepping(const struct x86_cpu_desc *match)
> >  {
> > diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
> > index 6265871a4af2..d69e094e790c 100644
> > --- a/drivers/base/cpu.c
> > +++ b/drivers/base/cpu.c
> > @@ -567,6 +567,12 @@ ssize_t __weak cpu_show_itlb_multihit(struct device *dev,
> >  	return sprintf(buf, "Not affected\n");
> >  }
> >  
> > +ssize_t __weak cpu_show_special_register_data_sampling(struct device *dev,
> > +						       struct device_attribute *attr, char *buf)
> > +{
> > +	return sprintf(buf, "Not affected\n");
> > +}
> > +
> >  static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
> >  static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
> >  static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
> > @@ -575,6 +581,7 @@ static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL);
> >  static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL);
> >  static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL);
> >  static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL);
> > +static DEVICE_ATTR(special_register_data_sampling, 0444, cpu_show_special_register_data_sampling, NULL);
> > 
> >  static struct attribute *cpu_root_vulnerabilities_attrs[] = {
> >  	&dev_attr_meltdown.attr,
> > @@ -585,6 +592,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
> >  	&dev_attr_mds.attr,
> >  	&dev_attr_tsx_async_abort.attr,
> >  	&dev_attr_itlb_multihit.attr,
> > +	&dev_attr_special_register_data_sampling.attr,
> 
> This still lacks an entry in:
> 
>   Documentation/ABI/testing/sysfs-devices-system-cpu
> 
> as requested by Greg several times.
Done.

I like your review feedback, its not ambiguous.

Thank you.

I should have an updated posting hopefully tomorrow.
--mark

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2020-03-17 18:56 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-11 15:39 [MODERATED] [PATCH 0/2] v3 more sampling fun 0 mark gross
2020-01-16 22:16 ` [MODERATED] [PATCH 1/2] v3 more sampling fun 1 mark gross
2020-01-30 19:12 ` [MODERATED] [PATCH 2/2] v3 more sampling fun 2 mark gross
     [not found] ` <5e690bea.1c69fb81.16d6d.4b78SMTPIN_ADDED_BROKEN@mx.google.com>
2020-03-11 17:21   ` [MODERATED] Re: [PATCH 1/2] v3 more sampling fun 1 Greg KH
2020-03-11 23:09     ` mark gross
2020-03-11 20:02 ` Thomas Gleixner
2020-03-17 18:56   ` [MODERATED] " mark gross
2020-03-11 20:26 ` [PATCH 2/2] v3 more sampling fun 2 Thomas Gleixner
2020-03-11 20:38   ` [MODERATED] " Andrew Cooper
2020-03-11 23:23   ` mark gross
2020-03-12 22:04   ` mark gross
2020-03-13 15:21     ` Thomas Gleixner
2020-03-11 20:28 ` [MODERATED] Re: [PATCH 1/2] v3 more sampling fun 1 Andrew Cooper
2020-03-11 23:18   ` mark gross
2020-03-12  0:25     ` Luck, Tony
2020-03-12  1:34       ` Andrew Cooper
2020-03-12 15:25         ` Luck, Tony
2020-03-12 16:02           ` Luck, Tony
2020-03-12 16:45             ` Andrew Cooper

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).