All of lore.kernel.org
 help / color / mirror / Atom feed
* [MODERATED] [PATCH v2] Linux Patch 1/1
@ 2018-04-26  0:11 Tim Chen
  2018-04-26  3:15 ` [MODERATED] " Jon Masters
                   ` (3 more replies)
  0 siblings, 4 replies; 46+ messages in thread
From: Tim Chen @ 2018-04-26  0:11 UTC (permalink / raw)
  To: speck


[-- Attachment #1.1: Type: text/plain, Size: 14210 bytes --]


From bf437945b231c9daaa3b927f50e57a868b7757d9 Mon Sep 17 00:00:00 2001
From: Tim Chen <tim.c.chen@linux.intel.com>
Date: Mon, 23 Apr 2018 05:57:53 -0700
Subject: [PATCH v2] Linux Patch 1/1 

x86/ssb: Enable option to mitigate SSB by explicit prctl

Processes that are unable to use other software mitigations and execute
untrusted code may want to disable speculative store bypass. This patch
adds a new prctl : PR_SET_RDS_MODE.
The prctl allows the enable/disable of reduced data speculation mode
to mitigate speculative store bypass vulnerability of the
current process.

The prctl mode is inherited to children to handle threads and allow wrappers,
but it is not broadcast to all threads in the same process. It is generally
safe to inherit because it doesn't allow any new attacks, but just prevents
them.

It also adds a matching prctl PR_GET_RDS_MODE to retrieve that state.

This patch is applied on top of v4 of Konrad's SSB patchset.

v2:
Switch from using RDS on seccomp processes to using an explicit
prctl to enable RDS.

Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
---
 Documentation/admin-guide/kernel-parameters.txt |  4 ++
 arch/x86/include/asm/nospec-branch.h            | 17 +++++
 arch/x86/include/asm/thread_info.h              |  2 +
 arch/x86/kernel/cpu/bugs.c                      | 86 +++++++++++++++++++++++--
 arch/x86/kernel/cpu/intel.c                     |  4 +-
 arch/x86/kernel/process_64.c                    | 12 ++++
 include/uapi/linux/prctl.h                      | 12 ++++
 kernel/fork.c                                   |  8 +++
 kernel/sys.c                                    | 16 +++++
 9 files changed, 155 insertions(+), 6 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a2d337b..1f95161 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4053,6 +4053,10 @@
 			auto - kernel detects whether your CPU model contains a
 			       vulnerable implementation of Speculative Store
 			       Bypass and picks the most appropriate mitigation
+			prctl - disable Speculative Store Bypass for processes
+				via prctl. Processes run normally with
+				Speculative Store bypass by default unless
+				told otherwise by prctl.
 
 			Not specifying this option is equivalent to
 			spec_store_bypass_disable=auto.
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 6f3d224..a6bb9da 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -3,10 +3,12 @@
 #ifndef _ASM_X86_NOSPEC_BRANCH_H_
 #define _ASM_X86_NOSPEC_BRANCH_H_
 
+#include <linux/jump_label.h>
 #include <asm/alternative.h>
 #include <asm/alternative-asm.h>
 #include <asm/cpufeatures.h>
 #include <asm/msr-index.h>
+#include <asm/percpu.h>
 
 /*
  * Fill the CPU return stack buffer.
@@ -242,11 +244,15 @@ extern void x86_restore_host_spec_ctrl(u64);
 enum ssb_mitigation {
 	SPEC_STORE_BYPASS_NONE,
 	SPEC_STORE_BYPASS_DISABLE,
+	SPEC_STORE_BYPASS_PRCTL,
 };
 
 extern char __indirect_thunk_start[];
 extern char __indirect_thunk_end[];
 
+/* cpu's MSR_IA32_SPEC_CTRL state for speculation store bypass mitigation */
+DECLARE_PER_CPU(int, rds_msr_val);
+
 /*
  * On VMEXIT we must ensure that no RSB predictions learned in the guest
  * can be followed in the host, by overwriting the RSB completely. Both
@@ -310,6 +316,17 @@ do {									\
 	preempt_enable();						\
 } while (0)
 
+/*
+ * speculation store bypass vulnerabilities mitigation functions.
+ *
+ * Should be called only from pre-emption off paths.
+ *
+ */
+
+void enable_speculative_store_bypass_mitigation(void);
+void disable_speculative_store_bypass_mitigation(void);
+
+DECLARE_STATIC_KEY_FALSE(specctrl_rds);
 #endif /* __ASSEMBLY__ */
 
 /*
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index a5d9521..f401321 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -79,6 +79,7 @@ struct thread_info {
 #define TIF_SIGPENDING		2	/* signal pending */
 #define TIF_NEED_RESCHED	3	/* rescheduling necessary */
 #define TIF_SINGLESTEP		4	/* reenable singlestep on user return*/
+#define TIF_RDS			5	/* Reduced data speculation */
 #define TIF_SYSCALL_EMU		6	/* syscall emulation active */
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 #define TIF_SECCOMP		8	/* secure computing */
@@ -105,6 +106,7 @@ struct thread_info {
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
 #define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
+#define _TIF_RDS		(1 << TIF_RDS)
 #define _TIF_SYSCALL_EMU	(1 << TIF_SYSCALL_EMU)
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index f519214..10819f7 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -12,6 +12,7 @@
 #include <linux/utsname.h>
 #include <linux/cpu.h>
 #include <linux/module.h>
+#include <linux/prctl.h>
 
 #include <asm/nospec-branch.h>
 #include <asm/cmdline.h>
@@ -133,7 +134,7 @@ EXPORT_SYMBOL_GPL(x86_set_spec_ctrl);
 
 u64 x86_get_spec_ctrl(void)
 {
-	return x86_spec_ctrl_base;
+	return x86_spec_ctrl_base | __this_cpu_read(rds_msr_val);
 }
 EXPORT_SYMBOL_GPL(x86_get_spec_ctrl);
 
@@ -148,10 +149,12 @@ EXPORT_SYMBOL_GPL(x86_set_guest_spec_ctrl);
 
 void x86_restore_host_spec_ctrl(u64 guest_spec_ctrl)
 {
-	if (x86_get_spec_ctrl() == guest_spec_ctrl)
+	u64 host_val = x86_get_spec_ctrl();
+
+	if (host_val == guest_spec_ctrl)
 		return;
 	else
-		wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+		wrmsrl(MSR_IA32_SPEC_CTRL, host_val);
 }
 EXPORT_SYMBOL_GPL(x86_restore_host_spec_ctrl);
 
@@ -376,16 +379,26 @@ static void __init spectre_v2_select_mitigation(void)
 
 static enum ssb_mitigation ssb_mode = SPEC_STORE_BYPASS_NONE;
 
+/* dynamic reduced data speculation in use */
+DEFINE_STATIC_KEY_FALSE(specctrl_rds);
+EXPORT_SYMBOL(specctrl_rds);
+
+/* current MSR_IA32_SPEC_CTRL state for ssb mitigation */
+DEFINE_PER_CPU(int, rds_msr_val);
+EXPORT_SYMBOL(rds_msr_val);
+
 /* The kernel command line selection */
 enum ssb_mitigation_cmd {
 	SPEC_STORE_BYPASS_CMD_NONE,
 	SPEC_STORE_BYPASS_CMD_AUTO,
 	SPEC_STORE_BYPASS_CMD_ON,
+	SPEC_STORE_BYPASS_CMD_PRCTL,
 };
 
 static const char *ssb_strings[] = {
 	[SPEC_STORE_BYPASS_NONE]	= "Vulnerable",
-	[SPEC_STORE_BYPASS_DISABLE]	= "Mitigation: Speculative Store Bypass disabled"
+	[SPEC_STORE_BYPASS_DISABLE]	= "Mitigation: Speculative Store Bypass disabled",
+	[SPEC_STORE_BYPASS_PRCTL]	= "Mitigation: Speculative Store Bypass disabled via prctl"
 };
 
 static const struct {
@@ -395,8 +408,61 @@ static const struct {
 	{ "auto",	SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */
 	{ "on",		SPEC_STORE_BYPASS_CMD_ON },   /* Disable Speculative Store Bypass */
 	{ "off",	SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */
+	{ "prctl",	SPEC_STORE_BYPASS_CMD_PRCTL }, /* disable Speculative Store Bypass via prctl */
 };
 
+void enable_speculative_store_bypass_mitigation(void)
+{
+       if (static_branch_unlikely(&specctrl_rds) &&
+           /* need to toggle msr? */
+           __this_cpu_read(rds_msr_val) != SPEC_CTRL_RDS) {
+               wrmsrl(MSR_IA32_SPEC_CTRL, SPEC_CTRL_RDS | x86_spec_ctrl_base);
+               __this_cpu_write(rds_msr_val, SPEC_CTRL_RDS);
+       }
+}
+
+void disable_speculative_store_bypass_mitigation(void)
+{
+       if (static_branch_unlikely(&specctrl_rds) &&
+           /* need to toggle msr? */
+           __this_cpu_read(rds_msr_val) != 0) {
+               wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+               __this_cpu_write(rds_msr_val, 0);
+       }
+}
+
+int arch_prctl_set_rds(struct task_struct *p, unsigned long val)
+{
+	if (val == PR_RDS_MODE_ENABLE) {
+		set_tsk_thread_flag(p, TIF_RDS);
+		if (p == current)
+			enable_speculative_store_bypass_mitigation();
+	} else if (val == PR_RDS_MODE_DISABLE) {
+		clear_tsk_thread_flag(p, TIF_RDS);
+		if (p == current)
+			disable_speculative_store_bypass_mitigation();
+	} else
+		return -EINVAL;
+
+	return 0;
+}
+
+int arch_prctl_get_rds(struct task_struct *p)
+{
+	if (test_tsk_thread_flag(p, TIF_RDS))
+		return PR_RDS_MODE_ENABLE;
+
+	return PR_RDS_MODE_DISABLE;
+}
+
+void arch_prctl_copy_rds_mode(struct task_struct *parent, struct task_struct *child)
+{
+	if (test_tsk_thread_flag(parent, TIF_RDS))
+		set_tsk_thread_flag(child, TIF_RDS);
+	else
+		clear_tsk_thread_flag(child, TIF_RDS);
+}
+
 static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void)
 {
 	char arg[20];
@@ -445,6 +511,7 @@ static void __init ssb_select_mitigation(void)
 	     cmd == SPEC_STORE_BYPASS_CMD_AUTO))
 		return;
 
+again:
 	switch (cmd) {
 	case SPEC_STORE_BYPASS_CMD_AUTO:
 		/*
@@ -452,9 +519,17 @@ static void __init ssb_select_mitigation(void)
 		 */
 		if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
 			break;
+		/* Choose prctl as the default mode */
+		cmd = SPEC_STORE_BYPASS_CMD_PRCTL;
+		goto again;
+		break;
 	case SPEC_STORE_BYPASS_CMD_ON:
 		mode = SPEC_STORE_BYPASS_DISABLE;
 		break;
+	case SPEC_STORE_BYPASS_CMD_PRCTL:
+		mode = SPEC_STORE_BYPASS_PRCTL;
+		static_branch_enable(&specctrl_rds);
+		break;
 	case SPEC_STORE_BYPASS_CMD_NONE:
 		break;
 	}
@@ -463,7 +538,8 @@ static void __init ssb_select_mitigation(void)
 	pr_info("%s\n", ssb_strings[mode]);
 
 	if (mode != SPEC_STORE_BYPASS_NONE) {
-		if (boot_cpu_has(X86_FEATURE_RDS))
+		if (boot_cpu_has(X86_FEATURE_RDS) &&
+		    !static_branch_unlikely(&specctrl_rds))
 			x86_spec_ctrl_base |= SPEC_CTRL_RDS;
 
 		setup_force_cpu_cap(X86_FEATURE_DISABLE_SSB);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index e0f0596..98ca193 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -772,7 +772,9 @@ static void init_intel(struct cpuinfo_x86 *c)
 
 	init_intel_misc_features(c);
 
-	if (cpu_has(c, X86_FEATURE_DISABLE_SSB) && cpu_has(c, X86_FEATURE_RDS)) {
+	if (cpu_has(c, X86_FEATURE_DISABLE_SSB) &&
+	    cpu_has(c, X86_FEATURE_RDS) &&
+	    !static_branch_unlikely(&specctrl_rds)) {
 		x86_set_spec_ctrl(SPEC_CTRL_RDS);
 	}
 }
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 4b100fe..c8c5cc4 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -54,6 +54,7 @@
 #include <asm/vdso.h>
 #include <asm/intel_rdt_sched.h>
 #include <asm/unistd.h>
+#include <asm/nospec-branch.h>
 #ifdef CONFIG_IA32_EMULATION
 /* Not included via unistd.h */
 #include <asm/unistd_32_ia32.h>
@@ -529,6 +530,17 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	/* Load the Intel cache allocation PQR MSR. */
 	intel_rdt_sched_in();
 
+	if (test_tsk_thread_flag(next_p, TIF_RDS))
+		/*
+		 * Reduce data speculation for processes that
+		 * have reduced data speculation option turned
+		 * on via prctl.  This provides protection against speculation
+		 * store bypass attack for untrusted code within the process.
+		 */
+		enable_speculative_store_bypass_mitigation();
+	else
+		disable_speculative_store_bypass_mitigation();
+
 	return prev_p;
 }
 
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index af5f8c2..7635b9a 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -207,4 +207,16 @@ struct prctl_mm_map {
 # define PR_SVE_VL_LEN_MASK		0xffff
 # define PR_SVE_VL_INHERIT		(1 << 17) /* inherit across exec */
 
+/*
+ * Reduced Data Speculation controls
+ *
+ * Set this on a task so it will run with reduced level of data speculation
+ * to mitigate against speculative store bypass attack.
+ */
+#define PR_SET_RDS_MODE		53	/* set reduced data speculation mode of process */
+#define PR_RDS_MODE_DISABLE	0       /* Disable reduced data speculation mode */
+#define PR_RDS_MODE_ENABLE	1       /* Enable reduced data speculation mode */
+
+#define PR_GET_RDS_MODE		54	/* get reduced data speculation mode of process */
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index a5d21c4..8fe9c80 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1561,6 +1561,11 @@ static inline void rcu_copy_process(struct task_struct *p)
 #endif /* #ifdef CONFIG_TASKS_RCU */
 }
 
+void __weak arch_prctl_copy_rds_mode(struct task_struct *parent, struct task_struct *child)
+{
+	return;
+}
+
 /*
  * This creates a new process as a copy of the old one,
  * but does not actually start it yet.
@@ -1633,6 +1638,9 @@ static __latent_entropy struct task_struct *copy_process(
 	if (!p)
 		goto fork_out;
 
+	/* Copy the reduced data speculation mode from parent to child */
+	arch_prctl_copy_rds_mode(current, p);
+
 	/*
 	 * This _must_ happen before we call free_task(), i.e. before we jump
 	 * to any of the bad_fork_* labels. This is to avoid freeing
diff --git a/kernel/sys.c b/kernel/sys.c
index ad69218..8fbb698 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2242,6 +2242,16 @@ static int propagate_has_child_subreaper(struct task_struct *p, void *data)
 	return 1;
 }
 
+int __weak arch_prctl_set_rds(struct task_struct *p, unsigned long arg)
+{
+	return 0;
+}
+
+int __weak arch_prctl_get_rds(struct task_struct *p)
+{
+	return 0;
+}
+
 SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 		unsigned long, arg4, unsigned long, arg5)
 {
@@ -2450,6 +2460,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 	case PR_SVE_GET_VL:
 		error = SVE_GET_VL();
 		break;
+	case PR_SET_RDS_MODE:
+		error = arch_prctl_set_rds(me, arg2);
+		break;
+	case PR_GET_RDS_MODE:
+		error = arch_prctl_get_rds(me);
+		break;
 	default:
 		error = -EINVAL;
 		break;
-- 
2.9.4


^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [MODERATED] Re: [PATCH v2] Linux Patch 1/1
  2018-04-26  0:11 [MODERATED] [PATCH v2] Linux Patch 1/1 Tim Chen
@ 2018-04-26  3:15 ` Jon Masters
  2018-04-26 17:03   ` Tim Chen
  2018-04-26  3:29 ` Jon Masters
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 46+ messages in thread
From: Jon Masters @ 2018-04-26  3:15 UTC (permalink / raw)
  To: speck

[-- Attachment #1: Type: text/plain, Size: 806 bytes --]

On 04/25/2018 08:11 PM, speck for Tim Chen wrote:

> Processes that are unable to use other software mitigations and execute
> untrusted code may want to disable speculative store bypass. This patch
> adds a new prctl : PR_SET_RDS_MODE.

Let's not use the Intel name for this. It should be neutral since it
will be used across architectures. PR_SET_SSB_MODE could be ok.

I personally favor a PR_SET_SPECULATION or similar first argument
(seeking input from Linus here) that could be used to set flags for this
and any future additional limitations that might need to be added. I
can't see how this will be the first and only time. Another way around
could be PR_SET_VULNERABILITY but "speculation" seems nicer to me.

Jon.

-- 
Computer Architect | Sent from my Fedora powered laptop


^ permalink raw reply	[flat|nested] 46+ messages in thread

* [MODERATED] Re: [PATCH v2] Linux Patch 1/1
  2018-04-26  0:11 [MODERATED] [PATCH v2] Linux Patch 1/1 Tim Chen
  2018-04-26  3:15 ` [MODERATED] " Jon Masters
@ 2018-04-26  3:29 ` Jon Masters
  2018-04-26 12:03   ` Thomas Gleixner
  2018-04-26 16:18 ` Jon Masters
  2018-04-27 16:08 ` Thomas Gleixner
  3 siblings, 1 reply; 46+ messages in thread
From: Jon Masters @ 2018-04-26  3:29 UTC (permalink / raw)
  To: speck

[-- Attachment #1: Type: text/plain, Size: 1508 bytes --]

One general question beyond the below: Should we define a new auxvec for
processes to be told about speculation restrictions that they might be
operating under, in case those are inherited, say across a fork?

Comments on the rest of the patch...

On 04/25/2018 08:11 PM, speck for Tim Chen wrote:

> +/* cpu's MSR_IA32_SPEC_CTRL state for speculation store bypass mitigation */
> +DECLARE_PER_CPU(int, rds_msr_val);

We do similar in RHEL today for IBRS, for similar performance reasons.
Ultimately you /might/ want this to include additional bits for future
mitigations that are toggled on top of the x86_spec_ctrl_base and thus
call it something a bit more generic, but that's a nit for now.

Also, you're using an int here for the rds bit (which is fair enough)
but the actual MSR is a u64. It might be worth a comment [ I've had to
sanitize some of the previous IBRS assembly routines we've got in RHEL
that were explicitly zeroing the high order bits during the MSR write to
make sure we handle preserving those in case of future mitigations. ]

> +#define TIF_RDS			5	/* Reduced data speculation */

In the threadinfo for x86 call it whatever, but previous comment applies
overall on naming.

Otherwise it looks ok. I prefer changing the name to something non-x86,
and would like to know whether Linus would rather have a generic set of
prctl knobs for speculation controls under one common name.

Jon.

-- 
Computer Architect | Sent from my Fedora powered laptop


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH v2] Linux Patch 1/1
  2018-04-26  3:29 ` Jon Masters
@ 2018-04-26 12:03   ` Thomas Gleixner
  2018-04-26 16:48     ` [MODERATED] " Tim Chen
  0 siblings, 1 reply; 46+ messages in thread
From: Thomas Gleixner @ 2018-04-26 12:03 UTC (permalink / raw)
  To: speck

On Wed, 25 Apr 2018, speck for Jon Masters wrote:

> One general question beyond the below: Should we define a new auxvec for
> processes to be told about speculation restrictions that they might be
> operating under, in case those are inherited, say across a fork?
> 
> Comments on the rest of the patch...
> 
> On 04/25/2018 08:11 PM, speck for Tim Chen wrote:
> 
> > +/* cpu's MSR_IA32_SPEC_CTRL state for speculation store bypass mitigation */
> > +DECLARE_PER_CPU(int, rds_msr_val);
> 
> We do similar in RHEL today for IBRS, for similar performance reasons.
> Ultimately you /might/ want this to include additional bits for future
> mitigations that are toggled on top of the x86_spec_ctrl_base and thus
> call it something a bit more generic, but that's a nit for now.
> 
> Also, you're using an int here for the rds bit (which is fair enough)

No. It's not fair enough.

Variables which store information which is read or written to registers
should have an explicit data type which matches the register. Everything
else with and without a comment is just sloppy hackery and utter crap.

It's fine to use u32 if you are sure that this is the lower part of the
MSR, but then the variable name should reflect it, but int and a name which
suggest that it's the actual MSR value is just not going to fly.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* [MODERATED] Re: [PATCH v2] Linux Patch 1/1
  2018-04-26  0:11 [MODERATED] [PATCH v2] Linux Patch 1/1 Tim Chen
  2018-04-26  3:15 ` [MODERATED] " Jon Masters
  2018-04-26  3:29 ` Jon Masters
@ 2018-04-26 16:18 ` Jon Masters
  2018-04-27 16:08 ` Thomas Gleixner
  3 siblings, 0 replies; 46+ messages in thread
From: Jon Masters @ 2018-04-26 16:18 UTC (permalink / raw)
  To: speck

[-- Attachment #1: Type: text/plain, Size: 1377 bytes --]

On 04/25/2018 08:11 PM, speck for Tim Chen wrote:
> diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
> index 4b100fe..c8c5cc4 100644
> --- a/arch/x86/kernel/process_64.c
> +++ b/arch/x86/kernel/process_64.c
> @@ -54,6 +54,7 @@
>  #include <asm/vdso.h>
>  #include <asm/intel_rdt_sched.h>
>  #include <asm/unistd.h>
> +#include <asm/nospec-branch.h>
>  #ifdef CONFIG_IA32_EMULATION
>  /* Not included via unistd.h */
>  #include <asm/unistd_32_ia32.h>
> @@ -529,6 +530,17 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
>  	/* Load the Intel cache allocation PQR MSR. */
>  	intel_rdt_sched_in();
>  
> +	if (test_tsk_thread_flag(next_p, TIF_RDS))
> +		/*
> +		 * Reduce data speculation for processes that
> +		 * have reduced data speculation option turned
> +		 * on via prctl.  This provides protection against speculation
> +		 * store bypass attack for untrusted code within the process.
> +		 */
> +		enable_speculative_store_bypass_mitigation();
> +	else
> +		disable_speculative_store_bypass_mitigation();
> +
>  	return prev_p;
>  }

It's worth noting that if either thread sets RDS, effectively both my
get it under the hood (depending upon implementation). This will have a
subtle but minor performance impact.

Jon.

-- 
Computer Architect | Sent from my Fedora powered laptop


^ permalink raw reply	[flat|nested] 46+ messages in thread

* [MODERATED] Re: [PATCH v2] Linux Patch 1/1
  2018-04-26 12:03   ` Thomas Gleixner
@ 2018-04-26 16:48     ` Tim Chen
  0 siblings, 0 replies; 46+ messages in thread
From: Tim Chen @ 2018-04-26 16:48 UTC (permalink / raw)
  To: speck


[-- Attachment #1.1: Type: text/plain, Size: 1523 bytes --]

On 04/26/2018 05:03 AM, speck for Thomas Gleixner wrote:
> On Wed, 25 Apr 2018, speck for Jon Masters wrote:
> 
>> One general question beyond the below: Should we define a new auxvec for
>> processes to be told about speculation restrictions that they might be
>> operating under, in case those are inherited, say across a fork?
>>
>> Comments on the rest of the patch...
>>
>> On 04/25/2018 08:11 PM, speck for Tim Chen wrote:
>>
>>> +/* cpu's MSR_IA32_SPEC_CTRL state for speculation store bypass mitigation */
>>> +DECLARE_PER_CPU(int, rds_msr_val);
>>
>> We do similar in RHEL today for IBRS, for similar performance reasons.
>> Ultimately you /might/ want this to include additional bits for future
>> mitigations that are toggled on top of the x86_spec_ctrl_base and thus
>> call it something a bit more generic, but that's a nit for now.
>>
>> Also, you're using an int here for the rds bit (which is fair enough)
> 
> No. It's not fair enough.
> 
> Variables which store information which is read or written to registers
> should have an explicit data type which matches the register. Everything
> else with and without a comment is just sloppy hackery and utter crap.
> 
> It's fine to use u32 if you are sure that this is the lower part of the
> MSR, but then the variable name should reflect it, but int and a name which
> suggest that it's the actual MSR value is just not going to fly.

It is an oversight on my part.  Will fix this in next version of patch.

Thanks.

Tim

^ permalink raw reply	[flat|nested] 46+ messages in thread

* [MODERATED] Re: [PATCH v2] Linux Patch 1/1
  2018-04-26  3:15 ` [MODERATED] " Jon Masters
@ 2018-04-26 17:03   ` Tim Chen
  0 siblings, 0 replies; 46+ messages in thread
From: Tim Chen @ 2018-04-26 17:03 UTC (permalink / raw)
  To: speck


[-- Attachment #1.1: Type: text/plain, Size: 893 bytes --]

On 04/25/2018 08:15 PM, speck for Jon Masters wrote:
> On 04/25/2018 08:11 PM, speck for Tim Chen wrote:
> 
>> Processes that are unable to use other software mitigations and execute
>> untrusted code may want to disable speculative store bypass. This patch
>> adds a new prctl : PR_SET_RDS_MODE.
> 
> Let's not use the Intel name for this. It should be neutral since it
> will be used across architectures. PR_SET_SSB_MODE could be ok.
> 
> I personally favor a PR_SET_SPECULATION or similar first argument
> (seeking input from Linus here) that could be used to set flags for this
> and any future additional limitations that might need to be added. I
> can't see how this will be the first and only time. Another way around
> could be PR_SET_VULNERABILITY but "speculation" seems nicer to me.
> 

How about PR_SET_SPECULATION_RESTRICTION?  Linus, any suggestion?

Tim


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH v2] Linux Patch 1/1
  2018-04-26  0:11 [MODERATED] [PATCH v2] Linux Patch 1/1 Tim Chen
                   ` (2 preceding siblings ...)
  2018-04-26 16:18 ` Jon Masters
@ 2018-04-27 16:08 ` Thomas Gleixner
  2018-04-27 16:47   ` [MODERATED] " Borislav Petkov
                     ` (2 more replies)
  3 siblings, 3 replies; 46+ messages in thread
From: Thomas Gleixner @ 2018-04-27 16:08 UTC (permalink / raw)
  To: speck

On Wed, 25 Apr 2018, speck for Tim Chen wrote:
> +/* dynamic reduced data speculation in use */
> +DEFINE_STATIC_KEY_FALSE(specctrl_rds);
> +EXPORT_SYMBOL(specctrl_rds);

Why needs this to be exported?

> +/* current MSR_IA32_SPEC_CTRL state for ssb mitigation */
> +DEFINE_PER_CPU(int, rds_msr_val);
> +EXPORT_SYMBOL(rds_msr_val);

Ditto

>  /* The kernel command line selection */
>  enum ssb_mitigation_cmd {
>  	SPEC_STORE_BYPASS_CMD_NONE,
>  	SPEC_STORE_BYPASS_CMD_AUTO,
>  	SPEC_STORE_BYPASS_CMD_ON,
> +	SPEC_STORE_BYPASS_CMD_PRCTL,
>  };
>  
>  static const char *ssb_strings[] = {
>  	[SPEC_STORE_BYPASS_NONE]	= "Vulnerable",
> -	[SPEC_STORE_BYPASS_DISABLE]	= "Mitigation: Speculative Store Bypass disabled"
> +	[SPEC_STORE_BYPASS_DISABLE]	= "Mitigation: Speculative Store Bypass disabled",
> +	[SPEC_STORE_BYPASS_PRCTL]	= "Mitigation: Speculative Store Bypass disabled via prctl"
>  };
>  
>  static const struct {
> @@ -395,8 +408,61 @@ static const struct {
>  	{ "auto",	SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */
>  	{ "on",		SPEC_STORE_BYPASS_CMD_ON },   /* Disable Speculative Store Bypass */
>  	{ "off",	SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */
> +	{ "prctl",	SPEC_STORE_BYPASS_CMD_PRCTL }, /* disable Speculative Store Bypass via prctl */
>  };
>  
> +void enable_speculative_store_bypass_mitigation(void)

The declaration of those functions says:

> +/*
> + * speculation store bypass vulnerabilities mitigation functions.
> + *
> + * Should be called only from pre-emption off paths.
> + *
> + */

Can you please explain how the PRCTL code fulfils the preempt off
prerequisite?

It does not, which clearly shows that you never tested that code with the
full set of debug stuff enabled, because otherwise the preempt check in
__this_cpu_read/write would have triggered.

> +{
> +       if (static_branch_unlikely(&specctrl_rds) &&
> +           /* need to toggle msr? */
> +           __this_cpu_read(rds_msr_val) != SPEC_CTRL_RDS) {

Bah. This is horrible to read, really.

	if (static_branch_unlikely(&specctrl_rds)) {
		if (__this_cpu_read(rds_msr_val) != SPEC_CTRL_RDS) {
 			wrmsrl(MSR_IA32_SPEC_CTRL, SPEC_CTRL_RDS | x86_spec_ctrl_base);
	               __this_cpu_write(rds_msr_val, SPEC_CTRL_RDS);
		}
	}

But see below.

> +int arch_prctl_set_rds(struct task_struct *p, unsigned long val)
> +{
> +	if (val == PR_RDS_MODE_ENABLE) {
> +		set_tsk_thread_flag(p, TIF_RDS);
> +		if (p == current)
> +			enable_speculative_store_bypass_mitigation();
> +	} else if (val == PR_RDS_MODE_DISABLE) {
> +		clear_tsk_thread_flag(p, TIF_RDS);
> +		if (p == current)
> +			disable_speculative_store_bypass_mitigation();

In an earlier discussion it was said, that the prtcl - if at all - is a one
way operation, i.e. set it end be done with it. Why is this different again?

And allowing to disable it once it is set is broken, really.

> +	} else
> +		return -EINVAL;

Missing parentheses. Dammit, why do I have say that over and over?

> +
> +	return 0;
> +}
> +
> +int arch_prctl_get_rds(struct task_struct *p)
> +{
> +	if (test_tsk_thread_flag(p, TIF_RDS))
> +		return PR_RDS_MODE_ENABLE;
> +
> +	return PR_RDS_MODE_DISABLE;
> +}
> +
> +void arch_prctl_copy_rds_mode(struct task_struct *parent, struct task_struct *child)

What the heck has this to do with prctl? Its about TIF_RDS where ever that
flag has been set from.

> +{
> +	if (test_tsk_thread_flag(parent, TIF_RDS))
> +		set_tsk_thread_flag(child, TIF_RDS);
> +	else
> +		clear_tsk_thread_flag(child, TIF_RDS);
> +}
> +
>  static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void)
>  {
>  	char arg[20];
> @@ -445,6 +511,7 @@ static void __init ssb_select_mitigation(void)
>  	     cmd == SPEC_STORE_BYPASS_CMD_AUTO))
>  		return;
>  
> +again:
>  	switch (cmd) {
>  	case SPEC_STORE_BYPASS_CMD_AUTO:
>  		/*
> @@ -452,9 +519,17 @@ static void __init ssb_select_mitigation(void)
>  		 */
>  		if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
>  			break;
> +		/* Choose prctl as the default mode */
> +		cmd = SPEC_STORE_BYPASS_CMD_PRCTL;
> +		goto again;
> +		break;
 
WTF? This is just crap. The goto is not required at all if done right.

>  	case SPEC_STORE_BYPASS_CMD_ON:

> @@ -529,6 +530,17 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
>  	/* Load the Intel cache allocation PQR MSR. */
>  	intel_rdt_sched_in();
>  
> +	if (test_tsk_thread_flag(next_p, TIF_RDS))
> +		/*
> +		 * Reduce data speculation for processes that
> +		 * have reduced data speculation option turned
> +		 * on via prctl.  This provides protection against speculation
> +		 * store bypass attack for untrusted code within the process.
> +		 */
> +		enable_speculative_store_bypass_mitigation();
> +	else
> +		disable_speculative_store_bypass_mitigation();

So this does an unconditional function call on every context switch
independent of whether the prctl mode is enabled or not. Crap, really.

What the heck is wrong with putting the TIF flag into _TIF_WORK_CTXSW and
put this into __switch_to_extra() and do:

	if ((tifp ^ tifn) & _TIF_RDS)
		speculative_bypass_update(tifn);

and that would be:

static inline void speculative_bypass_update(unsigned long tifn)
{
	u64 msr = x86_spec_ctrl_base | ((tifn & _TIF_RDS) << TIF_TO_RDS_SHIFT);

        wrmsrl(MSR_IA32_SPEC_CTRL, msr);
}

Which would get rid of the static key because the only place where the
mitigation mode and therefore the PRCTL enablement needs to be checked is
the prctl itself.

int arch_prctl_set_rds(struct task_struct *p, unsigned long val)
{
	if (!spec_store_bypass_prctl)
		return -ENOTSUPP;
		
	if (val != PR_RDS_MODE_ENABLE)
		return -EINVAL;

	set_tsk_thread_flag(p, TIF_RDS);
	if (p == current) {
		preempt_disable();
		speculative_bypass_update(task_thread_info(p)->flags);
		preempt_enable();
	}
	return 0;
}	

See? No static key in the context switch path and no unconditional function
calls, a single conditional which is evaluated already in switch_to.

If RDS is never set then speculative_bypass_update() is never ever
invoked. If it needs to be toggled, then the MSR write is worse than the
extra hoop through __switch_to_extra(). It also spares the per cpu state
variable for this RDS bit.

That would be too simple and too performant and would magically make this
work for 32bit as well, right? It even would make the seccomp magic simple
because all it has to do is to set TIF_RDS on the thread once and not check
for seccomp on every fricking context switch.

And most important it has absolutely ZERO impact if the prctl is not
enabled.

That said, the above might be acceptable when Linus does not veto the whole
thing in general.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* [MODERATED] Re: [PATCH v2] Linux Patch 1/1
  2018-04-27 16:08 ` Thomas Gleixner
@ 2018-04-27 16:47   ` Borislav Petkov
  2018-04-27 17:13     ` Jon Masters
  2018-04-27 17:58   ` Thomas Gleixner
  2018-04-27 19:01   ` [MODERATED] " Tim Chen
  2 siblings, 1 reply; 46+ messages in thread
From: Borislav Petkov @ 2018-04-27 16:47 UTC (permalink / raw)
  To: speck

On Fri, Apr 27, 2018 at 06:08:26PM +0200, speck for Thomas Gleixner wrote:
> That said, the above might be acceptable when Linus does not veto the whole
> thing in general.

Also, in another discussion we had, if *not* the task itself would call
prctl() but root or admin would be able to do set_tsk_thread_flag(p,
TIF_RDS) once, I think we'll be better off wrt to breaking the task and
tricking it into calling prctl() aspect.

I.e., do the marking from the outside and not have a prctl at all.

Regardless, the whole deal still feels like putting lipstick on a pig to
me.

-- 
Regards/Gruss,
    Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
-- 

^ permalink raw reply	[flat|nested] 46+ messages in thread

* [MODERATED] Re: [PATCH v2] Linux Patch 1/1
  2018-04-27 16:47   ` [MODERATED] " Borislav Petkov
@ 2018-04-27 17:13     ` Jon Masters
  2018-04-27 17:32       ` Borislav Petkov
  0 siblings, 1 reply; 46+ messages in thread
From: Jon Masters @ 2018-04-27 17:13 UTC (permalink / raw)
  To: speck

[-- Attachment #1: Type: text/plain, Size: 1206 bytes --]

On 04/27/2018 12:47 PM, speck for Borislav Petkov wrote:
> On Fri, Apr 27, 2018 at 06:08:26PM +0200, speck for Thomas Gleixner wrote:
>> That said, the above might be acceptable when Linus does not veto the whole
>> thing in general.
> 
> Also, in another discussion we had, if *not* the task itself would call
> prctl() but root or admin would be able to do set_tsk_thread_flag(p,
> TIF_RDS) once, I think we'll be better off wrt to breaking the task and
> tricking it into calling prctl() aspect.
> 
> I.e., do the marking from the outside and not have a prctl at all.
> 
> Regardless, the whole deal still feels like putting lipstick on a pig to
> me.

The problem is that the performance hit is up to 30% for worst case
workloads. Typically it's a few percent, but clearly the silicon
companies have a reasonable ask for us to preserve default leaving SSB
enabled. The only way to do that is if we can easily disable it on a
per-task basis for vulnerable code.

My opinion is this needs to be a prctl that a vulnerable application can
set. It can be a one-way thing that's set/inherited by children, etc.

Jon.

-- 
Computer Architect | Sent from my Fedora powered laptop


^ permalink raw reply	[flat|nested] 46+ messages in thread

* [MODERATED] Re: [PATCH v2] Linux Patch 1/1
  2018-04-27 17:13     ` Jon Masters
@ 2018-04-27 17:32       ` Borislav Petkov
  2018-04-27 17:48         ` Thomas Gleixner
  2018-04-27 17:50         ` [MODERATED] " Andi Kleen
  0 siblings, 2 replies; 46+ messages in thread
From: Borislav Petkov @ 2018-04-27 17:32 UTC (permalink / raw)
  To: speck

On Fri, Apr 27, 2018 at 01:13:03PM -0400, speck for Jon Masters wrote:
> My opinion is this needs to be a prctl that a vulnerable application can
> set. It can be a one-way thing that's set/inherited by children, etc.

If the task does prctl, it *must* at least be a one-way thing because,
as jikos said, if a malicious attacker can trick it into calling prctl()
again to turn MD back on, the whole dance we're doing is useless.

-- 
Regards/Gruss,
    Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
-- 

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH v2] Linux Patch 1/1
  2018-04-27 17:32       ` Borislav Petkov
@ 2018-04-27 17:48         ` Thomas Gleixner
  2018-04-27 18:09           ` [MODERATED] " Andi Kleen
  2018-04-27 17:50         ` [MODERATED] " Andi Kleen
  1 sibling, 1 reply; 46+ messages in thread
From: Thomas Gleixner @ 2018-04-27 17:48 UTC (permalink / raw)
  To: speck

On Fri, 27 Apr 2018, speck for Borislav Petkov wrote:

> On Fri, Apr 27, 2018 at 01:13:03PM -0400, speck for Jon Masters wrote:
> > My opinion is this needs to be a prctl that a vulnerable application can
> > set. It can be a one-way thing that's set/inherited by children, etc.
> 
> If the task does prctl, it *must* at least be a one-way thing because,
> as jikos said, if a malicious attacker can trick it into calling prctl()
> again to turn MD back on, the whole dance we're doing is useless.

That's not the worst problem. The worst thing is that it's inherited on
fork and then the child can disable it again nilly-willy, which makes no
sense whatsoever. If it's set only then the child CANNOT do that.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* [MODERATED] Re: [PATCH v2] Linux Patch 1/1
  2018-04-27 17:32       ` Borislav Petkov
  2018-04-27 17:48         ` Thomas Gleixner
@ 2018-04-27 17:50         ` Andi Kleen
  1 sibling, 0 replies; 46+ messages in thread
From: Andi Kleen @ 2018-04-27 17:50 UTC (permalink / raw)
  To: speck

On Fri, Apr 27, 2018 at 07:32:21PM +0200, speck for Borislav Petkov wrote:
> On Fri, Apr 27, 2018 at 01:13:03PM -0400, speck for Jon Masters wrote:
> > My opinion is this needs to be a prctl that a vulnerable application can
> > set. It can be a one-way thing that's set/inherited by children, etc.
> 
> If the task does prctl, it *must* at least be a one-way thing because,
> as jikos said, if a malicious attacker can trick it into calling prctl()
> again to turn MD back on, the whole dance we're doing is useless.

If you can trick someone to call prctl you can also trick them 
to execute other code. No need to use SSB and other complicated
side channel methods then. The game is already over.

-Andi

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH v2] Linux Patch 1/1
  2018-04-27 16:08 ` Thomas Gleixner
  2018-04-27 16:47   ` [MODERATED] " Borislav Petkov
@ 2018-04-27 17:58   ` Thomas Gleixner
  2018-04-27 19:01   ` [MODERATED] " Tim Chen
  2 siblings, 0 replies; 46+ messages in thread
From: Thomas Gleixner @ 2018-04-27 17:58 UTC (permalink / raw)
  To: speck

On Fri, 27 Apr 2018, speck for Thomas Gleixner wrote:
> On Wed, 25 Apr 2018, speck for Tim Chen wrote:
> > +
> > +void arch_prctl_copy_rds_mode(struct task_struct *parent, struct task_struct *child)
> 
> What the heck has this to do with prctl? Its about TIF_RDS where ever that
> flag has been set from.
> 
> > +{
> > +	if (test_tsk_thread_flag(parent, TIF_RDS))
> > +		set_tsk_thread_flag(child, TIF_RDS);
> > +	else
> > +		clear_tsk_thread_flag(child, TIF_RDS);
> > +}

And just for the record. Why do you need that function at all? What's wrong
with using arch_dup_task_struct() for this? Right, again it would be too
obvious to do that.

Oh well

^ permalink raw reply	[flat|nested] 46+ messages in thread

* [MODERATED] Re: [PATCH v2] Linux Patch 1/1
  2018-04-27 17:48         ` Thomas Gleixner
@ 2018-04-27 18:09           ` Andi Kleen
  2018-04-27 18:17             ` Thomas Gleixner
  0 siblings, 1 reply; 46+ messages in thread
From: Andi Kleen @ 2018-04-27 18:09 UTC (permalink / raw)
  To: speck

On Fri, Apr 27, 2018 at 07:48:03PM +0200, speck for Thomas Gleixner wrote:
> On Fri, 27 Apr 2018, speck for Borislav Petkov wrote:
> 
> > On Fri, Apr 27, 2018 at 01:13:03PM -0400, speck for Jon Masters wrote:
> > > My opinion is this needs to be a prctl that a vulnerable application can
> > > set. It can be a one-way thing that's set/inherited by children, etc.
> > 
> > If the task does prctl, it *must* at least be a one-way thing because,
> > as jikos said, if a malicious attacker can trick it into calling prctl()
> > again to turn MD back on, the whole dance we're doing is useless.
> 
> That's not the worst problem. The worst thing is that it's inherited on
> fork and then the child can disable it again nilly-willy, which makes no
> sense whatsoever. If it's set only then the child CANNOT do that.

If the child can execute code it itself why shouldn't it be able
to clear this bit? After all it can do everything else to itself too.

It would be unsafe if it could disable it on someone else,
but of course it can't do that.

-Andi

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH v2] Linux Patch 1/1
  2018-04-27 18:09           ` [MODERATED] " Andi Kleen
@ 2018-04-27 18:17             ` Thomas Gleixner
  2018-04-27 18:20               ` Thomas Gleixner
  2018-04-27 18:45               ` [MODERATED] " Andi Kleen
  0 siblings, 2 replies; 46+ messages in thread
From: Thomas Gleixner @ 2018-04-27 18:17 UTC (permalink / raw)
  To: speck

On Fri, 27 Apr 2018, speck for Andi Kleen wrote:
> On Fri, Apr 27, 2018 at 07:48:03PM +0200, speck for Thomas Gleixner wrote:
> > On Fri, 27 Apr 2018, speck for Borislav Petkov wrote:
> > 
> > > On Fri, Apr 27, 2018 at 01:13:03PM -0400, speck for Jon Masters wrote:
> > > > My opinion is this needs to be a prctl that a vulnerable application can
> > > > set. It can be a one-way thing that's set/inherited by children, etc.
> > > 
> > > If the task does prctl, it *must* at least be a one-way thing because,
> > > as jikos said, if a malicious attacker can trick it into calling prctl()
> > > again to turn MD back on, the whole dance we're doing is useless.
> > 
> > That's not the worst problem. The worst thing is that it's inherited on
> > fork and then the child can disable it again nilly-willy, which makes no
> > sense whatsoever. If it's set only then the child CANNOT do that.
> 
> If the child can execute code it itself why shouldn't it be able
> to clear this bit? After all it can do everything else to itself too.

Sure. You set it on that sandbox thing and then the thread which is spawned
of from there can disable it. Brilliant idea.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH v2] Linux Patch 1/1
  2018-04-27 18:17             ` Thomas Gleixner
@ 2018-04-27 18:20               ` Thomas Gleixner
  2018-04-27 18:30                 ` [MODERATED] " Linus Torvalds
  2018-04-27 18:45               ` [MODERATED] " Andi Kleen
  1 sibling, 1 reply; 46+ messages in thread
From: Thomas Gleixner @ 2018-04-27 18:20 UTC (permalink / raw)
  To: speck

On Fri, 27 Apr 2018, speck for Thomas Gleixner wrote:
> On Fri, 27 Apr 2018, speck for Andi Kleen wrote:
> > On Fri, Apr 27, 2018 at 07:48:03PM +0200, speck for Thomas Gleixner wrote:
> > > On Fri, 27 Apr 2018, speck for Borislav Petkov wrote:
> > > 
> > > > On Fri, Apr 27, 2018 at 01:13:03PM -0400, speck for Jon Masters wrote:
> > > > > My opinion is this needs to be a prctl that a vulnerable application can
> > > > > set. It can be a one-way thing that's set/inherited by children, etc.
> > > > 
> > > > If the task does prctl, it *must* at least be a one-way thing because,
> > > > as jikos said, if a malicious attacker can trick it into calling prctl()
> > > > again to turn MD back on, the whole dance we're doing is useless.
> > > 
> > > That's not the worst problem. The worst thing is that it's inherited on
> > > fork and then the child can disable it again nilly-willy, which makes no
> > > sense whatsoever. If it's set only then the child CANNOT do that.
> > 
> > If the child can execute code it itself why shouldn't it be able
> > to clear this bit? After all it can do everything else to itself too.
> 
> Sure. You set it on that sandbox thing and then the thread which is spawned
> of from there can disable it. Brilliant idea.

And in fact you want it even inherit on exec because then you can start the
JVM or whatever you want to protect with it disabled and never have to
worry about it again.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* [MODERATED] Re: [PATCH v2] Linux Patch 1/1
  2018-04-27 18:20               ` Thomas Gleixner
@ 2018-04-27 18:30                 ` Linus Torvalds
  2018-04-27 18:36                   ` Thomas Gleixner
  2018-04-27 19:26                   ` [MODERATED] " Tim Chen
  0 siblings, 2 replies; 46+ messages in thread
From: Linus Torvalds @ 2018-04-27 18:30 UTC (permalink / raw)
  To: speck



On Fri, 27 Apr 2018, speck for Thomas Gleixner wrote:
> On Fri, 27 Apr 2018, speck for Thomas Gleixner wrote:
> > 
> > Sure. You set it on that sandbox thing and then the thread which is spawned
> > of from there can disable it. Brilliant idea.
> 
> And in fact you want it even inherit on exec because then you can start the
> JVM or whatever you want to protect with it disabled and never have to
> worry about it again.

I don't think that's the attack people are worried about.

Basically, for the store buffer bypass, the *only* worry is JIT'ed code. 
I don't think people expect it to leak from supervisor to user mode, for 
example, so it's not primarily a protection domain issue.

It's almost purely a "I generated code assuming the architecture would 
actualy execute the code I wrote" issue.

That means that it's not like you're really executing "untrusted" code in 
general. Your jitted code isn't going to just run random sysctl's without 
any checking or anything like that. You trust your JVM to take care of 
*those* kinds of security issues.

So "user can turn it on and off as they please" is not really an issue. In 
fact, it could easily be seen as a feature. Making it expensive or hard to 
turn off the mitigation means that you can't necessarily just turn it on 
temporarily for the code you really care about.

Realistically, a JIT would probably prefer to only turn off store buffer 
bypass when jumping into JIT'ed code, and then turning it on when coming 
back. Exactly because it's really not about any "security", it's abnout "I 
translated untrusted code, and I want _exactly_ the semantics that I did 
the translations with - everything else I can and do check manually".

               Linus

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH v2] Linux Patch 1/1
  2018-04-27 18:30                 ` [MODERATED] " Linus Torvalds
@ 2018-04-27 18:36                   ` Thomas Gleixner
  2018-04-27 18:53                     ` [MODERATED] " Linus Torvalds
  2018-04-27 19:26                   ` [MODERATED] " Tim Chen
  1 sibling, 1 reply; 46+ messages in thread
From: Thomas Gleixner @ 2018-04-27 18:36 UTC (permalink / raw)
  To: speck

On Fri, 27 Apr 2018, speck for Linus Torvalds wrote:
> On Fri, 27 Apr 2018, speck for Thomas Gleixner wrote:
> > On Fri, 27 Apr 2018, speck for Thomas Gleixner wrote:
> > > 
> > > Sure. You set it on that sandbox thing and then the thread which is spawned
> > > of from there can disable it. Brilliant idea.
> > 
> > And in fact you want it even inherit on exec because then you can start the
> > JVM or whatever you want to protect with it disabled and never have to
> > worry about it again.
> 
> I don't think that's the attack people are worried about.
> 
> Basically, for the store buffer bypass, the *only* worry is JIT'ed code. 
> I don't think people expect it to leak from supervisor to user mode, for 
> example, so it's not primarily a protection domain issue.
> 
> It's almost purely a "I generated code assuming the architecture would 
> actualy execute the code I wrote" issue.
> 
> That means that it's not like you're really executing "untrusted" code in 
> general. Your jitted code isn't going to just run random sysctl's without 
> any checking or anything like that. You trust your JVM to take care of 
> *those* kinds of security issues.
> 
> So "user can turn it on and off as they please" is not really an issue. In 
> fact, it could easily be seen as a feature. Making it expensive or hard to 
> turn off the mitigation means that you can't necessarily just turn it on 
> temporarily for the code you really care about.
> 
> Realistically, a JIT would probably prefer to only turn off store buffer 
> bypass when jumping into JIT'ed code, and then turning it on when coming 
> back. Exactly because it's really not about any "security", it's abnout "I 
> translated untrusted code, and I want _exactly_ the semantics that I did 
> the translations with - everything else I can and do check manually".

Fair enough. I didn't look at it that way. Thanks for the clarification.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* [MODERATED] Re: [PATCH v2] Linux Patch 1/1
  2018-04-27 18:17             ` Thomas Gleixner
  2018-04-27 18:20               ` Thomas Gleixner
@ 2018-04-27 18:45               ` Andi Kleen
  2018-04-27 19:08                 ` Thomas Gleixner
  1 sibling, 1 reply; 46+ messages in thread
From: Andi Kleen @ 2018-04-27 18:45 UTC (permalink / raw)
  To: speck

> Sure. You set it on that sandbox thing and then the thread which is spawned
> of from there can disable it. Brilliant idea.

It's a TIF flag, not a mm flag.

It can only disable it for itself, not for other threads.

-Andi

^ permalink raw reply	[flat|nested] 46+ messages in thread

* [MODERATED] Re: [PATCH v2] Linux Patch 1/1
  2018-04-27 18:36                   ` Thomas Gleixner
@ 2018-04-27 18:53                     ` Linus Torvalds
  2018-04-27 19:14                       ` Andi Kleen
  2018-04-27 19:37                       ` Jon Masters
  0 siblings, 2 replies; 46+ messages in thread
From: Linus Torvalds @ 2018-04-27 18:53 UTC (permalink / raw)
  To: speck



On Fri, 27 Apr 2018, speck for Thomas Gleixner wrote:

> On Fri, 27 Apr 2018, speck for Linus Torvalds wrote:

> > Basically, for the store buffer bypass, the *only* worry is JIT'ed code. 
> > I don't think people expect it to leak from supervisor to user mode, for 
> > example, so it's not primarily a protection domain issue.
> 
> Fair enough. I didn't look at it that way. Thanks for the clarification.

NOTE! My reading of the worry people have about the store buffer bypass 
attack may simply be wrong. So somebody should verify this part.

Because if the store buffer bypass is actually seen as a security issue on 
its own, then your argument that people shouldn't be able to turn the 
mitigation off is obviously very valid.

But I *think* that the store buffer bypass is a non-issue if you actually 
control the whole binary, because then you could just have written your 
code to not do the store that hid the old value in the first place.

So I think this one is fundamentally different from say Spectre/Meltdown, 
that actually worked across security domains.

That also argues that perhaps we could possible leave the store buffer 
bypass mitigation off by default, if we could just detect the "people are 
JIT'ing code" situation. Which we can't, afaik, but..

Anybody?

              Linus

^ permalink raw reply	[flat|nested] 46+ messages in thread

* [MODERATED] Re: [PATCH v2] Linux Patch 1/1
  2018-04-27 16:08 ` Thomas Gleixner
  2018-04-27 16:47   ` [MODERATED] " Borislav Petkov
  2018-04-27 17:58   ` Thomas Gleixner
@ 2018-04-27 19:01   ` Tim Chen
  2018-04-27 19:19     ` Thomas Gleixner
  2 siblings, 1 reply; 46+ messages in thread
From: Tim Chen @ 2018-04-27 19:01 UTC (permalink / raw)
  To: speck


[-- Attachment #1.1: Type: text/plain, Size: 8051 bytes --]

On 04/27/2018 09:08 AM, speck for Thomas Gleixner wrote:
> On Wed, 25 Apr 2018, speck for Tim Chen wrote:
>> +/* dynamic reduced data speculation in use */
>> +DEFINE_STATIC_KEY_FALSE(specctrl_rds);
>> +EXPORT_SYMBOL(specctrl_rds);
> 
> Why needs this to be exported?

Artifact from earlier version that uses this in
a module that's no longer in use now.  Will remove.

> 
>> +/* current MSR_IA32_SPEC_CTRL state for ssb mitigation */
>> +DEFINE_PER_CPU(int, rds_msr_val);
>> +EXPORT_SYMBOL(rds_msr_val);
> 
> Ditto

Will remove.

> 
>>  /* The kernel command line selection */
>>  enum ssb_mitigation_cmd {
>>  	SPEC_STORE_BYPASS_CMD_NONE,
>>  	SPEC_STORE_BYPASS_CMD_AUTO,
>>  	SPEC_STORE_BYPASS_CMD_ON,
>> +	SPEC_STORE_BYPASS_CMD_PRCTL,
>>  };
>>  
>>  static const char *ssb_strings[] = {
>>  	[SPEC_STORE_BYPASS_NONE]	= "Vulnerable",
>> -	[SPEC_STORE_BYPASS_DISABLE]	= "Mitigation: Speculative Store Bypass disabled"
>> +	[SPEC_STORE_BYPASS_DISABLE]	= "Mitigation: Speculative Store Bypass disabled",
>> +	[SPEC_STORE_BYPASS_PRCTL]	= "Mitigation: Speculative Store Bypass disabled via prctl"
>>  };
>>  
>>  static const struct {
>> @@ -395,8 +408,61 @@ static const struct {
>>  	{ "auto",	SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */
>>  	{ "on",		SPEC_STORE_BYPASS_CMD_ON },   /* Disable Speculative Store Bypass */
>>  	{ "off",	SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */
>> +	{ "prctl",	SPEC_STORE_BYPASS_CMD_PRCTL }, /* disable Speculative Store Bypass via prctl */
>>  };
>>  
>> +void enable_speculative_store_bypass_mitigation(void)
> 
> The declaration of those functions says:
> 
>> +/*
>> + * speculation store bypass vulnerabilities mitigation functions.
>> + *
>> + * Should be called only from pre-emption off paths.
>> + *
>> + */
> 
> Can you please explain how the PRCTL code fulfils the preempt off
> prerequisite?

Ok. Will add that in next version.

> 
> It does not, which clearly shows that you never tested that code with the
> full set of debug stuff enabled, because otherwise the preempt check in
> __this_cpu_read/write would have triggered.
> 
>> +{
>> +       if (static_branch_unlikely(&specctrl_rds) &&
>> +           /* need to toggle msr? */
>> +           __this_cpu_read(rds_msr_val) != SPEC_CTRL_RDS) {
> 
> Bah. This is horrible to read, really.
> 
> 	if (static_branch_unlikely(&specctrl_rds)) {
> 		if (__this_cpu_read(rds_msr_val) != SPEC_CTRL_RDS) {
>  			wrmsrl(MSR_IA32_SPEC_CTRL, SPEC_CTRL_RDS | x86_spec_ctrl_base);
> 	               __this_cpu_write(rds_msr_val, SPEC_CTRL_RDS);
> 		}
> 	}
> 
> But see below.
> 
>> +int arch_prctl_set_rds(struct task_struct *p, unsigned long val)
>> +{
>> +	if (val == PR_RDS_MODE_ENABLE) {
>> +		set_tsk_thread_flag(p, TIF_RDS);
>> +		if (p == current)
>> +			enable_speculative_store_bypass_mitigation();
>> +	} else if (val == PR_RDS_MODE_DISABLE) {
>> +		clear_tsk_thread_flag(p, TIF_RDS);
>> +		if (p == current)
>> +			disable_speculative_store_bypass_mitigation();
> 
> In an earlier discussion it was said, that the prtcl - if at all - is a one
> way operation, i.e. set it end be done with it. Why is this different again?
> 
> And allowing to disable it once it is set is broken, really.
> 

Will change it if the consensus is not to revert it once it is set.

>> +	} else
>> +		return -EINVAL;
> 
> Missing parentheses. Dammit, why do I have say that over and over?

Will fix

> 
>> +
>> +	return 0;
>> +}
>> +
>> +int arch_prctl_get_rds(struct task_struct *p)
>> +{
>> +	if (test_tsk_thread_flag(p, TIF_RDS))
>> +		return PR_RDS_MODE_ENABLE;
>> +
>> +	return PR_RDS_MODE_DISABLE;
>> +}
>> +
>> +void arch_prctl_copy_rds_mode(struct task_struct *parent, struct task_struct *child)
> 
> What the heck has this to do with prctl? Its about TIF_RDS where ever that
> flag has been set from.
> 
>> +{
>> +	if (test_tsk_thread_flag(parent, TIF_RDS))
>> +		set_tsk_thread_flag(child, TIF_RDS);
>> +	else
>> +		clear_tsk_thread_flag(child, TIF_RDS);
>> +}
>> +
>>  static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void)
>>  {
>>  	char arg[20];
>> @@ -445,6 +511,7 @@ static void __init ssb_select_mitigation(void)
>>  	     cmd == SPEC_STORE_BYPASS_CMD_AUTO))
>>  		return;
>>  
>> +again:
>>  	switch (cmd) {
>>  	case SPEC_STORE_BYPASS_CMD_AUTO:
>>  		/*
>> @@ -452,9 +519,17 @@ static void __init ssb_select_mitigation(void)
>>  		 */
>>  		if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
>>  			break;
>> +		/* Choose prctl as the default mode */
>> +		cmd = SPEC_STORE_BYPASS_CMD_PRCTL;
>> +		goto again;
>> +		break;
>  
> WTF? This is just crap. The goto is not required at all if done right.
> 
>>  	case SPEC_STORE_BYPASS_CMD_ON:
> 
>> @@ -529,6 +530,17 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
>>  	/* Load the Intel cache allocation PQR MSR. */
>>  	intel_rdt_sched_in();
>>  
>> +	if (test_tsk_thread_flag(next_p, TIF_RDS))
>> +		/*
>> +		 * Reduce data speculation for processes that
>> +		 * have reduced data speculation option turned
>> +		 * on via prctl.  This provides protection against speculation
>> +		 * store bypass attack for untrusted code within the process.
>> +		 */
>> +		enable_speculative_store_bypass_mitigation();
>> +	else
>> +		disable_speculative_store_bypass_mitigation();
> 
> So this does an unconditional function call on every context switch
> independent of whether the prctl mode is enabled or not. Crap, really.
> 
> What the heck is wrong with putting the TIF flag into _TIF_WORK_CTXSW and
> put this into __switch_to_extra() and do:
> 
> 	if ((tifp ^ tifn) & _TIF_RDS)
> 		speculative_bypass_update(tifn);
> 
> and that would be:
> 
> static inline void speculative_bypass_update(unsigned long tifn)
> {
> 	u64 msr = x86_spec_ctrl_base | ((tifn & _TIF_RDS) << TIF_TO_RDS_SHIFT);
> 
>         wrmsrl(MSR_IA32_SPEC_CTRL, msr);
> }
> 
> Which would get rid of the static key because the only place where the
> mitigation mode and therefore the PRCTL enablement needs to be checked is
> the prctl itself.

Okay, I'll take a crack with this approach.  You have any suggestions
of dealing with AMD cpus which use a different msr (MSR_AMD64_LS_CFG)
and differnet bit depending on cpu model?

I'm hoping I don't have to check for vendor in this code every time.
I was earlier thinking of using static key to segregate the vendor
code paths. So if I put in a static_cpu_has(X86_FEATURE_AMD_RDS) kind of
check here, will it be acceptable?  Or is there already a static
key I can use to distinguish between intel and amd cpu? 

> 
> int arch_prctl_set_rds(struct task_struct *p, unsigned long val)
> {
> 	if (!spec_store_bypass_prctl)
> 		return -ENOTSUPP;
> 		
> 	if (val != PR_RDS_MODE_ENABLE)
> 		return -EINVAL;
> 
> 	set_tsk_thread_flag(p, TIF_RDS);
> 	if (p == current) {
> 		preempt_disable();
> 		speculative_bypass_update(task_thread_info(p)->flags);
> 		preempt_enable();
> 	}
> 	return 0;
> }	
> 
> See? No static key in the context switch path and no unconditional function
> calls, a single conditional which is evaluated already in switch_to.
> 
> If RDS is never set then speculative_bypass_update() is never ever
> invoked. If it needs to be toggled, then the MSR write is worse than the
> extra hoop through __switch_to_extra(). It also spares the per cpu state
> variable for this RDS bit.
> 
> That would be too simple and too performant and would magically make this
> work for 32bit as well, right? It even would make the seccomp magic simple
> because all it has to do is to set TIF_RDS on the thread once and not check
> for seccomp on every fricking context switch.
> 
> And most important it has absolutely ZERO impact if the prctl is not
> enabled.
> 
> That said, the above might be acceptable when Linus does not veto the whole
> thing in general.
> 
> Thanks,
> 
> 	tglx
> 


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH v2] Linux Patch 1/1
  2018-04-27 18:45               ` [MODERATED] " Andi Kleen
@ 2018-04-27 19:08                 ` Thomas Gleixner
  2018-04-27 19:25                   ` [MODERATED] " Andi Kleen
  0 siblings, 1 reply; 46+ messages in thread
From: Thomas Gleixner @ 2018-04-27 19:08 UTC (permalink / raw)
  To: speck

On Fri, 27 Apr 2018, speck for Andi Kleen wrote:

> > Sure. You set it on that sandbox thing and then the thread which is spawned
> > of from there can disable it. Brilliant idea.
> 
> It's a TIF flag, not a mm flag.

I'm well aware of that.

> It can only disable it for itself, not for other threads.

I know.

Still the point is that if you spawn a thread and you know that the thread
is going to run untrusted code, you want to make sure that MD is disabled
for that thread, right? So if that thread can reenable it, then you lost
control.

I might have completely misunderstood the whole thing, but with your terse
explanations I'm not going to understand it better.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* [MODERATED] Re: [PATCH v2] Linux Patch 1/1
  2018-04-27 18:53                     ` [MODERATED] " Linus Torvalds
@ 2018-04-27 19:14                       ` Andi Kleen
  2018-04-27 19:37                       ` Jon Masters
  1 sibling, 0 replies; 46+ messages in thread
From: Andi Kleen @ 2018-04-27 19:14 UTC (permalink / raw)
  To: speck

> That also argues that perhaps we could possible leave the store buffer 
> bypass mitigation off by default, if we could just detect the "people are 
> JIT'ing code" situation. Which we can't, afaik, but..
> 
> Anybody?

That's what Tim's patches are doing essentially. Keep mitigation off by default
but allows processes to enable using prctl.

Originally we tried to use seccomp for detect JIT, but it turned out that
there can be other mitigations for seccomp too 
(e.g. browsers doing full site process separation so they don't care about
leaks anymore). So separate prctl makes more sense.

-andi

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH v2] Linux Patch 1/1
  2018-04-27 19:01   ` [MODERATED] " Tim Chen
@ 2018-04-27 19:19     ` Thomas Gleixner
  0 siblings, 0 replies; 46+ messages in thread
From: Thomas Gleixner @ 2018-04-27 19:19 UTC (permalink / raw)
  To: speck

On Fri, 27 Apr 2018, speck for Tim Chen wrote:
> On 04/27/2018 09:08 AM, speck for Thomas Gleixner wrote:
> > Which would get rid of the static key because the only place where the
> > mitigation mode and therefore the PRCTL enablement needs to be checked is
> > the prctl itself.
> 
> Okay, I'll take a crack with this approach.  You have any suggestions
> of dealing with AMD cpus which use a different msr (MSR_AMD64_LS_CFG)
> and differnet bit depending on cpu model?
> 
> I'm hoping I don't have to check for vendor in this code every time.

Now you care about performance? Excuse me, but that just makes me laugh.

The vendor check in the slow path is definitely way less horrible than a
extra conditional _and_ a potentially pointless function call into a
separate compilation unit in the hot path.

> I was earlier thinking of using static key to segregate the vendor
> code paths. So if I put in a static_cpu_has(X86_FEATURE_AMD_RDS) kind of
> check here, will it be acceptable?  Or is there already a static
> key I can use to distinguish between intel and amd cpu?

Not that I know of. But adding one should be simple enough and I assume
that we might have other places where this would be useful.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* [MODERATED] Re: [PATCH v2] Linux Patch 1/1
  2018-04-27 19:08                 ` Thomas Gleixner
@ 2018-04-27 19:25                   ` Andi Kleen
  2018-04-27 19:52                     ` Thomas Gleixner
  0 siblings, 1 reply; 46+ messages in thread
From: Andi Kleen @ 2018-04-27 19:25 UTC (permalink / raw)
  To: speck

> Still the point is that if you spawn a thread and you know that the thread
> is going to run untrusted code, you want to make sure that MD is disabled
> for that thread, right? So if that thread can reenable it, then you lost
> control.

There are multiple levels

1) JITed sandbox. cannot just do system calls, but can do PRIME+PROBE and
may have call interfaces to other code
2) Native code process. No protection inside the process if you're at this
level. Can also normally do system calls.
3) Kernel.
3) Between processes. 

We try to protect against case (1) doing attacks here.

If the untrusted code can do random system calls you already lost control
in a much worse way. So you already need to have system call protection
in some way (using a JIT not allowing them or seccomp). Or rather
if the process can subvert its environment somehow to do the prctl
it can already execute  arbitrary code, which is much worse the SSB.

Or to put it differently, the point of the prctl is to not allow JITed
code to read data it shouldn't read from its JITed sandbox. If it already
has escaped its sandbox then it can already read everything it wants
in its address space, and do much worse thing.

On the other hand if you have the ability to switch it everywhere
you want that gives you more flexibility to do the protection
you need for your JIT.

-Andi

^ permalink raw reply	[flat|nested] 46+ messages in thread

* [MODERATED] Re: [PATCH v2] Linux Patch 1/1
  2018-04-27 18:30                 ` [MODERATED] " Linus Torvalds
  2018-04-27 18:36                   ` Thomas Gleixner
@ 2018-04-27 19:26                   ` Tim Chen
  2018-04-27 19:57                     ` Jon Masters
  2018-04-27 22:52                     ` Thomas Gleixner
  1 sibling, 2 replies; 46+ messages in thread
From: Tim Chen @ 2018-04-27 19:26 UTC (permalink / raw)
  To: speck


[-- Attachment #1.1: Type: text/plain, Size: 1594 bytes --]

On 04/27/2018 11:30 AM, speck for Linus Torvalds wrote:
> 
> 
> On Fri, 27 Apr 2018, speck for Thomas Gleixner wrote:
>> On Fri, 27 Apr 2018, speck for Thomas Gleixner wrote:
>>>
>>> Sure. You set it on that sandbox thing and then the thread which is spawned
>>> of from there can disable it. Brilliant idea.
>>
>> And in fact you want it even inherit on exec because then you can start the
>> JVM or whatever you want to protect with it disabled and never have to
>> worry about it again.
> 
> I don't think that's the attack people are worried about.
> 
> Basically, for the store buffer bypass, the *only* worry is JIT'ed code. 
> I don't think people expect it to leak from supervisor to user mode, for 
> example, so it's not primarily a protection domain issue.
> 
> It's almost purely a "I generated code assuming the architecture would 
> actualy execute the code I wrote" issue.
> 
> That means that it's not like you're really executing "untrusted" code in 
> general. Your jitted code isn't going to just run random sysctl's without 
> any checking or anything like that. You trust your JVM to take care of 
> *those* kinds of security issues.
> 
> So "user can turn it on and off as they please" is not really an issue. In 
> fact, it could easily be seen as a feature. Making it expensive or hard to 
> turn off the mitigation means that you can't necessarily just turn it on 
> temporarily for the code you really care about.


I'll keep the option in prctl to turn the mitigation off then till we
find a strong reason to do otherwise.

Tim


^ permalink raw reply	[flat|nested] 46+ messages in thread

* [MODERATED] Re: [PATCH v2] Linux Patch 1/1
  2018-04-27 18:53                     ` [MODERATED] " Linus Torvalds
  2018-04-27 19:14                       ` Andi Kleen
@ 2018-04-27 19:37                       ` Jon Masters
  2018-04-27 19:52                         ` Linus Torvalds
  1 sibling, 1 reply; 46+ messages in thread
From: Jon Masters @ 2018-04-27 19:37 UTC (permalink / raw)
  To: speck

[-- Attachment #1: Type: text/plain, Size: 1849 bytes --]

On 04/27/2018 02:53 PM, speck for Linus Torvalds wrote:
> 
> 
> On Fri, 27 Apr 2018, speck for Thomas Gleixner wrote:
> 
>> On Fri, 27 Apr 2018, speck for Linus Torvalds wrote:
> 
>>> Basically, for the store buffer bypass, the *only* worry is JIT'ed code. 
>>> I don't think people expect it to leak from supervisor to user mode, for 
>>> example, so it's not primarily a protection domain issue.
>>
>> Fair enough. I didn't look at it that way. Thanks for the clarification.
> 
> NOTE! My reading of the worry people have about the store buffer bypass 
> attack may simply be wrong. So somebody should verify this part.

The concern we have is about managed code, yeah. I didn't want to fight
too hard on the unset part if we get the set bit :) but there shouldn't
be a security issue to having the prctl go both ways (and that would be
ideal IMO). It's really about a sandboxed JavaScript or Java process (or
somesuch similar like thing) in which that JIT is not expecting the
potential for code to abuse this bypass to dump memory from the process
containing it. e.g. in JavaScript you can make a memory dumping widget
in about three lines of code that runs speculatively for that process.

> Because if the store buffer bypass is actually seen as a security issue on 
> its own, then your argument that people shouldn't be able to turn the 
> mitigation off is obviously very valid.

It /might/ be an issue for the kernel stack. This is the fear many of us
have. However, we do an lfence on entry to the kernel, so it would only
be a problem there if we can find vulnerable syscall functions, and we
haven't yet found any that contain suitable gadgets. That seems
contrived, but it's the reason for the original kernel entry/exit frob.

Jon.

-- 
Computer Architect | Sent from my Fedora powered laptop


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH v2] Linux Patch 1/1
  2018-04-27 19:25                   ` [MODERATED] " Andi Kleen
@ 2018-04-27 19:52                     ` Thomas Gleixner
  0 siblings, 0 replies; 46+ messages in thread
From: Thomas Gleixner @ 2018-04-27 19:52 UTC (permalink / raw)
  To: speck

On Fri, 27 Apr 2018, speck for Andi Kleen wrote:

> > Still the point is that if you spawn a thread and you know that the thread
> > is going to run untrusted code, you want to make sure that MD is disabled
> > for that thread, right? So if that thread can reenable it, then you lost
> > control.
> 
> There are multiple levels
> 
> 1) JITed sandbox. cannot just do system calls, but can do PRIME+PROBE and
> may have call interfaces to other code
> 2) Native code process. No protection inside the process if you're at this
> level. Can also normally do system calls.
> 3) Kernel.
> 3) Between processes. 
> 
> We try to protect against case (1) doing attacks here.
> 
> If the untrusted code can do random system calls you already lost control
> in a much worse way. So you already need to have system call protection
> in some way (using a JIT not allowing them or seccomp). Or rather
> if the process can subvert its environment somehow to do the prctl
> it can already execute  arbitrary code, which is much worse the SSB.
> 
> Or to put it differently, the point of the prctl is to not allow JITed
> code to read data it shouldn't read from its JITed sandbox. If it already
> has escaped its sandbox then it can already read everything it wants
> in its address space, and do much worse thing.
> 
> On the other hand if you have the ability to switch it everywhere
> you want that gives you more flexibility to do the protection
> you need for your JIT.

That's a reasonable explanation. May I ask why this was not part of the
changelog?

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* [MODERATED] Re: [PATCH v2] Linux Patch 1/1
  2018-04-27 19:37                       ` Jon Masters
@ 2018-04-27 19:52                         ` Linus Torvalds
  2018-04-27 20:01                           ` Jon Masters
  2018-05-03 12:55                           ` Jiri Kosina
  0 siblings, 2 replies; 46+ messages in thread
From: Linus Torvalds @ 2018-04-27 19:52 UTC (permalink / raw)
  To: speck



On Fri, 27 Apr 2018, speck for Jon Masters wrote:
> 
> It /might/ be an issue for the kernel stack. This is the fear many of us
> have.

Honestly, the kernel stack worry would be the *reverse* of the store 
buffer bypass.

The actual cache contents should be safe (meltdown), but a a user space 
load that hits in the store buffer of a kernel store might bypass the TLB 
checks _entirely_. So that would be a store buffer *HIT*, not a 
"speculatively do a load that later turns out _should_ have hit the store 
buffer".

That would be an architectural flaw, but one that should be fairly easy to 
mitigate ("just add a barrier before the return to user space, and curse 
the incompetence of hardware designers").

> However, we do an lfence on entry to the kernel

I wouldn't worry about kernel _entry_, since any data the kernel reads is 
by definition not a security issue. We have lots of cases where the kernel 
can get stale data because of speculation. That's not in the least new.

Examples of "kernel gets stale user space data" are things like takin ga 
fault on an instruction that has in the meantime been rewritten (due to 
incomplete I$ coherency). The kernel sees a fault from something that no 
longer even exists.

So it's kernel exit I'd worry about, where a kernel store that is (still) 
in the store buffers can be incorrectly seen by user space because the 
store buffer load forwarding doesn't do any permission checking at all.

I'm assuming nobody has _that_ bug. But it might be worth asking around.

             Linus

PS. The real kernel worry wrt the store buffer bypass is obviously ebpf, 
not kernel entry/exit. 

^ permalink raw reply	[flat|nested] 46+ messages in thread

* [MODERATED] Re: [PATCH v2] Linux Patch 1/1
  2018-04-27 19:26                   ` [MODERATED] " Tim Chen
@ 2018-04-27 19:57                     ` Jon Masters
  2018-04-27 20:07                       ` Tim Chen
  2018-04-27 20:37                       ` Thomas Gleixner
  2018-04-27 22:52                     ` Thomas Gleixner
  1 sibling, 2 replies; 46+ messages in thread
From: Jon Masters @ 2018-04-27 19:57 UTC (permalink / raw)
  To: speck

[-- Attachment #1: Type: text/plain, Size: 2299 bytes --]

On 04/27/2018 03:26 PM, speck for Tim Chen wrote:
> On 04/27/2018 11:30 AM, speck for Linus Torvalds wrote:
>>
>>
>> On Fri, 27 Apr 2018, speck for Thomas Gleixner wrote:
>>> On Fri, 27 Apr 2018, speck for Thomas Gleixner wrote:
>>>>
>>>> Sure. You set it on that sandbox thing and then the thread which is spawned
>>>> of from there can disable it. Brilliant idea.
>>>
>>> And in fact you want it even inherit on exec because then you can start the
>>> JVM or whatever you want to protect with it disabled and never have to
>>> worry about it again.
>>
>> I don't think that's the attack people are worried about.
>>
>> Basically, for the store buffer bypass, the *only* worry is JIT'ed code. 
>> I don't think people expect it to leak from supervisor to user mode, for 
>> example, so it's not primarily a protection domain issue.
>>
>> It's almost purely a "I generated code assuming the architecture would 
>> actualy execute the code I wrote" issue.
>>
>> That means that it's not like you're really executing "untrusted" code in 
>> general. Your jitted code isn't going to just run random sysctl's without 
>> any checking or anything like that. You trust your JVM to take care of 
>> *those* kinds of security issues.
>>
>> So "user can turn it on and off as they please" is not really an issue. In 
>> fact, it could easily be seen as a feature. Making it expensive or hard to 
>> turn off the mitigation means that you can't necessarily just turn it on 
>> temporarily for the code you really care about.

> I'll keep the option in prctl to turn the mitigation off then till we
> find a strong reason to do otherwise.

Konrad and I are sitting here chatting in my office. We debated the
enumeration for this. Do we think simply calling and getting an EINVAL
on older kernels is ok, or do we need a specific enumeration for the
ability to control SSB from software? An enumeration would be nice. You
might want to look to how the kernel handles MPX today to indicate
whether the prctl implemented, and also whether runtime supported.

(he's not got his encrypted email in front of him but he says he'll work
in your update to his patch series if you send it out tonight)

Jon.

-- 
Computer Architect | Sent from my Fedora powered laptop


^ permalink raw reply	[flat|nested] 46+ messages in thread

* [MODERATED] Re: [PATCH v2] Linux Patch 1/1
  2018-04-27 19:52                         ` Linus Torvalds
@ 2018-04-27 20:01                           ` Jon Masters
  2018-05-03 12:55                           ` Jiri Kosina
  1 sibling, 0 replies; 46+ messages in thread
From: Jon Masters @ 2018-04-27 20:01 UTC (permalink / raw)
  To: speck

[-- Attachment #1: Type: text/plain, Size: 1433 bytes --]

On 04/27/2018 03:52 PM, speck for Linus Torvalds wrote:
> 
> 
> On Fri, 27 Apr 2018, speck for Jon Masters wrote:
>>
>> It /might/ be an issue for the kernel stack. This is the fear many of us
>> have.
> 
> Honestly, the kernel stack worry would be the *reverse* of the store 
> buffer bypass.

Kinda, but also on entry we save untrusted user-provided register values
on the stack. If those are updated and then used (or also happens with
uninitialized stack values), then it's possible that the older untrusted
content is used. It's a very unlikely attack since we also do an lfence
as part of the indirect retpolined function calls on entry, but once we
dispatch to the syscall handler functions themselves, this very
theoretical case is vaguely possible.

<snip>

> So it's kernel exit I'd worry about, where a kernel store that is (still) 
> in the store buffers can be incorrectly seen by user space because the 
> store buffer load forwarding doesn't do any permission checking at all.
> 
> I'm assuming nobody has _that_ bug. But it might be worth asking around.

They do and this is still a problem. Not just on x86. POWER are adding a
new "stf-flush" instruction to every entry/exit path for this reason.
They're ready with those patches - obviously they're not on this list,
but they know they can ping you with them.

Jon.

-- 
Computer Architect | Sent from my Fedora powered laptop


^ permalink raw reply	[flat|nested] 46+ messages in thread

* [MODERATED] Re: [PATCH v2] Linux Patch 1/1
  2018-04-27 19:57                     ` Jon Masters
@ 2018-04-27 20:07                       ` Tim Chen
  2018-04-27 20:20                         ` Thomas Gleixner
  2018-04-27 20:37                       ` Thomas Gleixner
  1 sibling, 1 reply; 46+ messages in thread
From: Tim Chen @ 2018-04-27 20:07 UTC (permalink / raw)
  To: speck


[-- Attachment #1.1: Type: text/plain, Size: 942 bytes --]

On 04/27/2018 12:57 PM, speck for Jon Masters wrote:
> On 04/27/2018 03:26 PM, speck for Tim Chen wrote:
>> On 04/27/2018 11:30 AM, speck for Linus Torvalds wrote:
>>>
>>>
>>> 
> 
>> I'll keep the option in prctl to turn the mitigation off then till we
>> find a strong reason to do otherwise.
> 
> Konrad and I are sitting here chatting in my office. We debated the
> enumeration for this. Do we think simply calling and getting an EINVAL
> on older kernels is ok, or do we need a specific enumeration for the
> ability to control SSB from software? An enumeration would be nice. You
> might want to look to how the kernel handles MPX today to indicate
> whether the prctl implemented, and also whether runtime supported.
> 
> (he's not got his encrypted email in front of him but he says he'll work
> in your update to his patch series if you send it out tonight)
> 

I'll do my best to send an update tonight.

Tim

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH v2] Linux Patch 1/1
  2018-04-27 20:07                       ` Tim Chen
@ 2018-04-27 20:20                         ` Thomas Gleixner
  0 siblings, 0 replies; 46+ messages in thread
From: Thomas Gleixner @ 2018-04-27 20:20 UTC (permalink / raw)
  To: speck

On Fri, 27 Apr 2018, speck for Tim Chen wrote:
> On 04/27/2018 12:57 PM, speck for Jon Masters wrote:
> > On 04/27/2018 03:26 PM, speck for Tim Chen wrote:
> >> On 04/27/2018 11:30 AM, speck for Linus Torvalds wrote:
> >>>
> >>>
> >>> 
> > 
> >> I'll keep the option in prctl to turn the mitigation off then till we
> >> find a strong reason to do otherwise.
> > 
> > Konrad and I are sitting here chatting in my office. We debated the
> > enumeration for this. Do we think simply calling and getting an EINVAL
> > on older kernels is ok, or do we need a specific enumeration for the
> > ability to control SSB from software? An enumeration would be nice. You
> > might want to look to how the kernel handles MPX today to indicate
> > whether the prctl implemented, and also whether runtime supported.
> > 
> > (he's not got his encrypted email in front of him but he says he'll work
> > in your update to his patch series if you send it out tonight)
> > 
> 
> I'll do my best to send an update tonight.

Please take your time. I've already picked up Konrads series for
integration and will post tomorrow the git URL where it can be
retrieved. That prctl stuff just goes incremental on top.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH v2] Linux Patch 1/1
  2018-04-27 19:57                     ` Jon Masters
  2018-04-27 20:07                       ` Tim Chen
@ 2018-04-27 20:37                       ` Thomas Gleixner
  2018-04-28  3:57                         ` [MODERATED] " Tim Chen
  1 sibling, 1 reply; 46+ messages in thread
From: Thomas Gleixner @ 2018-04-27 20:37 UTC (permalink / raw)
  To: speck

On Fri, 27 Apr 2018, speck for Jon Masters wrote:
> Konrad and I are sitting here chatting in my office. We debated the
> enumeration for this. Do we think simply calling and getting an EINVAL
> on older kernels is ok, or do we need a specific enumeration for the
> ability to control SSB from software? An enumeration would be nice. You
> might want to look to how the kernel handles MPX today to indicate
> whether the prctl implemented, and also whether runtime supported.

So if you looked at what I wrote earlier:

int arch_prctl_set_rds(struct task_struct *p, unsigned long val)
{
        if (!spec_store_bypass_prctl)
                return -ENOTSUPP;

That's exactly the point. If the (older or not updated) kernel does not
support it then you get -EINVAL because thats the default for not
implemented prctls.

So if spec_store_bypass_prctl is not enabled you can return some other
error code and we surely can make that error code depend on the mitigation
state and return -ENOTVULNERABLE or whatever is suitable.

And no, you don't need the MPX way because that mitigation stuff is
unconditional and cannot be compiled out.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH v2] Linux Patch 1/1
  2018-04-27 19:26                   ` [MODERATED] " Tim Chen
  2018-04-27 19:57                     ` Jon Masters
@ 2018-04-27 22:52                     ` Thomas Gleixner
  1 sibling, 0 replies; 46+ messages in thread
From: Thomas Gleixner @ 2018-04-27 22:52 UTC (permalink / raw)
  To: speck

On Fri, 27 Apr 2018, speck for Tim Chen wrote:
> On 04/27/2018 11:30 AM, speck for Linus Torvalds wrote:
> > So "user can turn it on and off as they please" is not really an issue. In 
> > fact, it could easily be seen as a feature. Making it expensive or hard to 
> > turn off the mitigation means that you can't necessarily just turn it on 
> > temporarily for the code you really care about.
> 
> I'll keep the option in prctl to turn the mitigation off then till we
> find a strong reason to do otherwise.

You better add a strong reason to the changelog WHY it is safe to have it
both ways.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* [MODERATED] Re: [PATCH v2] Linux Patch 1/1
  2018-04-27 20:37                       ` Thomas Gleixner
@ 2018-04-28  3:57                         ` Tim Chen
  2018-04-28 12:26                           ` Thomas Gleixner
  0 siblings, 1 reply; 46+ messages in thread
From: Tim Chen @ 2018-04-28  3:57 UTC (permalink / raw)
  To: speck


[-- Attachment #1.1: Type: text/plain, Size: 1811 bytes --]

On 04/27/2018 01:37 PM, speck for Thomas Gleixner wrote:
> On Fri, 27 Apr 2018, speck for Jon Masters wrote:
>> Konrad and I are sitting here chatting in my office. We debated the
>> enumeration for this. Do we think simply calling and getting an EINVAL
>> on older kernels is ok, or do we need a specific enumeration for the
>> ability to control SSB from software? An enumeration would be nice. You
>> might want to look to how the kernel handles MPX today to indicate
>> whether the prctl implemented, and also whether runtime supported.
> 
> So if you looked at what I wrote earlier:
> 
> int arch_prctl_set_rds(struct task_struct *p, unsigned long val)
> {
>         if (!spec_store_bypass_prctl)
>                 return -ENOTSUPP;
> 

The ENOTSUPP is in include/linux/errno.h and
not in userspace api, can we return that?

There is a comment in the include/linux/errno.h that says:
/*
 * These should never be seen by user programs....

and then 
/* Defined for the NFSv3 protocol */


> That's exactly the point. If the (older or not updated) kernel does not
> support it then you get -EINVAL because thats the default for not
> implemented prctls.
> 
> So if spec_store_bypass_prctl is not enabled you can return some other
> error code and we surely can make that error code depend on the mitigation
> state and return -ENOTVULNERABLE or whatever is suitable.

You mean add a new ENOTVULNERABLE error number?  I can't seem to find
errno.  If I need to add this, which file should it go to?

I'll keep the EINVAL temporarily in my patch till I can figure out
the ENOTSUPP and ENOTVULNERABLE usage.

Tim

> 
> And no, you don't need the MPX way because that mitigation stuff is
> unconditional and cannot be compiled out.
> 
> Thanks,
> 
> 	tglx
> 


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH v2] Linux Patch 1/1
  2018-04-28  3:57                         ` [MODERATED] " Tim Chen
@ 2018-04-28 12:26                           ` Thomas Gleixner
  2018-04-28 17:14                             ` [MODERATED] " Tim Chen
  0 siblings, 1 reply; 46+ messages in thread
From: Thomas Gleixner @ 2018-04-28 12:26 UTC (permalink / raw)
  To: speck

On Fri, 27 Apr 2018, speck for Tim Chen wrote:
> On 04/27/2018 01:37 PM, speck for Thomas Gleixner wrote:
> > So if spec_store_bypass_prctl is not enabled you can return some other
> > error code and we surely can make that error code depend on the mitigation
> > state and return -ENOTVULNERABLE or whatever is suitable.
> 
> You mean add a new ENOTVULNERABLE error number?  I can't seem to find
> errno.  If I need to add this, which file should it go to?
> 
> I'll keep the EINVAL temporarily in my patch till I can figure out
> the ENOTSUPP and ENOTVULNERABLE usage.

I made that up and added:  or whatever is suitable

In other words, find a suitable error code to express what you want to
express.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* [MODERATED] Re: [PATCH v2] Linux Patch 1/1
  2018-04-28 12:26                           ` Thomas Gleixner
@ 2018-04-28 17:14                             ` Tim Chen
  0 siblings, 0 replies; 46+ messages in thread
From: Tim Chen @ 2018-04-28 17:14 UTC (permalink / raw)
  To: speck


[-- Attachment #1.1: Type: text/plain, Size: 1112 bytes --]

On 04/28/2018 05:26 AM, speck for Thomas Gleixner wrote:
> On Fri, 27 Apr 2018, speck for Tim Chen wrote:
>> On 04/27/2018 01:37 PM, speck for Thomas Gleixner wrote:
>>> So if spec_store_bypass_prctl is not enabled you can return some other
>>> error code and we surely can make that error code depend on the mitigation
>>> state and return -ENOTVULNERABLE or whatever is suitable.
>>
>> You mean add a new ENOTVULNERABLE error number?  I can't seem to find
>> errno.  If I need to add this, which file should it go to?
>>
>> I'll keep the EINVAL temporarily in my patch till I can figure out
>> the ENOTSUPP and ENOTVULNERABLE usage.
> 
> I made that up and added:  or whatever is suitable
> 
> In other words, find a suitable error code to express what you want to
> express.

How about EOPNOTSUPP if we are are trying to set RDS and platform
does not support it.  And EALREADY to indicate that the platform
is not vulnerable?

They are both defined in include/uapi/asm-generic/errno.h.

For older kernels, they will always return EINVAL.

Tim
 

> 
> Thanks,
> 
> 	tglx
> 


^ permalink raw reply	[flat|nested] 46+ messages in thread

* [MODERATED] Re: [PATCH v2] Linux Patch 1/1
  2018-04-27 19:52                         ` Linus Torvalds
  2018-04-27 20:01                           ` Jon Masters
@ 2018-05-03 12:55                           ` Jiri Kosina
  2018-05-03 16:24                             ` Linus Torvalds
  1 sibling, 1 reply; 46+ messages in thread
From: Jiri Kosina @ 2018-05-03 12:55 UTC (permalink / raw)
  To: speck

On Fri, 27 Apr 2018, speck for Linus Torvalds wrote:

> So it's kernel exit I'd worry about, where a kernel store that is (still) 
> in the store buffers can be incorrectly seen by user space because the 
> store buffer load forwarding doesn't do any permission checking at all.
> 
> I'm assuming nobody has _that_ bug. But it might be worth asking around.
> 
>              Linus
> 
> PS. The real kernel worry wrt the store buffer bypass is obviously ebpf, 
> not kernel entry/exit. 

Well but in principle ebpf makes just the whole thing easier, as the 
attacker can freely construct fine-grained load+store gadget instead of 
having to find it in the code.

We've identified numberous spectre v1 gadgets already present in the 
kernel, I don't really see how this is different in *principle* ("only" 
you need load+store, instead of v1's load+load).

So JITed code is more of a concern because it allows for creating the 
gadget instead to having to actively find it in the existing code, but I 
fail to see how this is not an issue of userspace being able to read all 
of kernel's memory (either by using explicitly ebpf-instrumented gadget, 
or simply finding one in the existing code).

So I still believe we need to protect kernel as well, and prctl()-based 
aproach doesn't work for that.

So ... how does current patchset even deal with the ebpf scenario?

I'd be glad to be enlightened and proven wrong.

Thanks,

-- 
Jiri Kosina
SUSE Labs

^ permalink raw reply	[flat|nested] 46+ messages in thread

* [MODERATED] Re: [PATCH v2] Linux Patch 1/1
  2018-05-03 12:55                           ` Jiri Kosina
@ 2018-05-03 16:24                             ` Linus Torvalds
  2018-05-03 19:40                               ` Jiri Kosina
  0 siblings, 1 reply; 46+ messages in thread
From: Linus Torvalds @ 2018-05-03 16:24 UTC (permalink / raw)
  To: speck



On Thu, 3 May 2018, speck for Jiri Kosina wrote:
> 
> We've identified numberous spectre v1 gadgets already present in the 
> kernel, I don't really see how this is different in *principle* ("only" 
> you need load+store, instead of v1's load+load).

No, afaik you need slow-address-store + load + load, where the first load 
bypasses the store buffer speculatively, and the second independent load 
then exposes the speculative wrong load data by perturing the cache.

Or am I missing some clever attack?

Honestly, the store buffer bypass is really really hard to use to attack 
the kernel exactly because you need that third memory access to perturb 
the side channel, and the first access has to be a store with the right 
setup so that the second access loads the wrong data, and the wrong data 
is the interesting one (not the right one from the store).

             Linus

^ permalink raw reply	[flat|nested] 46+ messages in thread

* [MODERATED] Re: [PATCH v2] Linux Patch 1/1
  2018-05-03 16:24                             ` Linus Torvalds
@ 2018-05-03 19:40                               ` Jiri Kosina
  2018-05-04  3:51                                 ` Linus Torvalds
  0 siblings, 1 reply; 46+ messages in thread
From: Jiri Kosina @ 2018-05-03 19:40 UTC (permalink / raw)
  To: speck

On Thu, 3 May 2018, speck for Linus Torvalds wrote:

> No, afaik you need slow-address-store + load + load, where the first load 
> bypasses the store buffer speculatively, and the second independent load 
> then exposes the speculative wrong load data by perturing the cache.
> 
> Or am I missing some clever attack?

Yeah, that's more or less the way I understand it -- when slow store is 
followed by load from the same address, the speculative execution observes 
the stale (previous) value if the store hasn't been retired yet.

If that depending speculatively executed load performs any action that 
creates a sidechannel (such as using that value as array index, thus 
causing observable/measurable cacheline effects), the attack is complete.

Basically the same we had with load+load for v1. And there are plenty of 
such constructs in the kernel. Why would we think that it's not the case 
with store+load?

It's just that noone really bothered too much, as it's all so much simpler 
with BPF.

> Honestly, the store buffer bypass is really really hard to use to attack 
> the kernel exactly because you need that third memory access to perturb 
> the side channel, and the first access has to be a store with the right 
> setup so that the second access loads the wrong data, and the wrong data 
> is the interesting one (not the right one from the store).

So even if there really are no such gadgets / constructs anywhere in the 
kernel, they can be of course created through BPF. And therefore one of 
our primary concerns should be disabling MD around BPF program execution, 
shouldn't it?

Thanks,

-- 
Jiri Kosina
SUSE Labs

^ permalink raw reply	[flat|nested] 46+ messages in thread

* [MODERATED] Re: [PATCH v2] Linux Patch 1/1
  2018-05-03 19:40                               ` Jiri Kosina
@ 2018-05-04  3:51                                 ` Linus Torvalds
  2018-05-04  6:00                                   ` Jiri Kosina
  2018-05-07 16:40                                   ` [MODERATED] " Dave Hansen
  0 siblings, 2 replies; 46+ messages in thread
From: Linus Torvalds @ 2018-05-04  3:51 UTC (permalink / raw)
  To: speck



On Thu, 3 May 2018, speck for Jiri Kosina wrote:
> 
> > Honestly, the store buffer bypass is really really hard to use to attack 
> > the kernel exactly because you need that third memory access to perturb 
> > the side channel, and the first access has to be a store with the right 
> > setup so that the second access loads the wrong data, and the wrong data 
> > is the interesting one (not the right one from the store).
> 
> So even if there really are no such gadgets / constructs anywhere in the 
> kernel, they can be of course created through BPF.

No, not really. There is no "of course". 

It's not clear that such a gadget can be constructed at all with bpf. We 
should *not* say "of course you can construct that" without having any 
kind of real data on that.

The thing is, to construct that, you really have to construct:

 - a store to the same address that you do a subsequent load from

This part is probably easyish. BUT:

 - you have to use a different base pointer for it, which suddenly means 
   that it's not the obvious case of a stack spill "let's store to the 
   stack and the load the value back", because that generally uses the 
   same base pointer unless your code generation is complete garbage.

So now you have one big wrinkle already. But the *really* big wrinkle is 
that you also have to have interesting data that the store that was 
bypassed was actually hiding. 

But let's assume you have that  store-load pair, and let's assume you 
actually can load some interesting data that the first store overwrote. 
Now you need to do the second indirect load to act as the side channel for 
the cache. 

So it's not trivial to generate in the first place, but then you need to 
*run* this gadget too. Which shouldn't be possible in the first place, 
since you now need bascially the Spectre v1 to execute that arbitrary 
code.

And the thing is, if you need spectre v1 to execute it, why go through all 
that complexity? Just do the original spectre thing to begin with.

No. To be practically exploitable, you need to be able to just generate 
the code and run it directly. But again, I'm assuming just the *regular* 
spectre is easier to just exploit if you can do that.

So what we should look at is "can you make ebpf easily generate that 
"store followed by load from same address with a different base 
register, and can you run it as non-root".

And I haven't seen anybody actually try to look into that.  Have the ebpf 
people been involved on this issue at all?

              Linus

^ permalink raw reply	[flat|nested] 46+ messages in thread

* [MODERATED] Re: [PATCH v2] Linux Patch 1/1
  2018-05-04  3:51                                 ` Linus Torvalds
@ 2018-05-04  6:00                                   ` Jiri Kosina
  2018-05-04  6:03                                     ` Jiri Kosina
  2018-05-07 16:40                                   ` [MODERATED] " Dave Hansen
  1 sibling, 1 reply; 46+ messages in thread
From: Jiri Kosina @ 2018-05-04  6:00 UTC (permalink / raw)
  To: speck

On Thu, 3 May 2018, speck for Linus Torvalds wrote:

> This part is probably easyish. BUT:
> 
>  - you have to use a different base pointer for it, which suddenly means 
>    that it's not the obvious case of a stack spill "let's store to the 
>    stack and the load the value back", because that generally uses the 
>    same base pointer unless your code generation is complete garbage.

Fair enough; if gcc is ever generating such code, it should probably be 
better to fix gcc directly anyway.

> So what we should look at is "can you make ebpf easily generate that 
> "store followed by load from same address with a different base 
> register, and can you run it as non-root".

So that's, according to my understanding, the PoC that google folks have 
succesfully created (the only modification AFAIK they did to eBPF is 
allowing more easier cacheline flush from within eBPF), basically let 
otherwise sandboxed process to

- store unsingned long into memory
- store pointer into the same memory location
- load from that location and use it so that the side channel is there 
  (array index), as due to MD the speculative execution observes the
  unsinged long, and not the pointer

And reportedly they succeeded (with the easiser cacheline flush 
modification).

> And I haven't seen anybody actually try to look into that.  Have the ebpf 
> people been involved on this issue at all?

I am not sure if any of them is on this list. Jon, would it be possible to 
drag Daniel Borkmann here?

Thanks,

-- 
Jiri Kosina
SUSE Labs

^ permalink raw reply	[flat|nested] 46+ messages in thread

* [MODERATED] Re: [PATCH v2] Linux Patch 1/1
  2018-05-04  6:00                                   ` Jiri Kosina
@ 2018-05-04  6:03                                     ` Jiri Kosina
  0 siblings, 0 replies; 46+ messages in thread
From: Jiri Kosina @ 2018-05-04  6:03 UTC (permalink / raw)
  To: speck

On Fri, 4 May 2018, speck for Jiri Kosina wrote:

> So that's, according to my understanding, the PoC that google folks have 
> succesfully created (the only modification AFAIK they did to eBPF is 
> allowing more easier cacheline flush from within eBPF), basically let 
> otherwise sandboxed process to
> 
> - store unsingned long into memory
> - store pointer into the same memory location
> - load from that location and use it so that the side channel is there 
>   (array index), as due to MD the speculative execution observes the
>   unsinged long, and not the pointer
> 
> And reportedly they succeeded (with the easiser cacheline flush 
> modification).
> 
> > And I haven't seen anybody actually try to look into that.  Have the ebpf 
> > people been involved on this issue at all?
> 
> I am not sure if any of them is on this list. Jon, would it be possible to 
> drag Daniel Borkmann here?

Ah, only now I noticed that Dave already did send a patchset to mitigate 
this in the interpreter by disabling MD while the interpreter is running; 
so probably no need to any more.

Thanks,

-- 
Jiri Kosina
SUSE Labs

^ permalink raw reply	[flat|nested] 46+ messages in thread

* [MODERATED] [PATCH v2] Linux Patch 1/1
  2018-05-04  3:51                                 ` Linus Torvalds
  2018-05-04  6:00                                   ` Jiri Kosina
@ 2018-05-07 16:40                                   ` Dave Hansen
  1 sibling, 0 replies; 46+ messages in thread
From: Dave Hansen @ 2018-05-07 16:40 UTC (permalink / raw)
  To: speck

On 05/03/2018 08:51 PM, speck for Linus Torvalds wrote:
> So now you have one big wrinkle already. But the *really* big wrinkle is 
> that you also have to have interesting data that the store that was 
> bypassed was actually hiding. 

When I first heard about this being another "bounds bypass", I wasn't
all that worried, certainly not for the kernel.  We don't frequently
shrink arrays or put secret data in array slots that were recently
accessible as non-secrets.

But, there is a _partial_ PoC that demonstrates some of this stuff
crossing the kernel boundary with eBPF.  eBPF does a good job of
tracking the architectural value of registers to make sure they, for
instance, contain pointers to things that are supposed to be
dereferenced.  But, if you can cause some type confusion (with
speculative store bypass) you can get speculative dereferences of eBPF
registers that don't contain pointers at all.

I think the bar is still way higher than for Spectre V1, though.

^ permalink raw reply	[flat|nested] 46+ messages in thread

end of thread, other threads:[~2018-05-07 16:40 UTC | newest]

Thread overview: 46+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-04-26  0:11 [MODERATED] [PATCH v2] Linux Patch 1/1 Tim Chen
2018-04-26  3:15 ` [MODERATED] " Jon Masters
2018-04-26 17:03   ` Tim Chen
2018-04-26  3:29 ` Jon Masters
2018-04-26 12:03   ` Thomas Gleixner
2018-04-26 16:48     ` [MODERATED] " Tim Chen
2018-04-26 16:18 ` Jon Masters
2018-04-27 16:08 ` Thomas Gleixner
2018-04-27 16:47   ` [MODERATED] " Borislav Petkov
2018-04-27 17:13     ` Jon Masters
2018-04-27 17:32       ` Borislav Petkov
2018-04-27 17:48         ` Thomas Gleixner
2018-04-27 18:09           ` [MODERATED] " Andi Kleen
2018-04-27 18:17             ` Thomas Gleixner
2018-04-27 18:20               ` Thomas Gleixner
2018-04-27 18:30                 ` [MODERATED] " Linus Torvalds
2018-04-27 18:36                   ` Thomas Gleixner
2018-04-27 18:53                     ` [MODERATED] " Linus Torvalds
2018-04-27 19:14                       ` Andi Kleen
2018-04-27 19:37                       ` Jon Masters
2018-04-27 19:52                         ` Linus Torvalds
2018-04-27 20:01                           ` Jon Masters
2018-05-03 12:55                           ` Jiri Kosina
2018-05-03 16:24                             ` Linus Torvalds
2018-05-03 19:40                               ` Jiri Kosina
2018-05-04  3:51                                 ` Linus Torvalds
2018-05-04  6:00                                   ` Jiri Kosina
2018-05-04  6:03                                     ` Jiri Kosina
2018-05-07 16:40                                   ` [MODERATED] " Dave Hansen
2018-04-27 19:26                   ` [MODERATED] " Tim Chen
2018-04-27 19:57                     ` Jon Masters
2018-04-27 20:07                       ` Tim Chen
2018-04-27 20:20                         ` Thomas Gleixner
2018-04-27 20:37                       ` Thomas Gleixner
2018-04-28  3:57                         ` [MODERATED] " Tim Chen
2018-04-28 12:26                           ` Thomas Gleixner
2018-04-28 17:14                             ` [MODERATED] " Tim Chen
2018-04-27 22:52                     ` Thomas Gleixner
2018-04-27 18:45               ` [MODERATED] " Andi Kleen
2018-04-27 19:08                 ` Thomas Gleixner
2018-04-27 19:25                   ` [MODERATED] " Andi Kleen
2018-04-27 19:52                     ` Thomas Gleixner
2018-04-27 17:50         ` [MODERATED] " Andi Kleen
2018-04-27 17:58   ` Thomas Gleixner
2018-04-27 19:01   ` [MODERATED] " Tim Chen
2018-04-27 19:19     ` Thomas Gleixner

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.