LKML Archive on lore.kernel.org
 help / Atom feed
* [PATCH] x86/retpoline: Fill RSB on context switch for affected CPUs
@ 2018-01-12 17:49 David Woodhouse
  2018-01-12 18:02 ` Andi Kleen
                   ` (5 more replies)
  0 siblings, 6 replies; 20+ messages in thread
From: David Woodhouse @ 2018-01-12 17:49 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Paul Turner, LKML, Linus Torvalds, Greg Kroah-Hartman, Tim Chen,
	Dave Hansen, tglx, Kees Cook, Rik van Riel, Peter Zijlstra,
	Andy Lutomirski, Jiri Kosina, gnomes, x86, thomas.lendacky,
	Josh Poimboeuf

When we context switch from a shallow call stack to a deeper one, as we
'ret' up the deeper side we may encounter RSB entries (predictions for
where the 'ret' goes to) which were populated in userspace. This is
problematic if we have neither SMEP nor KPTI (the latter of which marks
userspace pages as NX for the kernel), as malicious code in userspace
may then be executed speculatively. So overwrite the CPU's return
prediction stack with calls which are predicted to return to an infinite
loop, to "capture" speculation if this happens. This is required both
for retpoline, and also in conjunction with IBRS for !SMEP && !KPTI.

On Skylake+ the problem is slightly different, and an *underflow* of the
RSB may cause errant branch predictions to occur. So there it's not so
much overwrite, as *filling* the RSB to attempt to prevent it getting
empty. This is only a partial solution for Skylake+ since there are many
other conditions which may result in the RSB becoming empty. The full
solution on Skylake+ is to use IBRS, which will prevent the problem even
when the RSB becomes empty. With IBRS, the RSB-stuffing will not be
required on context switch.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Acked-by: Arjan van de Ven <arjan@linux.intel.com>
---
 arch/x86/entry/entry_32.S          | 11 +++++++++++
 arch/x86/entry/entry_64.S          | 11 +++++++++++
 arch/x86/include/asm/cpufeatures.h |  1 +
 arch/x86/kernel/cpu/bugs.c         | 34 ++++++++++++++++++++++++++++++++++
 4 files changed, 57 insertions(+)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index a1f28a5..ef0e478 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -244,6 +244,17 @@ ENTRY(__switch_to_asm)
 	movl	%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
 #endif
 
+#ifdef CONFIG_RETPOLINE
+	/*
+	 * When we switch from a shallower to a deeper call stack
+	 * the RSB may either underflow or use entries populated
+	 * with userspace addresses. On CPUs where those concerns
+	 * exist, overwrite the RSB with entries which capture
+	 * speculative execution to prevent attack.
+	 */
+	FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
 	/* restore callee-saved registers */
 	popl	%esi
 	popl	%edi
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 59874bc..b2937d8 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -487,6 +487,17 @@ ENTRY(__switch_to_asm)
 	movq	%rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
 #endif
 
+#ifdef CONFIG_RETPOLINE
+	/*
+	 * When we switch from a shallower to a deeper call stack
+	 * the RSB may either underflow or use entries populated
+	 * with userspace addresses. On CPUs where those concerns
+	 * exist, overwrite the RSB with entries which capture
+	 * speculative execution to prevent attack.
+	 */
+	FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
 	/* restore callee-saved registers */
 	popq	%r15
 	popq	%r14
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index f275447..aa09559 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -211,6 +211,7 @@
 #define X86_FEATURE_AVX512_4FMAPS	( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */
 
 #define X86_FEATURE_MBA			( 7*32+18) /* Memory Bandwidth Allocation */
+#define X86_FEATURE_RSB_CTXSW		( 7*32+19) /* Fill RSB on context switches */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW		( 8*32+ 0) /* Intel TPR Shadow */
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index e4dc261..c17cce3 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -23,6 +23,7 @@
 #include <asm/alternative.h>
 #include <asm/pgtable.h>
 #include <asm/set_memory.h>
+#include <asm/intel-family.h>
 
 static void __init spectre_v2_select_mitigation(void);
 
@@ -155,6 +156,22 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
 	return SPECTRE_V2_CMD_NONE;
 }
 
+/* Check for Skylake-like CPUs (for RSB handling) */
+static bool __init is_skylake_era(void)
+{
+	if (boot_cpu_data.x86 == 6) {
+		switch (boot_cpu_data.x86_model) {
+		case INTEL_FAM6_SKYLAKE_MOBILE:
+		case INTEL_FAM6_SKYLAKE_DESKTOP:
+		case INTEL_FAM6_SKYLAKE_X:
+		case INTEL_FAM6_KABYLAKE_MOBILE:
+		case INTEL_FAM6_KABYLAKE_DESKTOP:
+			return true;
+		}
+	}
+	return false;
+}
+
 static void __init spectre_v2_select_mitigation(void)
 {
 	enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
@@ -213,6 +230,23 @@ static void __init spectre_v2_select_mitigation(void)
 
 	spectre_v2_enabled = mode;
 	pr_info("%s\n", spectre_v2_strings[mode]);
+
+	/*
+	 * If we don't have SMEP or KPTI, then we run the risk of hitting
+	 * userspace addresses in the RSB after a context switch from a
+	 * shallow call stack to a deeper one. We must must fill the entire
+	 * RSB to avoid that, even when using IBRS.
+	 *
+	 * Skylake era CPUs have a separate issue with *underflow* of the
+	 * RSB, when they will predict 'ret' targets from the generic BTB.
+	 * IBRS makes that safe, but we need to fill the RSB on context
+	 * switch if we're using retpoline.
+	 */
+	if ((!boot_cpu_has(X86_FEATURE_PTI) &&
+	     !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
+		setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
+		pr_info("Filling RSB on context switch\n");
+	}
 }
 
 #undef pr_fmt
-- 
2.7.4

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] x86/retpoline: Fill RSB on context switch for affected CPUs
  2018-01-12 17:49 [PATCH] x86/retpoline: Fill RSB on context switch for affected CPUs David Woodhouse
@ 2018-01-12 18:02 ` Andi Kleen
  2018-01-12 18:23   ` David Woodhouse
  2018-01-12 18:05 ` Andrew Cooper
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 20+ messages in thread
From: Andi Kleen @ 2018-01-12 18:02 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Paul Turner, LKML, Linus Torvalds, Greg Kroah-Hartman, Tim Chen,
	Dave Hansen, tglx, Kees Cook, Rik van Riel, Peter Zijlstra,
	Andy Lutomirski, Jiri Kosina, gnomes, x86, thomas.lendacky,
	Josh Poimboeuf

> +	if ((!boot_cpu_has(X86_FEATURE_PTI) &&
> +	     !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
> +		setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
> +		pr_info("Filling RSB on context switch\n");

We need to do more things for Skylake (like idle and interrupt fill
and possibly deep call chain), so I don't think it makes sense to

- have an individual flag for each of these. It can be just a single
flag that enables all of this for Skylake

- print something for each of them. that will just be very noisy
without any useful benefit to the user.

-Andi

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] x86/retpoline: Fill RSB on context switch for affected CPUs
  2018-01-12 17:49 [PATCH] x86/retpoline: Fill RSB on context switch for affected CPUs David Woodhouse
  2018-01-12 18:02 ` Andi Kleen
@ 2018-01-12 18:05 ` Andrew Cooper
  2018-01-12 18:56   ` David Woodhouse
  2018-01-14 11:39 ` Thomas Gleixner
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 20+ messages in thread
From: Andrew Cooper @ 2018-01-12 18:05 UTC (permalink / raw)
  To: David Woodhouse, Andi Kleen
  Cc: Paul Turner, LKML, Linus Torvalds, Greg Kroah-Hartman, Tim Chen,
	Dave Hansen, tglx, Kees Cook, Rik van Riel, Peter Zijlstra,
	Andy Lutomirski, Jiri Kosina, gnomes, x86, thomas.lendacky,
	Josh Poimboeuf

On 12/01/18 17:49, David Woodhouse wrote:
> When we context switch from a shallow call stack to a deeper one, as we
> 'ret' up the deeper side we may encounter RSB entries (predictions for
> where the 'ret' goes to) which were populated in userspace. This is
> problematic if we have neither SMEP nor KPTI (the latter of which marks
> userspace pages as NX for the kernel), as malicious code in userspace
> may then be executed speculatively. So overwrite the CPU's return
> prediction stack with calls which are predicted to return to an infinite
> loop, to "capture" speculation if this happens. This is required both
> for retpoline, and also in conjunction with IBRS for !SMEP && !KPTI.
>
> On Skylake+ the problem is slightly different, and an *underflow* of the
> RSB may cause errant branch predictions to occur. So there it's not so
> much overwrite, as *filling* the RSB to attempt to prevent it getting
> empty. This is only a partial solution for Skylake+ since there are many
> other conditions which may result in the RSB becoming empty. The full
> solution on Skylake+ is to use IBRS, which will prevent the problem even
> when the RSB becomes empty. With IBRS, the RSB-stuffing will not be
> required on context switch.

If you unconditionally fill the RSB on every entry to supervisor mode,
then there are never guest-controlled RSB values to be found.

With that property (and IBRS to protect Skylake+), you shouldn't need
RSB filling anywhere in the middle.

~Andrew

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] x86/retpoline: Fill RSB on context switch for affected CPUs
  2018-01-12 18:02 ` Andi Kleen
@ 2018-01-12 18:23   ` David Woodhouse
  0 siblings, 0 replies; 20+ messages in thread
From: David Woodhouse @ 2018-01-12 18:23 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Paul Turner, LKML, Linus Torvalds, Greg Kroah-Hartman, Tim Chen,
	Dave Hansen, tglx, Kees Cook, Rik van Riel, Peter Zijlstra,
	Andy Lutomirski, Jiri Kosina, gnomes, x86, thomas.lendacky,
	Josh Poimboeuf

[-- Attachment #1: Type: text/plain, Size: 1059 bytes --]

On Fri, 2018-01-12 at 10:02 -0800, Andi Kleen wrote:
> > +     if ((!boot_cpu_has(X86_FEATURE_PTI) &&
> > +          !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
> > +             setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
> > +             pr_info("Filling RSB on context switch\n");
> 
> We need to do more things for Skylake (like idle and interrupt fill
> and possibly deep call cahin), so I don't think it makes sense to
> 
> - have an individual flag for each of these. It can be just a single
> flag that enables all of this for Skylake
> 
> - print something for each of them. that will just be very noisy
> without any useful benefit to the user.

I still think we are better off using IBRS by default on Skylake.

This patch wasn't really for Skylake; the real use case was for AMD
CPUs (!PTI) without SMEP. Since it happens to be needed on Skylake too we
might as well enable it there... but that doesn't mean I was planning
to do all the other horrible crap we need for Skylake.

[-- Attachment #2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 5213 bytes --]

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] x86/retpoline: Fill RSB on context switch for affected CPUs
  2018-01-12 18:05 ` Andrew Cooper
@ 2018-01-12 18:56   ` David Woodhouse
  2018-01-12 23:41     ` Josh Poimboeuf
  0 siblings, 1 reply; 20+ messages in thread
From: David Woodhouse @ 2018-01-12 18:56 UTC (permalink / raw)
  To: Andrew Cooper, Andi Kleen
  Cc: Paul Turner, LKML, Linus Torvalds, Greg Kroah-Hartman, Tim Chen,
	Dave Hansen, tglx, Kees Cook, Rik van Riel, Peter Zijlstra,
	Andy Lutomirski, Jiri Kosina, gnomes, x86, thomas.lendacky,
	Josh Poimboeuf

[-- Attachment #1: Type: text/plain, Size: 1179 bytes --]

On Fri, 2018-01-12 at 18:05 +0000, Andrew Cooper wrote:
> 
> If you unconditionally fill the RSB on every entry to supervisor mode,
> then there are never guest-controlled RSB values to be found.
> 
> With that property (and IBRS to protect Skylake+), you shouldn't need
> RSB filling anywhere in the middle.

Yes, that's right.

We have a choice — we can do it on kernel entry (in the interrupt and
syscall and NMI paths), and that's nice and easy and really safe
because we know there's *never* a bad RSB entry lurking while we're in
the kernel.

The alternative, which is what we seem to be leaning towards now in
the latest tables from Dave (https://goo.gl/pXbvBE and
https://goo.gl/Grbuhf), is to do it on context switch when we might be
switching from a shallow call stack to a deeper one. Which has much
better performance characteristics for processes which make non-
sleeping syscalls.

The caveat with the latter approach is that we do depend on the fact
that context switches are the only imbalance in the kernel. But that's
OK — we don't have a longjmp or anything else like that. Especially
that goes into a *deeper* call stack. Do we?

[-- Attachment #2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 5213 bytes --]

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] x86/retpoline: Fill RSB on context switch for affected CPUs
  2018-01-12 18:56   ` David Woodhouse
@ 2018-01-12 23:41     ` Josh Poimboeuf
  0 siblings, 0 replies; 20+ messages in thread
From: Josh Poimboeuf @ 2018-01-12 23:41 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Andrew Cooper, Andi Kleen, Paul Turner, LKML, Linus Torvalds,
	Greg Kroah-Hartman, Tim Chen, Dave Hansen, tglx, Kees Cook,
	Rik van Riel, Peter Zijlstra, Andy Lutomirski, Jiri Kosina,
	gnomes, x86, thomas.lendacky

On Fri, Jan 12, 2018 at 06:56:18PM +0000, David Woodhouse wrote:
> On Fri, 2018-01-12 at 18:05 +0000, Andrew Cooper wrote:
> > 
> > If you unconditionally fill the RSB on every entry to supervisor mode,
> > then there are never guest-controlled RSB values to be found.
> > 
> > With that property (and IBRS to protect Skylake+), you shouldn't need
> > RSB filling anywhere in the middle.
> 
> Yes, that's right.
> 
> We have a choice — we can do it on kernel entry (in the interrupt and
> syscall and NMI paths), and that's nice and easy and really safe
> because we know there's *never* a bad RSB entry lurking while we're in
> the kernel.
> 
> The alternative, which is what we seem to be learning towards now in
> the latest tables from Dave (https://goo.gl/pXbvBE and
> https://goo.gl/Grbuhf), is to do it on context switch when we might be
> switching from a shallow call stack to a deeper one. Which has much
> better performance characteristics for processes which make non-
> sleeping syscalls.
> 
> The caveat with the latter approach is that we do depend on the fact
> that context switches are the only imbalance in the kernel. But that's
> OK — we don't have a longjmp or anything else like that. Especially
> that goes into a *deeper* call stack. Do we?

At least some generated code might create RSB imbalances.  Function
graph tracing and kretprobes, for example.  They mess with the return
path and could probably underflow the RSB pretty easily.  I guess they'd
need to be reworked a bit so they only do a single ret.

-- 
Josh

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] x86/retpoline: Fill RSB on context switch for affected CPUs
  2018-01-12 17:49 [PATCH] x86/retpoline: Fill RSB on context switch for affected CPUs David Woodhouse
  2018-01-12 18:02 ` Andi Kleen
  2018-01-12 18:05 ` Andrew Cooper
@ 2018-01-14 11:39 ` Thomas Gleixner
  2018-01-14 17:04 ` [tip:x86/pti] " tip-bot for David Woodhouse
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 20+ messages in thread
From: Thomas Gleixner @ 2018-01-14 11:39 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Andi Kleen, Paul Turner, LKML, Linus Torvalds,
	Greg Kroah-Hartman, Tim Chen, Dave Hansen, Kees Cook,
	Rik van Riel, Peter Zijlstra, Andy Lutomirski, Jiri Kosina,
	gnomes, x86, thomas.lendacky, Josh Poimboeuf

On Fri, 12 Jan 2018, David Woodhouse wrote:
> +/* Check for Skylake-like CPUs (for RSB handling) */
> +static bool __init is_skylake_era(void)
> +{
> +	if (boot_cpu_data.x86 == 6) {

This wants a check for vendor == Intel

> +		switch (boot_cpu_data.x86_model) {
> +		case INTEL_FAM6_SKYLAKE_MOBILE:
> +		case INTEL_FAM6_SKYLAKE_DESKTOP:
> +		case INTEL_FAM6_SKYLAKE_X:
> +		case INTEL_FAM6_KABYLAKE_MOBILE:
> +		case INTEL_FAM6_KABYLAKE_DESKTOP:
> +			return true;
> +		}
> +	}

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [tip:x86/pti] x86/retpoline: Fill RSB on context switch for affected CPUs
  2018-01-12 17:49 [PATCH] x86/retpoline: Fill RSB on context switch for affected CPUs David Woodhouse
                   ` (2 preceding siblings ...)
  2018-01-14 11:39 ` Thomas Gleixner
@ 2018-01-14 17:04 ` " tip-bot for David Woodhouse
  2018-01-15 14:35   ` David Laight
  2018-01-14 23:37 ` tip-bot for David Woodhouse
  2018-03-09 13:12 ` Maciej S. Szmigiero
  5 siblings, 1 reply; 20+ messages in thread
From: tip-bot for David Woodhouse @ 2018-01-14 17:04 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: hpa, arjan, mingo, keescook, peterz, gregkh, torvalds, jikos,
	luto, dave.hansen, jpoimboe, pjt, tim.c.chen, ak, linux-kernel,
	tglx, dwmw, riel

Commit-ID:  a0ab15c0fb68e202bebd9b17fa49fd7ec48975b3
Gitweb:     https://git.kernel.org/tip/a0ab15c0fb68e202bebd9b17fa49fd7ec48975b3
Author:     David Woodhouse <dwmw@amazon.co.uk>
AuthorDate: Fri, 12 Jan 2018 17:49:25 +0000
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Sun, 14 Jan 2018 16:41:39 +0100

x86/retpoline: Fill RSB on context switch for affected CPUs

On context switch from a shallow call stack to a deeper one, as the CPU
does 'ret' up the deeper side it may encounter RSB entries (predictions for
where the 'ret' goes to) which were populated in userspace.

This is problematic if neither SMEP nor KPTI (the latter of which marks
userspace pages as NX for the kernel) are active, as malicious code in
userspace may then be executed speculatively.

Overwrite the CPU's return prediction stack with calls which are predicted
to return to an infinite loop, to "capture" speculation if this
happens. This is required both for retpoline, and also in conjunction with
IBRS for !SMEP && !KPTI.

On Skylake+ the problem is slightly different, and an *underflow* of the
RSB may cause errant branch predictions to occur. So there it's not so much
overwrite, as *filling* the RSB to attempt to prevent it getting
empty. This is only a partial solution for Skylake+ since there are many
other conditions which may result in the RSB becoming empty. The full
solution on Skylake+ is to use IBRS, which will prevent the problem even
when the RSB becomes empty. With IBRS, the RSB-stuffing will not be
required on context switch.

[ tglx: Added missing vendor check and slightly massaged comments and
  	changelog ]

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: gnomes@lxorguk.ukuu.org.uk
Cc: Rik van Riel <riel@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: thomas.lendacky@amd.com
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jikos@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Kees Cook <keescook@google.com>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
Cc: Paul Turner <pjt@google.com>
Link: https://lkml.kernel.org/r/1515779365-9032-1-git-send-email-dwmw@amazon.co.uk

---
 arch/x86/entry/entry_32.S          | 11 +++++++++++
 arch/x86/entry/entry_64.S          | 11 +++++++++++
 arch/x86/include/asm/cpufeatures.h |  1 +
 arch/x86/kernel/cpu/bugs.c         | 36 ++++++++++++++++++++++++++++++++++++
 4 files changed, 59 insertions(+)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index a1f28a5..60c4c34 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -244,6 +244,17 @@ ENTRY(__switch_to_asm)
 	movl	%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
 #endif
 
+#ifdef CONFIG_RETPOLINE
+	/*
+	 * When switching from a shallower to a deeper call stack
+	 * the RSB may either underflow or use entries populated
+	 * with userspace addresses. On CPUs where those concerns
+	 * exist, overwrite the RSB with entries which capture
+	 * speculative execution to prevent attack.
+	 */
+	FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
 	/* restore callee-saved registers */
 	popl	%esi
 	popl	%edi
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 59874bc..d54a0ed 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -487,6 +487,17 @@ ENTRY(__switch_to_asm)
 	movq	%rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
 #endif
 
+#ifdef CONFIG_RETPOLINE
+	/*
+	 * When switching from a shallower to a deeper call stack
+	 * the RSB may either underflow or use entries populated
+	 * with userspace addresses. On CPUs where those concerns
+	 * exist, overwrite the RSB with entries which capture
+	 * speculative execution to prevent attack.
+	 */
+	FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
 	/* restore callee-saved registers */
 	popq	%r15
 	popq	%r14
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index f275447..aa09559 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -211,6 +211,7 @@
 #define X86_FEATURE_AVX512_4FMAPS	( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */
 
 #define X86_FEATURE_MBA			( 7*32+18) /* Memory Bandwidth Allocation */
+#define X86_FEATURE_RSB_CTXSW		( 7*32+19) /* Fill RSB on context switches */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW		( 8*32+ 0) /* Intel TPR Shadow */
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index e4dc261..390b3dc 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -23,6 +23,7 @@
 #include <asm/alternative.h>
 #include <asm/pgtable.h>
 #include <asm/set_memory.h>
+#include <asm/intel-family.h>
 
 static void __init spectre_v2_select_mitigation(void);
 
@@ -155,6 +156,23 @@ disable:
 	return SPECTRE_V2_CMD_NONE;
 }
 
+/* Check for Skylake-like CPUs (for RSB handling) */
+static bool __init is_skylake_era(void)
+{
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+	    boot_cpu_data.x86 == 6) {
+		switch (boot_cpu_data.x86_model) {
+		case INTEL_FAM6_SKYLAKE_MOBILE:
+		case INTEL_FAM6_SKYLAKE_DESKTOP:
+		case INTEL_FAM6_SKYLAKE_X:
+		case INTEL_FAM6_KABYLAKE_MOBILE:
+		case INTEL_FAM6_KABYLAKE_DESKTOP:
+			return true;
+		}
+	}
+	return false;
+}
+
 static void __init spectre_v2_select_mitigation(void)
 {
 	enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
@@ -213,6 +231,24 @@ retpoline_auto:
 
 	spectre_v2_enabled = mode;
 	pr_info("%s\n", spectre_v2_strings[mode]);
+
+	/*
+	 * If neither SMEP or KPTI are available, there is a risk of
+	 * hitting userspace addresses in the RSB after a context switch
+	 * from a shallow call stack to a deeper one. To prevent this fill
+	 * the entire RSB, even when using IBRS.
+	 *
+	 * Skylake era CPUs have a separate issue with *underflow* of the
+	 * RSB, when they will predict 'ret' targets from the generic BTB.
+	 * The proper mitigation for this is IBRS. If IBRS is not supported
+	 * or deactivated in favour of retpolines the RSB fill on context
+	 * switch is required.
+	 */
+	if ((!boot_cpu_has(X86_FEATURE_PTI) &&
+	     !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
+		setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
+		pr_info("Filling RSB on context switch\n");
+	}
 }
 
 #undef pr_fmt

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [tip:x86/pti] x86/retpoline: Fill RSB on context switch for affected CPUs
  2018-01-12 17:49 [PATCH] x86/retpoline: Fill RSB on context switch for affected CPUs David Woodhouse
                   ` (3 preceding siblings ...)
  2018-01-14 17:04 ` [tip:x86/pti] " tip-bot for David Woodhouse
@ 2018-01-14 23:37 ` tip-bot for David Woodhouse
  2018-01-15  0:05   ` Andi Kleen
  2018-03-09 13:12 ` Maciej S. Szmigiero
  5 siblings, 1 reply; 20+ messages in thread
From: tip-bot for David Woodhouse @ 2018-01-14 23:37 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: ak, jpoimboe, pjt, linux-kernel, tim.c.chen, jikos, riel, peterz,
	dave.hansen, hpa, arjan, mingo, keescook, gregkh, tglx, luto,
	torvalds, dwmw

Commit-ID:  c995efd5a740d9cbafbf58bde4973e8b50b4d761
Gitweb:     https://git.kernel.org/tip/c995efd5a740d9cbafbf58bde4973e8b50b4d761
Author:     David Woodhouse <dwmw@amazon.co.uk>
AuthorDate: Fri, 12 Jan 2018 17:49:25 +0000
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Mon, 15 Jan 2018 00:32:44 +0100

x86/retpoline: Fill RSB on context switch for affected CPUs

On context switch from a shallow call stack to a deeper one, as the CPU
does 'ret' up the deeper side it may encounter RSB entries (predictions for
where the 'ret' goes to) which were populated in userspace.

This is problematic if neither SMEP nor KPTI (the latter of which marks
userspace pages as NX for the kernel) are active, as malicious code in
userspace may then be executed speculatively.

Overwrite the CPU's return prediction stack with calls which are predicted
to return to an infinite loop, to "capture" speculation if this
happens. This is required both for retpoline, and also in conjunction with
IBRS for !SMEP && !KPTI.

On Skylake+ the problem is slightly different, and an *underflow* of the
RSB may cause errant branch predictions to occur. So there it's not so much
overwrite, as *filling* the RSB to attempt to prevent it getting
empty. This is only a partial solution for Skylake+ since there are many
other conditions which may result in the RSB becoming empty. The full
solution on Skylake+ is to use IBRS, which will prevent the problem even
when the RSB becomes empty. With IBRS, the RSB-stuffing will not be
required on context switch.

[ tglx: Added missing vendor check and slightly massaged comments and
  	changelog ]

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: gnomes@lxorguk.ukuu.org.uk
Cc: Rik van Riel <riel@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: thomas.lendacky@amd.com
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jikos@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Kees Cook <keescook@google.com>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
Cc: Paul Turner <pjt@google.com>
Link: https://lkml.kernel.org/r/1515779365-9032-1-git-send-email-dwmw@amazon.co.uk

---
 arch/x86/entry/entry_32.S          | 11 +++++++++++
 arch/x86/entry/entry_64.S          | 11 +++++++++++
 arch/x86/include/asm/cpufeatures.h |  1 +
 arch/x86/kernel/cpu/bugs.c         | 36 ++++++++++++++++++++++++++++++++++++
 4 files changed, 59 insertions(+)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index a1f28a5..60c4c34 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -244,6 +244,17 @@ ENTRY(__switch_to_asm)
 	movl	%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
 #endif
 
+#ifdef CONFIG_RETPOLINE
+	/*
+	 * When switching from a shallower to a deeper call stack
+	 * the RSB may either underflow or use entries populated
+	 * with userspace addresses. On CPUs where those concerns
+	 * exist, overwrite the RSB with entries which capture
+	 * speculative execution to prevent attack.
+	 */
+	FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
 	/* restore callee-saved registers */
 	popl	%esi
 	popl	%edi
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 59874bc..d54a0ed 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -487,6 +487,17 @@ ENTRY(__switch_to_asm)
 	movq	%rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
 #endif
 
+#ifdef CONFIG_RETPOLINE
+	/*
+	 * When switching from a shallower to a deeper call stack
+	 * the RSB may either underflow or use entries populated
+	 * with userspace addresses. On CPUs where those concerns
+	 * exist, overwrite the RSB with entries which capture
+	 * speculative execution to prevent attack.
+	 */
+	FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
 	/* restore callee-saved registers */
 	popq	%r15
 	popq	%r14
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index f275447..aa09559 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -211,6 +211,7 @@
 #define X86_FEATURE_AVX512_4FMAPS	( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */
 
 #define X86_FEATURE_MBA			( 7*32+18) /* Memory Bandwidth Allocation */
+#define X86_FEATURE_RSB_CTXSW		( 7*32+19) /* Fill RSB on context switches */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW		( 8*32+ 0) /* Intel TPR Shadow */
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index e4dc261..390b3dc 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -23,6 +23,7 @@
 #include <asm/alternative.h>
 #include <asm/pgtable.h>
 #include <asm/set_memory.h>
+#include <asm/intel-family.h>
 
 static void __init spectre_v2_select_mitigation(void);
 
@@ -155,6 +156,23 @@ disable:
 	return SPECTRE_V2_CMD_NONE;
 }
 
+/* Check for Skylake-like CPUs (for RSB handling) */
+static bool __init is_skylake_era(void)
+{
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+	    boot_cpu_data.x86 == 6) {
+		switch (boot_cpu_data.x86_model) {
+		case INTEL_FAM6_SKYLAKE_MOBILE:
+		case INTEL_FAM6_SKYLAKE_DESKTOP:
+		case INTEL_FAM6_SKYLAKE_X:
+		case INTEL_FAM6_KABYLAKE_MOBILE:
+		case INTEL_FAM6_KABYLAKE_DESKTOP:
+			return true;
+		}
+	}
+	return false;
+}
+
 static void __init spectre_v2_select_mitigation(void)
 {
 	enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
@@ -213,6 +231,24 @@ retpoline_auto:
 
 	spectre_v2_enabled = mode;
 	pr_info("%s\n", spectre_v2_strings[mode]);
+
+	/*
+	 * If neither SMEP or KPTI are available, there is a risk of
+	 * hitting userspace addresses in the RSB after a context switch
+	 * from a shallow call stack to a deeper one. To prevent this fill
+	 * the entire RSB, even when using IBRS.
+	 *
+	 * Skylake era CPUs have a separate issue with *underflow* of the
+	 * RSB, when they will predict 'ret' targets from the generic BTB.
+	 * The proper mitigation for this is IBRS. If IBRS is not supported
+	 * or deactivated in favour of retpolines the RSB fill on context
+	 * switch is required.
+	 */
+	if ((!boot_cpu_has(X86_FEATURE_PTI) &&
+	     !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
+		setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
+		pr_info("Filling RSB on context switch\n");
+	}
 }
 
 #undef pr_fmt

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [tip:x86/pti] x86/retpoline: Fill RSB on context switch for affected CPUs
  2018-01-14 23:37 ` tip-bot for David Woodhouse
@ 2018-01-15  0:05   ` Andi Kleen
  2018-01-15  0:09     ` Andi Kleen
  2018-01-15 10:13     ` David Woodhouse
  0 siblings, 2 replies; 20+ messages in thread
From: Andi Kleen @ 2018-01-15  0:05 UTC (permalink / raw)
  To: pjt, linux-kernel, tim.c.chen, riel, jikos, jpoimboe, luto,
	torvalds, tglx, dwmw, hpa, peterz, dave.hansen, mingo, keescook,
	gregkh, arjan
  Cc: linux-tip-commits

> +	if ((!boot_cpu_has(X86_FEATURE_PTI) &&
> +	     !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
> +		setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
> +		pr_info("Filling RSB on context switch\n");
> +	}

Missing an option to turn this off.

-Andi

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [tip:x86/pti] x86/retpoline: Fill RSB on context switch for affected CPUs
  2018-01-15  0:05   ` Andi Kleen
@ 2018-01-15  0:09     ` Andi Kleen
  2018-01-15 10:13     ` David Woodhouse
  1 sibling, 0 replies; 20+ messages in thread
From: Andi Kleen @ 2018-01-15  0:09 UTC (permalink / raw)
  To: pjt, linux-kernel, tim.c.chen, riel, jikos, jpoimboe, luto,
	torvalds, tglx, dwmw, hpa, peterz, dave.hansen, mingo, keescook,
	gregkh, arjan
  Cc: linux-tip-commits

On Sun, Jan 14, 2018 at 04:05:54PM -0800, Andi Kleen wrote:
> > +	if ((!boot_cpu_has(X86_FEATURE_PTI) &&
> > +	     !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
> > +		setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
> > +		pr_info("Filling RSB on context switch\n");
> > +	}
> 
> Missing an option to turn this off.

My earlier patch did this properly by folding it 
into the big option parser.

https://marc.info/?l=linux-kernel&m=151578282016915&w=2

-Andi

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [tip:x86/pti] x86/retpoline: Fill RSB on context switch for affected CPUs
  2018-01-15  0:05   ` Andi Kleen
  2018-01-15  0:09     ` Andi Kleen
@ 2018-01-15 10:13     ` David Woodhouse
  1 sibling, 0 replies; 20+ messages in thread
From: David Woodhouse @ 2018-01-15 10:13 UTC (permalink / raw)
  To: Andi Kleen, pjt, linux-kernel, tim.c.chen, riel, jikos, jpoimboe,
	luto, torvalds, tglx, hpa, peterz, dave.hansen, mingo, keescook,
	gregkh, arjan
  Cc: linux-tip-commits

[-- Attachment #1: Type: text/plain, Size: 889 bytes --]

On Sun, 2018-01-14 at 16:05 -0800, Andi Kleen wrote:
> > +     if ((!boot_cpu_has(X86_FEATURE_PTI) &&
> > +          !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
> > +             setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
> > +             pr_info("Filling RSB on context switch\n");
> > +     }
> 
> Missing an option to turn this off.

Deliberately so. You can already boot with 'spectre_v2=off' to turn off
the mitigations. We are not intending to permit all the bullshit micro-
management of IBRS=3/IBPB=2/RSB=π nonsense.

If you choose retpoline, you get the RSB stuffing which is appropriate
along with that. With IBRS, you get the RSB stuffing which is
appropriate with that. You don't get command line or sysfs tunables to
mess with it. You *do* have the source code, if you really want to make
changes. Don't.

[-- Attachment #2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 5213 bytes --]

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [tip:x86/pti] x86/retpoline: Fill RSB on context switch for affected CPUs
  2018-01-14 17:04 ` [tip:x86/pti] " tip-bot for David Woodhouse
@ 2018-01-15 14:35   ` David Laight
  2018-01-15 14:39     ` David Woodhouse
  2018-01-15 14:42     ` Arjan van de Ven
  0 siblings, 2 replies; 20+ messages in thread
From: David Laight @ 2018-01-15 14:35 UTC (permalink / raw)
  To: dwmw, riel, tglx, linux-kernel, tim.c.chen, pjt, jpoimboe, ak,
	gregkh, torvalds, dave.hansen, luto, jikos, peterz, keescook,
	arjan, mingo, hpa, linux-tip-commits

From: David Woodhouse
> Sent: 14 January 2018 17:04
> x86/retpoline: Fill RSB on context switch for affected CPUs
> 
> On context switch from a shallow call stack to a deeper one, as the CPU
> does 'ret' up the deeper side it may encounter RSB entries (predictions for
> where the 'ret' goes to) which were populated in userspace.
> 
> This is problematic if neither SMEP nor KPTI (the latter of which marks
> userspace pages as NX for the kernel) are active, as malicious code in
> userspace may then be executed speculatively.
...

Do we have a guarantee that all CPUs actually detect the related RSB underflow?

It wouldn't surprise me if at least some CPUs just let it wrap.

This would mean that userspace would see return predictions based
on the values the kernel 'stuffed' into the RSB to fill it.

Potentially this leaks a kernel address to userspace.

	David


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [tip:x86/pti] x86/retpoline: Fill RSB on context switch for affected CPUs
  2018-01-15 14:35   ` David Laight
@ 2018-01-15 14:39     ` David Woodhouse
  2018-01-15 14:42     ` Arjan van de Ven
  1 sibling, 0 replies; 20+ messages in thread
From: David Woodhouse @ 2018-01-15 14:39 UTC (permalink / raw)
  To: David Laight, riel, tglx, linux-kernel, tim.c.chen, pjt,
	jpoimboe, ak, gregkh, torvalds, dave.hansen, luto, jikos, peterz,
	keescook, arjan, mingo, hpa, linux-tip-commits

[-- Attachment #1: Type: text/plain, Size: 1364 bytes --]

On Mon, 2018-01-15 at 14:35 +0000, David Laight wrote:
> From: David Woodhouse
> > 
> > Sent: 14 January 2018 17:04
> > x86/retpoline: Fill RSB on context switch for affected CPUs
> > 
> > On context switch from a shallow call stack to a deeper one, as the CPU
> > does 'ret' up the deeper side it may encounter RSB entries (predictions for
> > where the 'ret' goes to) which were populated in userspace.
> > 
> > This is problematic if neither SMEP nor KPTI (the latter of which marks
> > userspace pages as NX for the kernel) are active, as malicious code in
> > userspace may then be executed speculatively.
> ...
> 
> Do we have a guarantee that all cpu actually detect the related RSB underflow?
> 
> It wouldn't surprise me if at least some cpu just let it wrap.
> 
> This would means that userspace would see return predictions based
> on the values the kernel 'stuffed' into the RSB to fill it.
> 
> Potentially this leaks a kernel address to userspace.

Yeah, KASLR is dead unless we do a full IBPB before *every* VMLAUNCH or
return to userspace anyway, isn't it? With KPTI we could put the RSB-
stuffer into the syscall trampoline page perhaps...

For this to be a concern for userspace, I think it does have to be true
that only the lower bits are used, which adds a little complexity but
probably isn't insurmountable?


[-- Attachment #2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 5213 bytes --]

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [tip:x86/pti] x86/retpoline: Fill RSB on context switch for affected CPUs
  2018-01-15 14:35   ` David Laight
  2018-01-15 14:39     ` David Woodhouse
@ 2018-01-15 14:42     ` Arjan van de Ven
  2018-01-15 20:03       ` Kees Cook
  1 sibling, 1 reply; 20+ messages in thread
From: Arjan van de Ven @ 2018-01-15 14:42 UTC (permalink / raw)
  To: David Laight, dwmw, riel, tglx, linux-kernel, tim.c.chen, pjt,
	jpoimboe, ak, gregkh, torvalds, dave.hansen, luto, jikos, peterz,
	keescook, mingo, hpa, linux-tip-commits

> 
> This would means that userspace would see return predictions based
> on the values the kernel 'stuffed' into the RSB to fill it.
> 
> Potentially this leaks a kernel address to userspace.

KASLR pretty much died in May this year to be honest with the KAISER paper (if not before then)

also with KPTI the address won't have a TLB mapping so it wouldn't
actually be speculated into.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [tip:x86/pti] x86/retpoline: Fill RSB on context switch for affected CPUs
  2018-01-15 14:42     ` Arjan van de Ven
@ 2018-01-15 20:03       ` Kees Cook
  0 siblings, 0 replies; 20+ messages in thread
From: Kees Cook @ 2018-01-15 20:03 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: David Laight, dwmw, riel, tglx, linux-kernel, tim.c.chen, pjt,
	jpoimboe, ak, gregkh, torvalds, dave.hansen, luto, jikos, peterz,
	mingo, hpa, linux-tip-commits

On Mon, Jan 15, 2018 at 6:42 AM, Arjan van de Ven <arjan@linux.intel.com> wrote:
>>
>> This would means that userspace would see return predictions based
>> on the values the kernel 'stuffed' into the RSB to fill it.
>>
>> Potentially this leaks a kernel address to userspace.
>
>
> KASLR pretty much died in May this year to be honest with the KAISER paper
> (if not before then)

KASLR was always on shaky ground for local attacks. For pure remote
attacks, it's still useful. And for driving forward research, it
appears to be quite useful. ;)

-Kees

-- 
Kees Cook
Pixel Security

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: x86/retpoline: Fill RSB on context switch for affected CPUs
  2018-01-12 17:49 [PATCH] x86/retpoline: Fill RSB on context switch for affected CPUs David Woodhouse
                   ` (4 preceding siblings ...)
  2018-01-14 23:37 ` tip-bot for David Woodhouse
@ 2018-03-09 13:12 ` Maciej S. Szmigiero
  2018-03-09 15:14   ` Andi Kleen
  5 siblings, 1 reply; 20+ messages in thread
From: Maciej S. Szmigiero @ 2018-03-09 13:12 UTC (permalink / raw)
  To: Woodhouse, David
  Cc: Andi Kleen, Paul Turner, LKML, Linus Torvalds,
	Greg Kroah-Hartman, Tim Chen, Dave Hansen, tglx, Kees Cook,
	Rik van Riel, Peter Zijlstra, Andy Lutomirski, Jiri Kosina,
	gnomes, x86, thomas.lendacky, Josh Poimboeuf

On 12.01.2018 18:49, Woodhouse, David wrote:
> When we context switch from a shallow call stack to a deeper one, as we
> 'ret' up the deeper side we may encounter RSB entries (predictions for
> where the 'ret' goes to) which were populated in userspace. This is
> problematic if we have neither SMEP nor KPTI (the latter of which marks
> userspace pages as NX for the kernel), as malicious code in userspace
> may then be executed speculatively. So overwrite the CPU's return
> prediction stack with calls which are predicted to return to an infinite
> loop, to "capture" speculation if this happens. This is required both
> for retpoline, and also in conjunction with IBRS for !SMEP && !KPTI.
> 
> On Skylake+ the problem is slightly different, and an *underflow* of the
> RSB may cause errant branch predictions to occur. So there it's not so
> much overwrite, as *filling* the RSB to attempt to prevent it getting
> empty. This is only a partial solution for Skylake+ since there are many
> other conditions which may result in the RSB becoming empty. The full
> solution on Skylake+ is to use IBRS, which will prevent the problem even
> when the RSB becomes empty. With IBRS, the RSB-stuffing will not be
> required on context switch.
> 
> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
> Acked-by: Arjan van de Ven <arjan@linux.intel.com>
> ---
(..)
> @@ -213,6 +230,23 @@ static void __init spectre_v2_select_mitigation(void)
>  
>  	spectre_v2_enabled = mode;
>  	pr_info("%s\n", spectre_v2_strings[mode]);
> +
> +	/*
> +	 * If we don't have SMEP or KPTI, then we run the risk of hitting
> +	 * userspace addresses in the RSB after a context switch from a
> +	 * shallow call stack to a deeper one. We must must fill the entire
> +	 * RSB to avoid that, even when using IBRS.
> +	 *
> +	 * Skylake era CPUs have a separate issue with *underflow* of the
> +	 * RSB, when they will predict 'ret' targets from the generic BTB.
> +	 * IBRS makes that safe, but we need to fill the RSB on context
> +	 * switch if we're using retpoline.
> +	 */
> +	if ((!boot_cpu_has(X86_FEATURE_PTI) &&
> +	     !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
> +		setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
> +		pr_info("Filling RSB on context switch\n");
> +	}

Shouldn't the RSB filling on context switch also be done on non-IBPB
CPUs to protect (retpolined) user space tasks from other user space
tasks?

We already issue an IBPB when switching to high-value user space tasks
to protect them from other user space tasks.

Thanks,
Maciej

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: x86/retpoline: Fill RSB on context switch for affected CPUs
  2018-03-09 13:12 ` Maciej S. Szmigiero
@ 2018-03-09 15:14   ` Andi Kleen
  2018-03-09 15:33     ` Maciej S. Szmigiero
  2018-03-09 15:38     ` Woodhouse, David
  0 siblings, 2 replies; 20+ messages in thread
From: Andi Kleen @ 2018-03-09 15:14 UTC (permalink / raw)
  To: Maciej S. Szmigiero
  Cc: Woodhouse, David, Paul Turner, LKML, Linus Torvalds,
	Greg Kroah-Hartman, Tim Chen, Dave Hansen, tglx, Kees Cook,
	Rik van Riel, Peter Zijlstra, Andy Lutomirski, Jiri Kosina,
	gnomes, x86, thomas.lendacky, Josh Poimboeuf

> Shouldn't the RSB filling on context switch also be done on non-IBPB
> CPUs to protect (retpolined) user space tasks from other user space
> tasks?

The comment is actually incorrect. There's no risk to hit user space
addresses if we have KPTI and NX (which is fairly universal).

It's mainly needed on Skylake era CPUs.

Should fix the comment. I'll send a patch.

-Andi

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: x86/retpoline: Fill RSB on context switch for affected CPUs
  2018-03-09 15:14   ` Andi Kleen
@ 2018-03-09 15:33     ` Maciej S. Szmigiero
  2018-03-09 15:38     ` Woodhouse, David
  1 sibling, 0 replies; 20+ messages in thread
From: Maciej S. Szmigiero @ 2018-03-09 15:33 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Woodhouse, David, Paul Turner, LKML, Linus Torvalds,
	Greg Kroah-Hartman, Tim Chen, Dave Hansen, tglx, Kees Cook,
	Rik van Riel, Peter Zijlstra, Andy Lutomirski, Jiri Kosina,
	gnomes, x86, thomas.lendacky, Josh Poimboeuf

On 09.03.2018 16:14, Andi Kleen wrote:
>> Shouldn't the RSB filling on context switch also be done on non-IBPB
>> CPUs to protect (retpolined) user space tasks from other user space
>> tasks?
> 
> The comment is actually incorrect. There's no risk to hit user space
> addresses if we have KPTI and NX (which is fairly universal).
> 
> It's mainly needed on Skylake era CPUs.
> 
> Should fix the comment. I'll send a patch.

But what about userspace-to-userspace attacks? - the ones that IBPB on 
context switches currently protects against (at least for high-value, or
as implemented currently, non-dumpable, processes)?

If I understand the issue correctly, high-value user space processes can
be protected from other user space processes even on CPUs that lack
IBPB as long as they are recompiled with retpolines and there is no
danger of RSB entries from one process being used by another one after
a context switch.
For Skylake this would not be enough, but there we'll (hopefully) have
the IBPB instead.

> -Andi
> 

Maciej

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: x86/retpoline: Fill RSB on context switch for affected CPUs
  2018-03-09 15:14   ` Andi Kleen
  2018-03-09 15:33     ` Maciej S. Szmigiero
@ 2018-03-09 15:38     ` Woodhouse, David
  1 sibling, 0 replies; 20+ messages in thread
From: Woodhouse, David @ 2018-03-09 15:38 UTC (permalink / raw)
  To: mail, ak
  Cc: linux-kernel, tim.c.chen, peterz, torvalds, tglx, jpoimboe, x86,
	riel, keescook, gnomes, pjt, dave.hansen, luto, jikos,
	thomas.lendacky, gregkh

[-- Attachment #1.1: Type: text/plain, Size: 863 bytes --]

On Fri, 2018-03-09 at 07:14 -0800, Andi Kleen wrote:
> > 
> > Shouldn't the RSB filling on context switch also be done on non-
> > IBPB
> > CPUs to protect (retpolined) user space tasks from other user space
> > tasks?
> The comment is actually incorrect. There's no risk to hit user space
> addresses if we have KPTI and NX (which is fairly universal).

... on non-AMD CPUs.

The comment does say "If we don't have SMEP or KPTI".

> It's mainly needed on Skylake era CPUs.

I wouldn't have added it if it were only for Skylake. We still have no
coherent overall mitigation for Skylake except the original IBRS setup.

Did anyone ever get any further with the call stack counting hacks?


Maciej, you might be right that to protect userspace processes from
each other we should also flush the RSB on context switch to a
"sensitive" process.

[-- Attachment #1.2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 5210 bytes --]
[-- Attachment #2.1: Type: text/plain, Size: 208 bytes --]




Amazon Web Services UK Limited. Registered in England and Wales with registration number 08650665 with its registered office at 1 Principal Place, Worship Street, London, EC2A 2FA, United Kingdom.



[-- Attachment #2.2: Type: text/html, Size: 222 bytes --]

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, back to index

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-01-12 17:49 [PATCH] x86/retpoline: Fill RSB on context switch for affected CPUs David Woodhouse
2018-01-12 18:02 ` Andi Kleen
2018-01-12 18:23   ` David Woodhouse
2018-01-12 18:05 ` Andrew Cooper
2018-01-12 18:56   ` David Woodhouse
2018-01-12 23:41     ` Josh Poimboeuf
2018-01-14 11:39 ` Thomas Gleixner
2018-01-14 17:04 ` [tip:x86/pti] " tip-bot for David Woodhouse
2018-01-15 14:35   ` David Laight
2018-01-15 14:39     ` David Woodhouse
2018-01-15 14:42     ` Arjan van de Ven
2018-01-15 20:03       ` Kees Cook
2018-01-14 23:37 ` tip-bot for David Woodhouse
2018-01-15  0:05   ` Andi Kleen
2018-01-15  0:09     ` Andi Kleen
2018-01-15 10:13     ` David Woodhouse
2018-03-09 13:12 ` Maciej S. Szmigiero
2018-03-09 15:14   ` Andi Kleen
2018-03-09 15:33     ` Maciej S. Szmigiero
2018-03-09 15:38     ` Woodhouse, David

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org linux-kernel@archiver.kernel.org
	public-inbox-index lkml


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/ public-inbox