Linux-Doc Archive on lore.kernel.org
 help / color / Atom feed
From: Sasha Levin <sashal@kernel.org>
To: tglx@linutronix.de, luto@kernel.org, ak@linux.intel.com
Cc: corbet@lwn.net, mingo@redhat.com, bp@alien8.de, x86@kernel.org,
	shuah@kernel.org, gregkh@linuxfoundation.org,
	tony.luck@intel.com, chang.seok.bae@intel.com,
	dave.hansen@linux.intel.com, peterz@infradead.org,
	linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org,
	jarkko.sakkinen@linux.intel.com,
	"H . Peter Anvin" <hpa@zytor.com>,
	Ravi Shankar <ravi.v.shankar@intel.com>,
	Sasha Levin <sashal@kernel.org>
Subject: [PATCH v13 11/16] x86/entry/64: Handle FSGSBASE enabled paranoid entry/exit
Date: Thu, 28 May 2020 16:13:57 -0400
Message-ID: <20200528201402.1708239-12-sashal@kernel.org> (raw)
In-Reply-To: <20200528201402.1708239-1-sashal@kernel.org>

From: "Chang S. Bae" <chang.seok.bae@intel.com>

Without FSGSBASE, user space cannot change GSBASE other than through a
PRCTL. The kernel enforces that the user space GSBASE value is positive as
negative values are used for detecting the kernel space GSBASE value in the
paranoid entry code.

If FSGSBASE is enabled, user space can set arbitrary GSBASE values without
kernel intervention, including negative ones, which breaks the paranoid
entry assumptions.

To avoid this, paranoid entry needs to unconditionally save the current
GSBASE value independent of the interrupted context, retrieve and write the
kernel GSBASE and unconditionally restore the saved value on exit. The
restore happens either in paranoid_exit or in the special exit path of the
NMI low level code.

All other entry code paths which use unconditional SWAPGS are not affected
as they do not depend on the actual content.

[ tglx: Massaged changelogs and comments ]

Suggested-by: H. Peter Anvin <hpa@zytor.com>
Suggested-by: Andy Lutomirski <luto@kernel.org>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ravi Shankar <ravi.v.shankar@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Link: https://lkml.kernel.org/r/1557309753-24073-13-git-send-email-chang.seok.bae@intel.com
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 arch/x86/entry/calling.h  |   6 +++
 arch/x86/entry/entry_64.S | 107 ++++++++++++++++++++++++++++++--------
 2 files changed, 91 insertions(+), 22 deletions(-)

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 1b95cdc19b18..57335f948bf7 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -342,6 +342,12 @@ For 32-bit we have the following conventions - kernel is built with
 #endif
 .endm
 
+.macro SAVE_AND_SET_GSBASE scratch_reg:req save_reg:req
+	rdgsbase \save_reg
+	GET_PERCPU_BASE \scratch_reg
+	wrgsbase \scratch_reg
+.endm
+
 #endif /* CONFIG_X86_64 */
 
 .macro STACKLEAK_ERASE
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 3b9ccba6c4b4..53246c470607 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -38,6 +38,7 @@
 #include <asm/export.h>
 #include <asm/frame.h>
 #include <asm/nospec-branch.h>
+#include <asm/fsgsbase.h>
 #include <linux/err.h>
 
 #include "calling.h"
@@ -921,7 +922,6 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work_interrupt		smp_irq_work_interrupt
 	.endif
 
 	.if \paranoid
-	/* this procedure expect "no swapgs" flag in ebx */
 	jmp	paranoid_exit
 	.else
 	jmp	error_exit
@@ -1211,9 +1211,14 @@ idtentry machine_check		do_mce			has_error_code=0	paranoid=1
 #endif
 
 /*
- * Save all registers in pt_regs, and switch gs if needed.
- * Use slow, but surefire "are we in kernel?" check.
- * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
+ * Save all registers in pt_regs. Return GSBASE related information
+ * in EBX depending on the availability of the FSGSBASE instructions:
+ *
+ * FSGSBASE	R/EBX
+ *     N        0 -> SWAPGS on exit
+ *              1 -> no SWAPGS on exit
+ *
+ *     Y        GSBASE value at entry, must be restored in paranoid_exit
  */
 SYM_CODE_START_LOCAL(paranoid_entry)
 	UNWIND_HINT_FUNC
@@ -1221,7 +1226,6 @@ SYM_CODE_START_LOCAL(paranoid_entry)
 	PUSH_AND_CLEAR_REGS save_ret=1
 	ENCODE_FRAME_POINTER 8
 
-1:
 	/*
 	 * Always stash CR3 in %r14.  This value will be restored,
 	 * verbatim, at exit.  Needed if paranoid_entry interrupted
@@ -1239,6 +1243,28 @@ SYM_CODE_START_LOCAL(paranoid_entry)
 	 */
 	SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
 
+	/*
+	 * Handling GSBASE depends on the availability of FSGSBASE.
+	 *
+	 * Without FSGSBASE the kernel enforces that negative GSBASE
+	 * values indicate kernel GSBASE. With FSGSBASE no assumptions
+	 * can be made about the GSBASE value when entering from user
+	 * space.
+	 */
+	ALTERNATIVE "jmp .Lparanoid_entry_checkgs", "", X86_FEATURE_FSGSBASE
+
+	/*
+	 * Read the current GSBASE and store it in %rbx unconditionally,
+	 * retrieve and set the current CPU's kernel GSBASE. The stored value
+	 * has to be restored in paranoid_exit unconditionally.
+	 *
+	 * The MSR write ensures that no subsequent load is based on a
+	 * mispredicted GSBASE. No extra FENCE required.
+	 */
+	SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx
+	ret
+
+.Lparanoid_entry_checkgs:
 	/* EBX = 1 -> kernel GSBASE active, no restore required */
 	movl	$1, %ebx
 	/*
@@ -1273,28 +1299,48 @@ SYM_CODE_END(paranoid_entry)
  *
  * We may be returning to very strange contexts (e.g. very early
  * in syscall entry), so checking for preemption here would
- * be complicated.  Fortunately, we there's no good reason
- * to try to handle preemption here.
+ * be complicated.  Fortunately, there's no good reason to try
+ * to handle preemption here.
+ *
+ * R/EBX contains the GSBASE related information depending on the
+ * availability of the FSGSBASE instructions:
+ *
+ * FSGSBASE	R/EBX
+ *     N        0 -> SWAPGS on exit
+ *              1 -> no SWAPGS on exit
  *
- * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it)
+ *     Y        User space GSBASE, must be restored unconditionally
  */
 SYM_CODE_START_LOCAL(paranoid_exit)
 	UNWIND_HINT_REGS
 	DISABLE_INTERRUPTS(CLBR_ANY)
-	TRACE_IRQS_OFF_DEBUG
-	/* If EBX is 0, SWAPGS is required */
-	testl	%ebx, %ebx
-	jnz	.Lparanoid_exit_no_swapgs
-	TRACE_IRQS_IRETQ
-	/* Always restore stashed CR3 value (see paranoid_entry) */
-	RESTORE_CR3	scratch_reg=%rbx save_reg=%r14
-	SWAPGS_UNSAFE_STACK
-	jmp	restore_regs_and_return_to_kernel
-.Lparanoid_exit_no_swapgs:
+	/*
+	 * The order of operations is important. IRQ tracing requires
+	 * kernel GSBASE and CR3. RESTORE_CR3 requires kernel GSBASE.
+	 *
+	 * NB to anyone trying to optimize this code: this code does
+	 * not execute at all for exceptions from user mode. Those
+	 * exceptions go through error_exit instead.
+	 */
 	TRACE_IRQS_IRETQ_DEBUG
-	/* Always restore stashed CR3 value (see paranoid_entry) */
-	RESTORE_CR3	scratch_reg=%rbx save_reg=%r14
-	jmp restore_regs_and_return_to_kernel
+
+	RESTORE_CR3	scratch_reg=%rax save_reg=%r14
+
+	/* Handle the three GSBASE cases */
+	ALTERNATIVE "jmp .Lparanoid_exit_checkgs", "", X86_FEATURE_FSGSBASE
+
+	/* With FSGSBASE enabled, unconditionally restore GSBASE */
+	wrgsbase	%rbx
+	jmp		restore_regs_and_return_to_kernel
+
+.Lparanoid_exit_checkgs:
+	/* On non-FSGSBASE systems, conditionally do SWAPGS */
+	testl		%ebx, %ebx
+	jnz		restore_regs_and_return_to_kernel
+
+	/* We are returning to a context with user GSBASE */
+	SWAPGS_UNSAFE_STACK
+	jmp		restore_regs_and_return_to_kernel
 SYM_CODE_END(paranoid_exit)
 
 /*
@@ -1702,10 +1748,27 @@ end_repeat_nmi:
 	/* Always restore stashed CR3 value (see paranoid_entry) */
 	RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
 
-	testl	%ebx, %ebx			/* swapgs needed? */
+	/*
+	 * The above invocation of paranoid_entry stored the GSBASE
+	 * related information in R/EBX depending on the availability
+	 * of FSGSBASE.
+	 *
+	 * If FSGSBASE is enabled, restore the saved GSBASE value
+	 * unconditionally, otherwise take the conditional SWAPGS path.
+	 */
+	ALTERNATIVE "jmp nmi_no_fsgsbase", "", X86_FEATURE_FSGSBASE
+
+	wrgsbase	%rbx
+	jmp	nmi_restore
+
+nmi_no_fsgsbase:
+	/* EBX == 0 -> invoke SWAPGS */
+	testl	%ebx, %ebx
 	jnz	nmi_restore
+
 nmi_swapgs:
 	SWAPGS_UNSAFE_STACK
+
 nmi_restore:
 	POP_REGS
 
-- 
2.25.1


  parent reply index

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-05-28 20:13 [PATCH v13 00/16] Enable FSGSBASE instructions Sasha Levin
2020-05-28 20:13 ` [PATCH v13 01/16] x86/ptrace: Prevent ptrace from clearing the FS/GS selector Sasha Levin
2020-05-28 20:13 ` [PATCH v13 02/16] x86/cpu: Add 'unsafe_fsgsbase' to enable CR4.FSGSBASE Sasha Levin
2020-05-28 20:13 ` [PATCH v13 03/16] x86/fsgsbase/64: Add intrinsics for FSGSBASE instructions Sasha Levin
2020-05-28 20:13 ` [PATCH v13 04/16] x86/fsgsbase/64: Enable FSGSBASE instructions in helper functions Sasha Levin
2020-05-28 20:13 ` [PATCH v13 05/16] x86/process/64: Use FSBSBASE in switch_to() if available Sasha Levin
2020-05-28 20:13 ` [PATCH v13 06/16] x86/process/64: Make save_fsgs() public available Sasha Levin
2020-05-28 20:13 ` [PATCH v13 07/16] x86/process/64: Use FSGSBASE instructions on thread copy and ptrace Sasha Levin
2020-05-28 20:13 ` [PATCH v13 08/16] x86/speculation/swapgs: Check FSGSBASE in enabling SWAPGS mitigation Sasha Levin
2020-05-28 20:13 ` [PATCH v13 09/16] x86/entry/64: Switch CR3 before SWAPGS in paranoid entry Sasha Levin
2020-05-28 20:13 ` [PATCH v13 10/16] x86/entry/64: Introduce the FIND_PERCPU_BASE macro Sasha Levin
2020-05-28 20:13 ` Sasha Levin [this message]
2020-05-28 20:13 ` [PATCH v13 12/16] x86/cpu: Enable FSGSBASE on 64bit by default and add a chicken bit Sasha Levin
2020-05-28 20:13 ` [PATCH v13 13/16] x86/elf: Enumerate kernel FSGSBASE capability in AT_HWCAP2 Sasha Levin
2020-05-28 20:14 ` [PATCH v13 14/16] Documentation/x86/64: Add documentation for GS/FS addressing mode Sasha Levin
2020-05-28 20:14 ` [PATCH v13 15/16] selftests/x86/fsgsbase: Test GS selector on ptracer-induced GS base write Sasha Levin
2020-05-29 14:40   ` Shuah Khan
2020-05-28 20:14 ` [PATCH v13 16/16] selftests/x86/fsgsbase: Test ptracer-induced GS base write with FSGSBASE Sasha Levin
2020-05-29 14:42   ` Shuah Khan
2020-06-18 14:18 ` [PATCH v13 00/16] Enable FSGSBASE instructions Thomas Gleixner
2020-06-18 18:08   ` Andy Lutomirski
2020-06-18 18:25     ` Thomas Gleixner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200528201402.1708239-12-sashal@kernel.org \
    --to=sashal@kernel.org \
    --cc=ak@linux.intel.com \
    --cc=bp@alien8.de \
    --cc=chang.seok.bae@intel.com \
    --cc=corbet@lwn.net \
    --cc=dave.hansen@linux.intel.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=hpa@zytor.com \
    --cc=jarkko.sakkinen@linux.intel.com \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=ravi.v.shankar@intel.com \
    --cc=shuah@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=tony.luck@intel.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-Doc Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-doc/0 linux-doc/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-doc linux-doc/ https://lore.kernel.org/linux-doc \
		linux-doc@vger.kernel.org
	public-inbox-index linux-doc

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-doc


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git