All of lore.kernel.org
 help / color / mirror / Atom feed
From: Lai Jiangshan <jiangshanlai@gmail.com>
To: linux-kernel@vger.kernel.org
Cc: Lai Jiangshan <laijs@linux.alibaba.com>,
	Andy Lutomirski <luto@kernel.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
	x86@kernel.org, "H. Peter Anvin" <hpa@zytor.com>,
	Joerg Roedel <jroedel@suse.de>,
	Youquan Song <youquan.song@intel.com>,
	Tony Luck <tony.luck@intel.com>
Subject: [PATCH 22/24] x86/entry: Implement and use do_paranoid_entry() and paranoid_exit()
Date: Wed,  1 Sep 2021 01:50:23 +0800	[thread overview]
Message-ID: <20210831175025.27570-23-jiangshanlai@gmail.com> (raw)
In-Reply-To: <20210831175025.27570-1-jiangshanlai@gmail.com>

From: Lai Jiangshan <laijs@linux.alibaba.com>

All the facilities are set in traps.c, so we can implement the major body
of paranoid_entry() in C as do_paranoid_entry() and the whole
paranoid_exit() in C.

paranoid_entry() needs to save two values which are added into the
struct ist_regs.  And paranoid_exit() use them after the interrupt
is handled.

No functional change intended.

Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
---
 arch/x86/entry/entry_64.S    | 128 +++--------------------------------
 arch/x86/entry/traps.c       |  62 +++++++++++++++++
 arch/x86/include/asm/traps.h |  22 ++++++
 3 files changed, 92 insertions(+), 120 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 1ae10ca351f4..8b2e19e6c9e1 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -437,6 +437,7 @@ SYM_CODE_START(\asmsym)
 
 	call	\cfunc
 
+	movq	%rsp, %rdi		/* ist_regs pointer */
 	call	paranoid_exit
 	addq	$IST_pt_regs, %rsp	/* put %rsp back to pt_regs */
 	jmp	restore_regs_and_return_to_kernel
@@ -516,6 +517,7 @@ SYM_CODE_START(\asmsym)
 	 * identical to the stack in the IRET frame or the VC fall-back stack,
 	 * so it is definitely mapped even with PTI enabled.
 	 */
+	movq	%rsp, %rdi		/* ist_regs pointer */
 	call	paranoid_exit
 	addq	$IST_pt_regs, %rsp	/* put %rsp back to pt_regs */
 	jmp	restore_regs_and_return_to_kernel
@@ -548,6 +550,7 @@ SYM_CODE_START(\asmsym)
 	movq	$-1, ORIG_RAX(%rdi)	/* no syscall to restart */
 	call	\cfunc
 
+	movq	%rsp, %rdi		/* ist_regs pointer */
 	call	paranoid_exit
 	addq	$IST_pt_regs, %rsp	/* put %rsp back to pt_regs */
 	jmp	restore_regs_and_return_to_kernel
@@ -840,14 +843,8 @@ SYM_CODE_END(xen_failsafe_callback)
 #endif /* CONFIG_XEN_PV */
 
 /*
- * Save all registers in pt_regs. Return GSBASE related information
- * in EBX depending on the availability of the FSGSBASE instructions:
- *
- * FSGSBASE	R/EBX
- *     N        0 -> SWAPGS on exit
- *              1 -> no SWAPGS on exit
- *
- *     Y        GSBASE value at entry, must be restored in paranoid_exit
+ * Save all registers and addtional info in ist_regs.
+ * Switch CR3 and gsbase if needed.
  */
 SYM_CODE_START_LOCAL(paranoid_entry)
 	UNWIND_HINT_FUNC
@@ -856,124 +853,14 @@ SYM_CODE_START_LOCAL(paranoid_entry)
 	movq	RDI(%rsp), %rsi	/* temporarily store the return address in %rsi */
 	movq	%rdi, RDI(%rsp) /* put %rdi onto pt_regs */
 	subq	$IST_pt_regs, %rsp /* reserve room for ist_regs */
+	movq	%rsp, %rdi	/* ist_regs pointer */
 	pushq	%rsi		/* put the return address onto the stack */
 	ENCODE_FRAME_POINTER 8+IST_pt_regs
 
-	/*
-	 * Always stash CR3 in %r14.  This value will be restored,
-	 * verbatim, at exit.  Needed if paranoid_entry interrupted
-	 * another entry that already switched to the user CR3 value
-	 * but has not yet returned to userspace.
-	 *
-	 * This is also why CS (stashed in the "iret frame" by the
-	 * hardware at entry) can not be used: this may be a return
-	 * to kernel code, but with a user CR3 value.
-	 *
-	 * Switching CR3 does not depend on kernel GSBASE so it can
-	 * be done before switching to the kernel GSBASE. This is
-	 * required for FSGSBASE because the kernel GSBASE has to
-	 * be retrieved from a kernel internal table.
-	 */
-	SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
-
-	/*
-	 * Handling GSBASE depends on the availability of FSGSBASE.
-	 *
-	 * Without FSGSBASE the kernel enforces that negative GSBASE
-	 * values indicate kernel GSBASE. With FSGSBASE no assumptions
-	 * can be made about the GSBASE value when entering from user
-	 * space.
-	 */
-	ALTERNATIVE "jmp .Lparanoid_entry_checkgs", "", X86_FEATURE_FSGSBASE
-
-	/*
-	 * Read the current GSBASE and store it in %rbx unconditionally,
-	 * retrieve and set the current CPUs kernel GSBASE. The stored value
-	 * has to be restored in paranoid_exit unconditionally.
-	 *
-	 * The unconditional write to GS base below ensures that no subsequent
-	 * loads based on a mispredicted GS base can happen, therefore no LFENCE
-	 * is needed here.
-	 */
-	SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx
-	ret
-
-.Lparanoid_entry_checkgs:
-	/* EBX = 1 -> kernel GSBASE active, no restore required */
-	movl	$1, %ebx
-	/*
-	 * The kernel-enforced convention is a negative GSBASE indicates
-	 * a kernel value. No SWAPGS needed on entry and exit.
-	 */
-	movl	$MSR_GS_BASE, %ecx
-	rdmsr
-	testl	%edx, %edx
-	jns	.Lparanoid_entry_swapgs
-	ret
-
-.Lparanoid_entry_swapgs:
-	swapgs
-
-	/*
-	 * The above SAVE_AND_SWITCH_TO_KERNEL_CR3 macro doesn't do an
-	 * unconditional CR3 write, even in the PTI case.  So do an lfence
-	 * to prevent GS speculation, regardless of whether PTI is enabled.
-	 */
-	FENCE_SWAPGS_KERNEL_ENTRY
-
-	/* EBX = 0 -> SWAPGS required on exit */
-	xorl	%ebx, %ebx
+	call	do_paranoid_entry
 	ret
 SYM_CODE_END(paranoid_entry)
 
-/*
- * "Paranoid" exit path from exception stack.  This is invoked
- * only on return from IST interrupts that came from kernel space.
- *
- * We may be returning to very strange contexts (e.g. very early
- * in syscall entry), so checking for preemption here would
- * be complicated.  Fortunately, there's no good reason to try
- * to handle preemption here.
- *
- * R/EBX contains the GSBASE related information depending on the
- * availability of the FSGSBASE instructions:
- *
- * FSGSBASE	R/EBX
- *     N        0 -> SWAPGS on exit
- *              1 -> no SWAPGS on exit
- *
- *     Y        User space GSBASE, must be restored unconditionally
- */
-SYM_CODE_START_LOCAL(paranoid_exit)
-	UNWIND_HINT_REGS offset=8
-	/*
-	 * The order of operations is important. RESTORE_CR3 requires
-	 * kernel GSBASE.
-	 *
-	 * NB to anyone to try to optimize this code: this code does
-	 * not execute at all for exceptions from user mode. Those
-	 * exceptions go through error_exit instead.
-	 */
-	RESTORE_CR3	scratch_reg=%rax save_reg=%r14
-
-	/* Handle the three GSBASE cases */
-	ALTERNATIVE "jmp .Lparanoid_exit_checkgs", "", X86_FEATURE_FSGSBASE
-
-	/* With FSGSBASE enabled, unconditionally restore GSBASE */
-	wrgsbase	%rbx
-	ret
-
-.Lparanoid_exit_checkgs:
-	/* On non-FSGSBASE systems, conditionally do SWAPGS */
-	testl		%ebx, %ebx
-	jnz		.Lparanoid_exit_done
-
-	/* We are returning to a context with user GSBASE */
-	swapgs
-.Lparanoid_exit_done:
-	ret
-SYM_CODE_END(paranoid_exit)
-
 /*
  * Save all registers in pt_regs, and switch GS if needed.
  */
@@ -1308,6 +1195,7 @@ end_repeat_nmi:
 	 * Use paranoid_exit to handle SWAPGS and CR3, but no need to use
 	 * restore_regs_and_return_to_kernel as we must handle nested NMI.
 	 */
+	movq	%rsp, %rdi			/* ist_regs pointer */
 	call	paranoid_exit
 	addq	$IST_pt_regs, %rsp		/* put %rsp back to pt_regs */
 
diff --git a/arch/x86/entry/traps.c b/arch/x86/entry/traps.c
index b5c92b4e0cb5..52511db6baa6 100644
--- a/arch/x86/entry/traps.c
+++ b/arch/x86/entry/traps.c
@@ -1029,6 +1029,68 @@ static __always_inline unsigned long ist_switch_to_kernel_gsbase(void)
 	/* SWAPGS required on exit */
 	return 0;
 }
+
+asmlinkage __visible __entry_text
+void do_paranoid_entry(struct ist_regs *ist)
+{
+	/*
+	 * Always stash CR3 in ist->cr3.  This value will be restored,
+	 * verbatim, at exit.  Needed if paranoid_entry interrupted
+	 * another entry that already switched to the user CR3 value
+	 * but has not yet returned to userspace.
+	 *
+	 * This is also why CS (stashed in the "iret frame" by the
+	 * hardware at entry) can not be used: this may be a return
+	 * to kernel code, but with a user CR3 value.
+	 *
+	 * Switching CR3 does not depend on kernel GSBASE so it can
+	 * be done before switching to the kernel GSBASE. This is
+	 * required for FSGSBASE because the kernel GSBASE has to
+	 * be retrieved from a kernel internal table.
+	 */
+	ist->cr3 = ist_switch_to_kernel_cr3();
+
+	/* Handle GSBASE, store the return value in ist_regs for exit. */
+	ist->gsbase = ist_switch_to_kernel_gsbase();
+}
+
+/*
+ * "Paranoid" exit path from exception stack.  This is invoked
+ * only on return from IST interrupts that came from kernel space.
+ *
+ * We may be returning to very strange contexts (e.g. very early
+ * in syscall entry), so checking for preemption here would
+ * be complicated.  Fortunately, there's no good reason to try
+ * to handle preemption here.
+ */
+asmlinkage __visible __entry_text
+void paranoid_exit(struct ist_regs *ist)
+{
+	/*
+	 * Restore cr3 at first, it can use kernel GSBASE.
+	 */
+	ist_restore_cr3(ist->cr3);
+
+	/*
+	 * Handle the three GSBASE cases.
+	 *
+	 * ist->gsbase contains the GSBASE related information depending
+	 * on the availability of the FSGSBASE instructions:
+	 *
+	 * FSGSBASE	ist->gsbase
+	 *     N        0 -> SWAPGS on exit
+	 *              1 -> no SWAPGS on exit
+	 *
+	 *     Y        User space GSBASE, must be restored unconditionally
+	 */
+	if (static_cpu_has(X86_FEATURE_FSGSBASE)) {
+		wrgsbase(ist->gsbase);
+		return;
+	}
+
+	if (!ist->gsbase)
+		native_swapgs();
+}
 #endif
 
 static bool is_sysenter_singlestep(struct pt_regs *regs)
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index e24c63bbc30a..0bc7117a01cd 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -12,6 +12,26 @@
 
 #ifdef CONFIG_X86_64
 struct ist_regs {
+	/*
+	 * Always stash CR3 in cr3.  This value will be restored,
+	 * verbatim, at exit.  Needed if paranoid_entry interrupted
+	 * another entry that already switched to the user CR3 value
+	 * but has not yet returned to userspace.
+	 */
+	unsigned long cr3;
+
+	/*
+	 * gsbase contains the GSBASE related information depending on the
+	 * availability of the FSGSBASE instructions:
+	 *
+	 * FSGSBASE	gsbase
+	 *     N        0 -> SWAPGS on exit
+	 *              1 -> no SWAPGS on exit
+	 *
+	 *     Y        User space GSBASE, must be restored unconditionally
+	 */
+	unsigned long gsbase;
+
 	/*
 	 * ist specific fields must be defined before pt_regs
 	 * and they are located below pt_regs on the stacks.
@@ -20,6 +40,8 @@ struct ist_regs {
 };
 
 asmlinkage __visible notrace struct pt_regs *do_error_entry(struct pt_regs *eregs);
+asmlinkage __visible notrace void do_paranoid_entry(struct ist_regs *ist);
+asmlinkage __visible notrace void paranoid_exit(struct ist_regs *ist);
 void __init trap_init(void);
 asmlinkage __visible noinstr struct ist_regs *vc_switch_off_ist(struct ist_regs *ist);
 #endif
-- 
2.19.1.6.gb485710b


  parent reply	other threads:[~2021-08-31 17:52 UTC|newest]

Thread overview: 72+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-31 17:50 [PATCH 00/24] x86/entry/64: Convert a bunch of ASM entry code into C code Lai Jiangshan
2021-08-31 17:50 ` [PATCH 01/24] x86/traps: Remove stack-protector from traps.c Lai Jiangshan
2021-08-31 17:50 ` [PATCH 02/24] x86/traps: Move arch/x86/kernel/traps.c to arch/x86/entry/ Lai Jiangshan
2021-09-02  8:09   ` Joerg Roedel
2021-09-02  9:21     ` Lai Jiangshan
2021-09-02 10:50       ` Peter Zijlstra
2021-09-02 11:54         ` Lai Jiangshan
2021-09-02 12:05           ` Peter Zijlstra
2021-09-02 13:34             ` Peter Zijlstra
2021-09-02 17:05               ` Nick Desaulniers
2021-09-02 17:19                 ` Miguel Ojeda
2021-09-02 17:23                   ` Nick Desaulniers
2021-09-03  7:36                 ` Martin Liška
2021-09-07 21:12                   ` Nick Desaulniers
2021-09-08  7:33                     ` Martin Liška
2021-08-31 17:50 ` [PATCH 03/24] x86/traps: Move declaration of native_irq_return_iret up Lai Jiangshan
2021-08-31 17:50 ` [PATCH 04/24] x86/entry: Expose the address of .Lgs_change to traps.c Lai Jiangshan
2021-09-02  9:14   ` Peter Zijlstra
2021-09-02  9:20     ` Lai Jiangshan
2021-08-31 17:50 ` [PATCH 05/24] x86/entry: Introduce __entry_text for entry code written in C Lai Jiangshan
2021-08-31 19:34   ` Peter Zijlstra
2021-09-01  0:23     ` Lai Jiangshan
2021-08-31 17:50 ` [PATCH 06/24] x86/entry: Move PTI_USER_* to arch/x86/include/asm/processor-flags.h Lai Jiangshan
2021-08-31 17:50 ` [PATCH 07/24] x86: Mark __native_read_cr3() & native_write_cr3() as __always_inline Lai Jiangshan
2021-08-31 17:50 ` [PATCH 08/24] x86/traps: Add C verion of SWITCH_TO_KERNEL_CR3 as switch_to_kernel_cr3() Lai Jiangshan
2021-08-31 17:50 ` [PATCH 09/24] x86/traps: Add fence_swapgs_{user,kernel}_entry() Lai Jiangshan
2021-09-02  9:25   ` Peter Zijlstra
2021-08-31 17:50 ` [PATCH 10/24] x86/traps: Move pt_regs only in fixup_bad_iret() Lai Jiangshan
2021-08-31 17:50 ` [PATCH 11/24] x86/entry: Replace the most of asm code of error_entry to C code Lai Jiangshan
2021-09-02 10:16   ` Peter Zijlstra
2021-09-02 12:08     ` Lai Jiangshan
2021-08-31 17:50 ` [PATCH 12/24] x86/traps: Reconstruct pt_regs on task stack directly in fixup_bad_iret() Lai Jiangshan
2021-08-31 17:50 ` [PATCH 13/24] x86/traps: Mark sync_regs() and fixup_bad_iret() as static __always_inline Lai Jiangshan
2021-08-31 17:50 ` [PATCH 14/24] x86/entry: Make paranoid_exit() callable Lai Jiangshan
2021-08-31 17:50 ` [PATCH 15/24] x86/entry: Call paranoid_exit() in asm_exc_nmi() Lai Jiangshan
2021-08-31 17:50 ` [PATCH 16/24] x86/entry: Use skip_rdi instead of save_ret for PUSH_AND_CLEAR_REGS Lai Jiangshan
2021-08-31 17:50 ` [PATCH 17/24] x86/entry: Introduce struct ist_regs Lai Jiangshan
2021-09-10  0:18   ` Lai Jiangshan
2021-09-10  0:36     ` Lai Jiangshan
2021-09-10  4:31     ` H. Peter Anvin
2021-09-10  7:13       ` Lai Jiangshan
2021-09-10  7:14         ` H. Peter Anvin
2021-09-10  4:50     ` H. Peter Anvin
2021-09-10  4:51       ` H. Peter Anvin
2021-08-31 17:50 ` [PATCH 18/24] x86/entry: Add the C version ist_switch_to_kernel_cr3() Lai Jiangshan
2021-08-31 17:50 ` [PATCH 19/24] x86/entry: Add the C version ist_restore_cr3() Lai Jiangshan
2021-08-31 17:50 ` [PATCH 20/24] x86/entry: Add the C version get_percpu_base() Lai Jiangshan
2021-08-31 17:50 ` [PATCH 21/24] x86/entry: Add the C version ist_switch_to_kernel_gsbase() Lai Jiangshan
2021-08-31 17:50 ` Lai Jiangshan [this message]
2021-09-02 10:33   ` [PATCH 22/24] x86/entry: Implement and use do_paranoid_entry() and paranoid_exit() Peter Zijlstra
2021-09-02 10:42     ` Lai Jiangshan
2021-09-02 12:02       ` Peter Zijlstra
2021-09-02 11:58     ` Lai Jiangshan
2021-09-02 12:29       ` Joerg Roedel
2021-08-31 17:50 ` [PATCH 23/24] x86/entry: Remove the unused ASM macros Lai Jiangshan
2021-08-31 17:50 ` [PATCH 24/24] x86/syscall/64: Move the checking for sysret to C code Lai Jiangshan
2021-09-10  7:20   ` Nikolay Borisov
2021-09-10  7:30     ` Lai Jiangshan
2021-08-31 20:44 ` [PATCH 00/24] x86/entry/64: Convert a bunch of ASM entry code into " Peter Zijlstra
2021-09-02  6:28   ` Lai Jiangshan
2021-09-02  7:44     ` Peter Zijlstra
2021-09-02 10:50 ` [PATCH 25/24] x86/traps: Rewrite native_load_gs_index in " Lai Jiangshan
2021-09-08  1:38   ` H. Peter Anvin
2021-09-08  4:42     ` H. Peter Anvin
2021-09-08  5:00       ` H. Peter Anvin
2021-09-08  7:12         ` Lai Jiangshan
2021-09-09 23:16           ` H. Peter Anvin
2021-09-13 20:01   ` Andy Lutomirski
2021-09-14  2:04     ` Lai Jiangshan
2021-09-14  8:14       ` Peter Zijlstra
2021-09-14  8:17         ` Borislav Petkov
2021-09-14  8:40         ` Lai Jiangshan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210831175025.27570-23-jiangshanlai@gmail.com \
    --to=jiangshanlai@gmail.com \
    --cc=bp@alien8.de \
    --cc=hpa@zytor.com \
    --cc=jroedel@suse.de \
    --cc=laijs@linux.alibaba.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mingo@redhat.com \
    --cc=tglx@linutronix.de \
    --cc=tony.luck@intel.com \
    --cc=x86@kernel.org \
    --cc=youquan.song@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.