All of lore.kernel.org
 help / color / mirror / Atom feed
From: Petr Tesarik <petrtesarik@huaweicloud.com>
To: Jonathan Corbet <corbet@lwn.net>,
	Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	x86@kernel.org (maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT)),
	"H. Peter Anvin" <hpa@zytor.com>,
	Andy Lutomirski <luto@kernel.org>,
	Oleg Nesterov <oleg@redhat.com>,
	Peter Zijlstra <peterz@infradead.org>, Xin Li <xin3.li@intel.com>,
	Arnd Bergmann <arnd@arndb.de>,
	Andrew Morton <akpm@linux-foundation.org>,
	Rick Edgecombe <rick.p.edgecombe@intel.com>,
	Kees Cook <keescook@chromium.org>,
	"Masami Hiramatsu (Google)" <mhiramat@kernel.org>,
	Pengfei Xu <pengfei.xu@intel.com>,
	Josh Poimboeuf <jpoimboe@kernel.org>,
	Ze Gao <zegao2021@gmail.com>,
	"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>,
	Kai Huang <kai.huang@intel.com>,
	David Woodhouse <dwmw@amazon.co.uk>,
	Brian Gerst <brgerst@gmail.com>, Jason Gunthorpe <jgg@ziepe.ca>,
	Joerg Roedel <jroedel@suse.de>,
	"Mike Rapoport (IBM)" <rppt@kernel.org>,
	Tina Zhang <tina.zhang@intel.com>,
	Jacob Pan <jacob.jun.pan@linux.intel.com>,
	linux-doc@vger.kernel.org (open list:DOCUMENTATION),
	linux-kernel@vger.kernel.org (open list)
Cc: Roberto Sassu <roberto.sassu@huaweicloud.com>,
	petr@tesarici.cz,
	Petr Tesarik <petr.tesarik1@huawei-partners.com>
Subject: [PATCH v1 5/8] sbm: x86: handle sandbox mode faults
Date: Wed, 14 Feb 2024 12:35:13 +0100	[thread overview]
Message-ID: <20240214113516.2307-6-petrtesarik@huaweicloud.com> (raw)
In-Reply-To: <20240214113516.2307-1-petrtesarik@huaweicloud.com>

From: Petr Tesarik <petr.tesarik1@huawei-partners.com>

Provide a fault handler for sandbox mode. Set the sandbox mode instance
error code, abort the sandbox and return to the caller. To allow graceful
return from a fatal fault, save all callee-saved registers (including the
stack pointer) just before passing control to the target function.

Modify the handlers for the #PF, #DF and #GP CPU exceptions to call this
handler if coming from sandbox mode. The check is based on the saved CS
register, which should be modified in the entry path to a value that is
otherwise not possible (__SBM_CS).

For the page fault handler, make sure that the sandbox mode check is placed
before do_kern_addr_fault(). That function calls spurious_kernel_fault(),
which implements lazy TLB invalidation of kernel pages and assumes that the
faulting instruction ran with kernel-mode page tables; it would produce
false positives for sandbox mode.

Signed-off-by: Petr Tesarik <petr.tesarik1@huawei-partners.com>
---
 arch/x86/include/asm/ptrace.h  | 21 +++++++++++++++++++++
 arch/x86/include/asm/sbm.h     | 24 ++++++++++++++++++++++++
 arch/x86/include/asm/segment.h |  7 +++++++
 arch/x86/kernel/asm-offsets.c  |  5 +++++
 arch/x86/kernel/sbm/call_64.S  | 21 +++++++++++++++++++++
 arch/x86/kernel/sbm/core.c     | 26 ++++++++++++++++++++++++++
 arch/x86/kernel/traps.c        | 11 +++++++++++
 arch/x86/mm/fault.c            |  6 ++++++
 8 files changed, 121 insertions(+)

diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index f4db78b09c8f..f66f16f037b0 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -164,6 +164,27 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
 #endif
 }
 
+/*
+ * sandbox_mode() - did a register set come from SandBox Mode?
+ * @regs:  register set
+ */
+static inline bool sandbox_mode(struct pt_regs *regs)
+{
+#ifdef CONFIG_X86_64
+#ifdef CONFIG_SANDBOX_MODE
+	/*
+	 * SandBox Mode always runs in 64-bit and it is not implemented
+	 * on paravirt systems, so this is the only possible value.
+	 */
+	return regs->cs == __SBM_CS;
+#else /* !CONFIG_SANDBOX_MODE */
+	return false;
+#endif
+#else /* !CONFIG_X86_64 */
+	return false;
+#endif
+}
+
 /*
  * Determine whether the register set came from any context that is running in
  * 64-bit mode.
diff --git a/arch/x86/include/asm/sbm.h b/arch/x86/include/asm/sbm.h
index ca4741b449e8..229b1ac3bbd4 100644
--- a/arch/x86/include/asm/sbm.h
+++ b/arch/x86/include/asm/sbm.h
@@ -11,23 +11,29 @@
 
 #include <asm/processor.h>
 
+struct pt_regs;
+
 #if defined(CONFIG_HAVE_ARCH_SBM) && defined(CONFIG_SANDBOX_MODE)
 
 #include <asm/pgtable_types.h>
 
 /**
  * struct x86_sbm_state - Run-time state of the environment.
+ * @sbm:         Link back to the SBM instance.
  * @pgd:         Sandbox mode page global directory.
  * @stack:       Sandbox mode stack.
  * @exc_stack:   Exception and IRQ stack.
+ * @return_sp:   Stack pointer for returning to kernel mode.
  *
  * One instance of this union is allocated for each sandbox and stored as SBM
  * instance private data.
  */
 struct x86_sbm_state {
+	struct sbm *sbm;
 	pgd_t *pgd;
 	unsigned long stack;
 	unsigned long exc_stack;
+	unsigned long return_sp;
 };
 
 /**
@@ -43,6 +49,18 @@ static inline unsigned long top_of_intr_stack(void)
 	return current_top_of_stack();
 }
 
+/**
+ * handle_sbm_fault() - Handle a CPU fault in sandbox mode.
+ * @regs:       Saved registers at fault.
+ * @error_code: CPU error code.
+ * @address:    Fault address (CR2 register).
+ *
+ * Handle a sandbox mode fault. The caller should use sandbox_mode() to
+ * check that @regs came from sandbox mode before calling this function.
+ */
+void handle_sbm_fault(struct pt_regs *regs, unsigned long error_code,
+		      unsigned long address);
+
 #else /* defined(CONFIG_HAVE_ARCH_SBM) && defined(CONFIG_SANDBOX_MODE) */
 
 static inline unsigned long top_of_intr_stack(void)
@@ -50,6 +68,12 @@ static inline unsigned long top_of_intr_stack(void)
 	return current_top_of_stack();
 }
 
+static inline void handle_sbm_fault(struct pt_regs *regs,
+				    unsigned long error_code,
+				    unsigned long address)
+{
+}
+
 #endif /* defined(CONFIG_HAVE_ARCH_SBM) && defined(CONFIG_SANDBOX_MODE) */
 
 #endif /* __ASM_SBM_H */
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 9d6411c65920..966831385d18 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -217,6 +217,13 @@
 #define __USER_CS			(GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
 #define __CPUNODE_SEG			(GDT_ENTRY_CPUNODE*8 + 3)
 
+/*
+ * Sandbox runs with __USER_CS, but the interrupt entry code sets the RPL
+ * in the saved selector to zero to avoid user-mode processing (FPU, signal
+ * delivery, etc.). This is the resulting pseudo-CS.
+ */
+#define __SBM_CS			(GDT_ENTRY_DEFAULT_USER_CS*8)
+
 #endif
 
 #define IDT_ENTRIES			256
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 6913b372ccf7..44d4f0a0cb19 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -20,6 +20,7 @@
 #include <asm/suspend.h>
 #include <asm/tlbflush.h>
 #include <asm/tdx.h>
+#include <asm/sbm.h>
 
 #ifdef CONFIG_XEN
 #include <xen/interface/xen.h>
@@ -120,4 +121,8 @@ static void __used common(void)
 	OFFSET(ARIA_CTX_rounds, aria_ctx, rounds);
 #endif
 
+#if defined(CONFIG_HAVE_ARCH_SBM) && defined(CONFIG_SANDBOX_MODE)
+	COMMENT("SandBox Mode");
+	OFFSET(SBM_return_sp, x86_sbm_state, return_sp);
+#endif
 }
diff --git a/arch/x86/kernel/sbm/call_64.S b/arch/x86/kernel/sbm/call_64.S
index 1b232c8d15b7..6a615b4f6047 100644
--- a/arch/x86/kernel/sbm/call_64.S
+++ b/arch/x86/kernel/sbm/call_64.S
@@ -22,6 +22,17 @@
  * rcx  .. top of sandbox stack
  */
 SYM_FUNC_START(x86_sbm_exec)
+	/* save all callee-saved registers */
+	push	%rbp
+	push	%rbx
+	push	%r12
+	push	%r13
+	push	%r14
+	push	%r15
+
+	/* to be used by sandbox abort */
+	mov	%rsp, SBM_return_sp(%rdi)
+
 	/*
 	 * Set up the sandbox stack:
 	 * 1. Store the old stack pointer at the top of the sandbox stack,
@@ -37,5 +48,15 @@ SYM_FUNC_START(x86_sbm_exec)
 
 	pop	%rsp
 
+SYM_INNER_LABEL(x86_sbm_return, SYM_L_GLOBAL)
+	ANNOTATE_NOENDBR	// IRET target via x86_sbm_fault()
+
+	/* restore callee-saved registers and return */
+	pop	%r15
+	pop	%r14
+	pop	%r13
+	pop	%r12
+	pop	%rbx
+	pop	%rbp
 	RET
 SYM_FUNC_END(x86_sbm_exec)
diff --git a/arch/x86/kernel/sbm/core.c b/arch/x86/kernel/sbm/core.c
index 81f1b0093537..d4c378847e93 100644
--- a/arch/x86/kernel/sbm/core.c
+++ b/arch/x86/kernel/sbm/core.c
@@ -13,6 +13,8 @@
 #include <asm/page.h>
 #include <asm/sbm.h>
 #include <asm/sections.h>
+#include <asm/segment.h>
+#include <asm/trap_pf.h>
 #include <linux/cpumask.h>
 #include <linux/mm.h>
 #include <linux/sbm.h>
@@ -23,6 +25,7 @@
 
 asmlinkage int x86_sbm_exec(struct x86_sbm_state *state, sbm_func func,
 			    void *args, unsigned long sbm_tos);
+extern char x86_sbm_return[];
 
 static inline phys_addr_t page_to_ptval(struct page *page)
 {
@@ -343,6 +346,8 @@ int arch_sbm_exec(struct sbm *sbm, sbm_func func, void *args)
 	struct x86_sbm_state *state = sbm->private;
 	int err;
 
+	state->sbm = sbm;
+
 	/* let interrupt handlers use the sandbox state page */
 	barrier();
 	WRITE_ONCE(current_thread_info()->sbm_state, state);
@@ -354,3 +359,24 @@ int arch_sbm_exec(struct sbm *sbm, sbm_func func, void *args)
 
 	return err;
 }
+
+void handle_sbm_fault(struct pt_regs *regs, unsigned long error_code,
+		      unsigned long address)
+{
+	struct x86_sbm_state *state = current_thread_info()->sbm_state;
+
+	/*
+	 * Force -EFAULT unless the fault was due to a user-mode instruction
+	 * fetch from the designated return address.
+	 */
+	if (error_code != (X86_PF_PROT | X86_PF_USER | X86_PF_INSTR) ||
+	    address != (unsigned long)x86_sbm_return)
+		state->sbm->error = -EFAULT;
+
+	/* modify IRET frame to exit from sandbox */
+	regs->ip = (unsigned long)x86_sbm_return;
+	regs->cs = __KERNEL_CS;
+	regs->flags = X86_EFLAGS_IF;
+	regs->sp = state->return_sp;
+	regs->ss = __KERNEL_DS;
+}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b9c9c74314e7..8fc5b17b8fb4 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -416,6 +416,12 @@ DEFINE_IDTENTRY_DF(exc_double_fault)
 
 	irqentry_nmi_enter(regs);
 	instrumentation_begin();
+
+	if (sandbox_mode(regs)) {
+		handle_sbm_fault(regs, error_code, 0);
+		return;
+	}
+
 	notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
 
 	tsk->thread.error_code = error_code;
@@ -675,6 +681,11 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
 		goto exit;
 	}
 
+	if (sandbox_mode(regs)) {
+		handle_sbm_fault(regs, error_code, 0);
+		return;
+	}
+
 	if (gp_try_fixup_and_notify(regs, X86_TRAP_GP, error_code, desc, 0))
 		goto exit;
 
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 679b09cfe241..f223b258e53f 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -34,6 +34,7 @@
 #include <asm/kvm_para.h>		/* kvm_handle_async_pf		*/
 #include <asm/vdso.h>			/* fixup_vdso_exception()	*/
 #include <asm/irq_stack.h>
+#include <asm/sbm.h>
 
 #define CREATE_TRACE_POINTS
 #include <asm/trace/exceptions.h>
@@ -1500,6 +1501,11 @@ handle_page_fault(struct pt_regs *regs, unsigned long error_code,
 	if (unlikely(kmmio_fault(regs, address)))
 		return;
 
+	if (sandbox_mode(regs)) {
+		handle_sbm_fault(regs, error_code, address);
+		return;
+	}
+
 	/* Was the fault on kernel-controlled part of the address space? */
 	if (unlikely(fault_in_kernel_space(address))) {
 		do_kern_addr_fault(regs, error_code, address);
-- 
2.34.1


  parent reply	other threads:[~2024-02-14 11:37 UTC|newest]

Thread overview: 63+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-14 11:35 [PATCH v1 0/8] x86_64 SandBox Mode arch hooks Petr Tesarik
2024-02-14 11:35 ` [PATCH v1 1/8] sbm: x86: page table " Petr Tesarik
2024-02-14 11:35 ` [PATCH v1 2/8] sbm: x86: execute target function on sandbox mode stack Petr Tesarik
2024-02-14 11:35 ` [PATCH v1 3/8] sbm: x86: map system data structures into the sandbox Petr Tesarik
2024-02-14 11:35 ` [PATCH v1 4/8] sbm: x86: allocate and map an exception stack Petr Tesarik
2024-02-14 11:35 ` Petr Tesarik [this message]
2024-02-14 11:35 ` [PATCH v1 6/8] sbm: x86: switch to sandbox mode pages in arch_sbm_exec() Petr Tesarik
2024-02-14 11:35 ` [PATCH v1 7/8] sbm: documentation of the x86-64 SandBox Mode implementation Petr Tesarik
2024-02-14 18:37   ` Xin Li
2024-02-14 19:16     ` Petr Tesařík
2024-02-14 11:35 ` [PATCH v1 8/8] sbm: x86: lazy TLB flushing Petr Tesarik
2024-02-14 14:52 ` [PATCH v1 0/8] x86_64 SandBox Mode arch hooks Dave Hansen
2024-02-14 15:28   ` H. Peter Anvin
2024-02-14 16:41     ` Petr Tesařík
2024-02-14 17:29       ` H. Peter Anvin
2024-02-14 19:14         ` Petr Tesařík
2024-02-14 18:14       ` Edgecombe, Rick P
2024-02-14 18:32         ` Petr Tesařík
2024-02-14 19:19           ` Edgecombe, Rick P
2024-02-14 19:35             ` Petr Tesařík
2024-02-14 18:22   ` Petr Tesařík
2024-02-14 18:42     ` Dave Hansen
2024-02-14 19:33       ` Petr Tesařík
2024-02-14 20:16         ` Dave Hansen
2024-02-16 15:24           ` [RFC 0/8] PGP key parser using SandBox Mode Petr Tesarik
2024-02-16 15:24             ` [RFC 1/8] mpi: Introduce mpi_key_length() Petr Tesarik
2024-02-16 15:24             ` [RFC 2/8] rsa: add parser of raw format Petr Tesarik
2024-02-16 15:24             ` [RFC 3/8] PGPLIB: PGP definitions (RFC 4880) Petr Tesarik
2024-02-16 15:24             ` [RFC 4/8] PGPLIB: Basic packet parser Petr Tesarik
2024-02-16 15:24             ` [RFC 5/8] PGPLIB: Signature parser Petr Tesarik
2024-02-16 15:24             ` [RFC 6/8] KEYS: PGP data parser Petr Tesarik
2024-02-16 16:44               ` Matthew Wilcox
2024-02-16 16:53                 ` Roberto Sassu
2024-02-16 17:08                   ` H. Peter Anvin
2024-02-16 17:13                     ` Roberto Sassu
2024-02-20 10:55                     ` Petr Tesarik
2024-02-21 14:02                       ` H. Peter Anvin
2024-02-22  7:53                         ` Petr Tesařík
2024-02-16 18:44                   ` Matthew Wilcox
2024-02-16 19:54                     ` Roberto Sassu
2024-02-28 17:58                       ` Roberto Sassu
2024-02-16 15:24             ` [RFC 7/8] KEYS: Run PGP key parser in a sandbox Petr Tesarik
2024-02-18  6:07               ` kernel test robot
2024-02-18  8:02               ` kernel test robot
2024-02-16 15:24             ` [RFC 8/8] KEYS: Add intentional fault injection Petr Tesarik
2024-02-16 15:38             ` [RFC 0/8] PGP key parser using SandBox Mode Dave Hansen
2024-02-16 16:08               ` Petr Tesařík
2024-02-16 17:21                 ` Jonathan Corbet
2024-02-16 18:24                   ` Roberto Sassu
2024-02-22 13:12           ` [RFC 0/5] PoC: convert AppArmor parser to " Petr Tesarik
2024-02-22 13:12             ` [RFC 1/5] sbm: x86: fix SBM error entry path Petr Tesarik
2024-02-22 13:12             ` [RFC 2/5] sbm: enhance buffer mapping API Petr Tesarik
2024-02-22 13:12             ` [RFC 3/5] sbm: x86: infrastructure to fix up sandbox faults Petr Tesarik
2024-02-22 13:12             ` [RFC 4/5] sbm: fix up calls to dynamic memory allocators Petr Tesarik
2024-02-22 15:51               ` Dave Hansen
2024-02-22 17:57                 ` Petr Tesařík
2024-02-22 18:03                   ` Dave Hansen
2024-02-22 13:12             ` [RFC 5/5] apparmor: parse profiles in sandbox mode Petr Tesarik
2024-02-14 18:52     ` [PATCH v1 0/8] x86_64 SandBox Mode arch hooks Xin Li
2024-02-15  6:59       ` Petr Tesařík
2024-02-15  8:16         ` H. Peter Anvin
2024-02-15  9:30           ` Petr Tesařík
2024-02-15  9:37             ` Roberto Sassu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240214113516.2307-6-petrtesarik@huaweicloud.com \
    --to=petrtesarik@huaweicloud.com \
    --cc=akpm@linux-foundation.org \
    --cc=arnd@arndb.de \
    --cc=bp@alien8.de \
    --cc=brgerst@gmail.com \
    --cc=corbet@lwn.net \
    --cc=dave.hansen@linux.intel.com \
    --cc=dwmw@amazon.co.uk \
    --cc=hpa@zytor.com \
    --cc=jacob.jun.pan@linux.intel.com \
    --cc=jgg@ziepe.ca \
    --cc=jpoimboe@kernel.org \
    --cc=jroedel@suse.de \
    --cc=kai.huang@intel.com \
    --cc=keescook@chromium.org \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mhiramat@kernel.org \
    --cc=mingo@redhat.com \
    --cc=oleg@redhat.com \
    --cc=pengfei.xu@intel.com \
    --cc=peterz@infradead.org \
    --cc=petr.tesarik1@huawei-partners.com \
    --cc=petr@tesarici.cz \
    --cc=rick.p.edgecombe@intel.com \
    --cc=roberto.sassu@huaweicloud.com \
    --cc=rppt@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=tina.zhang@intel.com \
    --cc=x86@kernel.org \
    --cc=xin3.li@intel.com \
    --cc=zegao2021@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.