linux-api.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v7 1/4] syscalls: Restore address limit after a syscall
@ 2017-04-10 16:44 Thomas Garnier
  2017-04-10 16:44 ` [PATCH v7 3/4] arm/syscalls: Architecture specific pre-usermode check Thomas Garnier
                   ` (2 more replies)
  0 siblings, 3 replies; 15+ messages in thread
From: Thomas Garnier @ 2017-04-10 16:44 UTC (permalink / raw)
  To: Martin Schwidefsky, Heiko Carstens, Arnd Bergmann, Dave Hansen,
	Andrew Morton, Thomas Garnier, David Howells,
	René Nyffenegger, Paul E . McKenney, Ingo Molnar,
	Thomas Gleixner, Oleg Nesterov, Stephen Smalley,
	Pavel Tikhomirov, Ingo Molnar, H . Peter Anvin, Andy Lutomirski,
	Paolo Bonzini, Kees Cook, Rik van Riel, Josh Poimboeuf,
	Borislav Petkov, Brian Gerst
  Cc: linux-s390, linux-kernel, linux-api, x86, linux-arm-kernel,
	kernel-hardening

This patch ensures a syscall does not return to user-mode with a kernel
address limit. If that happened, a process can corrupt kernel-mode
memory and elevate privileges.

For example, it would mitigation this bug:

- https://bugs.chromium.org/p/project-zero/issues/detail?id=990

The CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE option is also
added so each architecture can optimize this change.

Signed-off-by: Thomas Garnier <thgarnie@google.com>
Tested-by: Kees Cook <keescook@chromium.org>
---
Based on next-20170410
---
 arch/s390/Kconfig        |  1 +
 include/linux/syscalls.h | 26 +++++++++++++++++++++++++-
 init/Kconfig             |  6 ++++++
 kernel/sys.c             | 13 +++++++++++++
 4 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index d25435d94b6e..489a0cc6e46b 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -103,6 +103,7 @@ config S390
 	select ARCH_INLINE_WRITE_UNLOCK_BH
 	select ARCH_INLINE_WRITE_UNLOCK_IRQ
 	select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
+	select ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
 	select ARCH_SAVE_PAGE_KEYS if HIBERNATION
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 980c3c9b06f8..801a7a74fe28 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -191,6 +191,27 @@ extern struct trace_event_functions exit_syscall_print_funcs;
 	SYSCALL_METADATA(sname, x, __VA_ARGS__)			\
 	__SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
 
+
+/*
+ * Called before coming back to user-mode. Returning to user-mode with an
+ * address limit different than USER_DS can allow to overwrite kernel memory.
+ */
+static inline void verify_pre_usermode_state(void) {
+	BUG_ON(!segment_eq(get_fs(), USER_DS));
+}
+
+#ifndef CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
+#define __CHECK_USER_CALLER() \
+	bool user_caller = segment_eq(get_fs(), USER_DS)
+#define __VERIFY_PRE_USERMODE_STATE() \
+	if (user_caller) verify_pre_usermode_state()
+#else
+#define __CHECK_USER_CALLER()
+#define __VERIFY_PRE_USERMODE_STATE()
+asmlinkage void address_limit_check_failed(void);
+#endif
+
+
 #define __PROTECT(...) asmlinkage_protect(__VA_ARGS__)
 #define __SYSCALL_DEFINEx(x, name, ...)					\
 	asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))	\
@@ -199,7 +220,10 @@ extern struct trace_event_functions exit_syscall_print_funcs;
 	asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__));	\
 	asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__))	\
 	{								\
-		long ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__));	\
+		long ret;						\
+		__CHECK_USER_CALLER();					\
+		ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__));	\
+		__VERIFY_PRE_USERMODE_STATE();				\
 		__MAP(x,__SC_TEST,__VA_ARGS__);				\
 		__PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__));	\
 		return ret;						\
diff --git a/init/Kconfig b/init/Kconfig
index 7f7027817bce..e5fbd0becfa7 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1958,6 +1958,12 @@ config PROFILING
 config TRACEPOINTS
 	bool
 
+config ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
+	bool
+	help
+	  Disable the generic pre-usermode state verification. Allow each
+	  architecture to optimize how and when the verification is done.
+
 source "arch/Kconfig"
 
 endmenu		# General setup
diff --git a/kernel/sys.c b/kernel/sys.c
index 196c7134bee6..d30530ff8166 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2459,3 +2459,16 @@ COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info)
 	return 0;
 }
 #endif /* CONFIG_COMPAT */
+
+#ifdef CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
+/*
+ * This function is called when an architecture specific implementation detected
+ * an invalid address limit. The generic user-mode state checker will finish on
+ * the appropriate BUG_ON.
+ */
+asmlinkage void address_limit_check_failed(void)
+{
+	verify_pre_usermode_state();
+	panic("address_limit_check_failed called with a valid user-mode state");
+}
+#endif
-- 
2.12.2.715.g7642488e1d-goog

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v7 2/4] x86/syscalls: Architecture specific pre-usermode check
       [not found] ` <20170410164420.64003-1-thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
@ 2017-04-10 16:44   ` Thomas Garnier
  2017-04-24 23:57   ` [PATCH v7 1/4] syscalls: Restore address limit after a syscall Kees Cook
  2017-04-25  6:33   ` Ingo Molnar
  2 siblings, 0 replies; 15+ messages in thread
From: Thomas Garnier @ 2017-04-10 16:44 UTC (permalink / raw)
  To: Martin Schwidefsky, Heiko Carstens, Arnd Bergmann, Dave Hansen,
	Andrew Morton, Thomas Garnier, David Howells,
	René Nyffenegger, Paul E . McKenney, Ingo Molnar,
	Thomas Gleixner, Oleg Nesterov, Stephen Smalley,
	Pavel Tikhomirov, Ingo Molnar, H . Peter Anvin, Andy Lutomirski,
	Paolo Bonzini, Kees Cook
  Cc: linux-s390-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-api-u79uwXL29TY76Z2rM5mHXA, x86-DgEjT+Ai2ygdnm+yROfE0A,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	kernel-hardening-ZwoEplunGu1jrUoiu81ncdBPR1lH4CV8

Disable the generic pre-usermode check in favor of an optimized
implementation. This patch adds specific checks on user-mode return path
to make it faster and smaller.

The user-mode state check is added to the prepare_exit_to_usermode
function. This function is called before any user-mode return on 32-bit
and on the 64-bit syscall slowpath. For the 64-bit syscall fast path, an
assembly address limit check redirects to the slow path if the address
limit is different.

The TASK_SIZE_MAX define is moved to the pgtable_64_types header so it
can be used in assembly code.

Signed-off-by: Thomas Garnier <thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
---
Based on next-20170410
---
 arch/x86/Kconfig                        |  1 +
 arch/x86/entry/common.c                 |  3 +++
 arch/x86/entry/entry_64.S               |  8 ++++++++
 arch/x86/include/asm/pgtable_64_types.h | 11 +++++++++++
 arch/x86/include/asm/processor.h        | 11 -----------
 5 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c43f47622440..74185795eb24 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -63,6 +63,7 @@ config X86
 	select ARCH_MIGHT_HAVE_ACPI_PDC		if ACPI
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select ARCH_MIGHT_HAVE_PC_SERIO
+	select ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
 	select ARCH_SUPPORTS_NUMA_BALANCING	if X86_64
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index cdefcfdd9e63..76ef050255c9 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -23,6 +23,7 @@
 #include <linux/user-return-notifier.h>
 #include <linux/uprobes.h>
 #include <linux/livepatch.h>
+#include <linux/syscalls.h>
 
 #include <asm/desc.h>
 #include <asm/traps.h>
@@ -183,6 +184,8 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
 	struct thread_info *ti = current_thread_info();
 	u32 cached_flags;
 
+	verify_pre_usermode_state();
+
 	if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
 		local_irq_disable();
 
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 607d72c4a485..62aca6dcdaf4 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -218,6 +218,14 @@ entry_SYSCALL_64_fastpath:
 	testl	$_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
 	jnz	1f
 
+	/*
+	 * If address limit is not based on user-mode, jump to slow path for
+	 * additional security checks.
+	 */
+	movq	$TASK_SIZE_MAX, %rcx
+	cmp	%rcx, TASK_addr_limit(%r11)
+	jne	1f
+
 	LOCKDEP_SYS_EXIT
 	TRACE_IRQS_ON		/* user mode is traced as IRQs on */
 	movq	RIP(%rsp), %rcx
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 06470da156ba..78af4d43a7ce 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -104,4 +104,15 @@ typedef struct { pteval_t pte; } pte_t;
 
 #define EARLY_DYNAMIC_PAGE_TABLES	64
 
+/*
+ * User space process size. 47bits minus one guard page.  The guard
+ * page is necessary on Intel CPUs: if a SYSCALL instruction is at
+ * the highest possible canonical userspace address, then that
+ * syscall will enter the kernel with a non-canonical return
+ * address, and SYSRET will explode dangerously.  We avoid this
+ * particular problem by preventing anything from being mapped
+ * at the maximum canonical address.
+ */
+#define TASK_SIZE_MAX	((_AC(1, UL) << 47) - PAGE_SIZE)
+
 #endif /* _ASM_X86_PGTABLE_64_DEFS_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 3cada998a402..e80822582d3e 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -825,17 +825,6 @@ static inline void spin_lock_prefetch(const void *x)
 #define KSTK_ESP(task)		(task_pt_regs(task)->sp)
 
 #else
-/*
- * User space process size. 47bits minus one guard page.  The guard
- * page is necessary on Intel CPUs: if a SYSCALL instruction is at
- * the highest possible canonical userspace address, then that
- * syscall will enter the kernel with a non-canonical return
- * address, and SYSRET will explode dangerously.  We avoid this
- * particular problem by preventing anything from being mapped
- * at the maximum canonical address.
- */
-#define TASK_SIZE_MAX	((1UL << 47) - PAGE_SIZE)
-
 /* This decides where the kernel will search for a free chunk of vm
  * space during mmap's.
  */
-- 
2.12.2.715.g7642488e1d-goog

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v7 3/4] arm/syscalls: Architecture specific pre-usermode check
  2017-04-10 16:44 [PATCH v7 1/4] syscalls: Restore address limit after a syscall Thomas Garnier
@ 2017-04-10 16:44 ` Thomas Garnier
  2017-04-10 16:44 ` [PATCH v7 4/4] arm64/syscalls: " Thomas Garnier
       [not found] ` <20170410164420.64003-1-thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
  2 siblings, 0 replies; 15+ messages in thread
From: Thomas Garnier @ 2017-04-10 16:44 UTC (permalink / raw)
  To: Martin Schwidefsky, Heiko Carstens, Arnd Bergmann, Dave Hansen,
	Andrew Morton, Thomas Garnier, David Howells,
	René Nyffenegger, Paul E . McKenney, Ingo Molnar,
	Thomas Gleixner, Oleg Nesterov, Stephen Smalley,
	Pavel Tikhomirov, Ingo Molnar, H . Peter Anvin, Andy Lutomirski,
	Paolo Bonzini, Kees Cook, Rik van Riel, Josh Poimboeuf,
	Borislav Petkov, Brian Gerst
  Cc: linux-s390, linux-kernel, linux-api, x86, linux-arm-kernel,
	kernel-hardening

Disable the generic pre-usermode check in favor of an optimized
implementation. This patch adds specific checks on user-mode return path
to make it faster and smaller.

The address limit is checked on each syscall return path to user-mode
path as well as the irq user-mode return function. If the address limit
was changed, a generic handler is called to stop the kernel on an
explicit check.

Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
Based on next-20170410
---
 arch/arm/Kconfig               |  1 +
 arch/arm/kernel/entry-common.S | 10 +++++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index b0e8c9763e94..af0992debfd2 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -12,6 +12,7 @@ config ARM
 	select ARCH_HAVE_CUSTOM_GPIO_H
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_MIGHT_HAVE_PC_PARPORT
+	select ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
 	select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
 	select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7
 	select ARCH_SUPPORTS_ATOMIC_RMW
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index eb5cd77bf1d8..c8a8ba5c22ad 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -12,6 +12,7 @@
 #include <asm/unistd.h>
 #include <asm/ftrace.h>
 #include <asm/unwind.h>
+#include <asm/memory.h>
 #ifdef CONFIG_AEABI
 #include <asm/unistd-oabi.h>
 #endif
@@ -27,7 +28,6 @@
 
 #include "entry-header.S"
 
-
 	.align	5
 #if !(IS_ENABLED(CONFIG_TRACE_IRQFLAGS) || IS_ENABLED(CONFIG_CONTEXT_TRACKING))
 /*
@@ -40,9 +40,12 @@ ret_fast_syscall:
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)
 	disable_irq_notrace			@ disable interrupts
+	ldr	r2, [tsk, #TI_ADDR_LIMIT]
 	ldr	r1, [tsk, #TI_FLAGS]		@ re-check for syscall tracing
 	tst	r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
 	bne	fast_work_pending
+	cmp	r2, #TASK_SIZE
+	blne	address_limit_check_failed
 
 	/* perform architecture specific actions before user return */
 	arch_ret_to_user r1, lr
@@ -66,6 +69,7 @@ ret_fast_syscall:
  UNWIND(.cantunwind	)
 	str	r0, [sp, #S_R0 + S_OFF]!	@ save returned r0
 	disable_irq_notrace			@ disable interrupts
+	ldr	r2, [tsk, #TI_ADDR_LIMIT]
 	ldr	r1, [tsk, #TI_FLAGS]		@ re-check for syscall tracing
 	tst	r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
 	beq	no_work_pending
@@ -82,6 +86,7 @@ slow_work_pending:
 	mov	r2, why				@ 'syscall'
 	bl	do_work_pending
 	cmp	r0, #0
+	ldreq   r2, [tsk, #TI_ADDR_LIMIT]
 	beq	no_work_pending
 	movlt	scno, #(__NR_restart_syscall - __NR_SYSCALL_BASE)
 	ldmia	sp, {r0 - r6}			@ have to reload r0 - r6
@@ -99,9 +104,12 @@ ret_slow_syscall:
 	disable_irq_notrace			@ disable interrupts
 ENTRY(ret_to_user_from_irq)
 	ldr	r1, [tsk, #TI_FLAGS]
+	ldr	r2, [tsk, #TI_ADDR_LIMIT]
 	tst	r1, #_TIF_WORK_MASK
 	bne	slow_work_pending
 no_work_pending:
+	cmp	r2, #TASK_SIZE
+	blne	address_limit_check_failed
 	asm_trace_hardirqs_on save = 0
 
 	/* perform architecture specific actions before user return */
-- 
2.12.2.715.g7642488e1d-goog

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v7 4/4] arm64/syscalls: Architecture specific pre-usermode check
  2017-04-10 16:44 [PATCH v7 1/4] syscalls: Restore address limit after a syscall Thomas Garnier
  2017-04-10 16:44 ` [PATCH v7 3/4] arm/syscalls: Architecture specific pre-usermode check Thomas Garnier
@ 2017-04-10 16:44 ` Thomas Garnier
  2017-04-10 17:12   ` Catalin Marinas
       [not found] ` <20170410164420.64003-1-thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
  2 siblings, 1 reply; 15+ messages in thread
From: Thomas Garnier @ 2017-04-10 16:44 UTC (permalink / raw)
  To: Martin Schwidefsky, Heiko Carstens, Arnd Bergmann, Dave Hansen,
	Andrew Morton, Thomas Garnier, David Howells,
	René Nyffenegger, Paul E . McKenney, Ingo Molnar,
	Thomas Gleixner, Oleg Nesterov, Stephen Smalley,
	Pavel Tikhomirov, Ingo Molnar, H . Peter Anvin, Andy Lutomirski,
	Paolo Bonzini, Kees Cook, Rik van Riel, Josh Poimboeuf,
	Borislav Petkov, Brian Gerst
  Cc: linux-s390, linux-kernel, linux-api, x86, linux-arm-kernel,
	kernel-hardening

Disable the generic pre-usermode check in favor of an optimized
implementation. This patch adds specific checks on user-mode return path
to make it faster and smaller.

The address limit is checked on each syscall return path to user-mode.
If it was changed, a generic handler is called to stop the kernel on an
explicit check.

Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
Based on next-20170410
---
 arch/arm64/Kconfig        |  1 +
 arch/arm64/kernel/entry.S | 13 +++++++++++++
 2 files changed, 14 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9b8fcab7da56..3f9e8e7d9376 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -24,6 +24,7 @@ config ARM64
 	select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
 	select ARCH_WANT_FRAME_POINTERS
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
+	select ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
 	select ARM_AMBA
 	select ARM_ARCH_TIMER
 	select ARM_GIC
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 43512d4d7df2..bdd094c7837a 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -744,6 +744,8 @@ ENDPROC(cpu_switch_to)
 ret_fast_syscall:
 	disable_irq				// disable interrupts
 	str	x0, [sp, #S_X0]			// returned x0
+	ldr	x2, [tsk, #TSK_TI_ADDR_LIMIT]	// check addr limit change
+	tbnz	x2, #63, addr_limit_fail
 	ldr	x1, [tsk, #TSK_TI_FLAGS]	// re-check for syscall tracing
 	and	x2, x1, #_TIF_SYSCALL_WORK
 	cbnz	x2, ret_fast_syscall_trace
@@ -771,6 +773,9 @@ work_pending:
  */
 ret_to_user:
 	disable_irq				// disable interrupts
+	ldr	x2, [tsk, #TSK_TI_ADDR_LIMIT]	// check addr limit change
+	tbnz	x2, #63, addr_limit_fail
+
 	ldr	x1, [tsk, #TSK_TI_FLAGS]
 	and	x2, x1, #_TIF_WORK_MASK
 	cbnz	x2, work_pending
@@ -780,6 +785,14 @@ finish_ret_to_user:
 ENDPROC(ret_to_user)
 
 /*
+ * Address limit was incorrect before returning in user-mode which could be
+ * used to elevate privileges. Call the generic handler to stop the kernel on
+ * the appropriate check. This function does not return.
+ */
+addr_limit_fail:
+	bl address_limit_check_failed
+
+/*
  * This is how we return from a fork.
  */
 ENTRY(ret_from_fork)
-- 
2.12.2.715.g7642488e1d-goog

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v7 4/4] arm64/syscalls: Architecture specific pre-usermode check
  2017-04-10 16:44 ` [PATCH v7 4/4] arm64/syscalls: " Thomas Garnier
@ 2017-04-10 17:12   ` Catalin Marinas
       [not found]     ` <20170410171240.GE30647-M2fw3Uu6cmfZROr8t4l/smS4ubULX0JqMm0uRHvK7Nw@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Catalin Marinas @ 2017-04-10 17:12 UTC (permalink / raw)
  To: Thomas Garnier
  Cc: Martin Schwidefsky, Heiko Carstens, Arnd Bergmann, Dave Hansen,
	Andrew Morton, David Howells, René Nyffenegger,
	Paul E . McKenney, Ingo Molnar, Thomas Gleixner, Oleg Nesterov,
	Stephen Smalley, Pavel Tikhomirov, Ingo Molnar, H . Peter Anvin,
	Andy Lutomirski, Paolo Bonzini, Kees Cook, Rik van Riel,
	Josh Poimboeuf, Borislav Petkov, Brian Gerst

On Mon, Apr 10, 2017 at 09:44:20AM -0700, Thomas Garnier wrote:
> Disable the generic pre-usermode check in favor of an optimized
> implementation. This patch adds specific checks on user-mode return path
> to make it faster and smaller.
> 
> The address limit is checked on each syscall return path to user-mode.
> If it was changed, a generic handler is called to stop the kernel on an
> explicit check.
> 
> Signed-off-by: Thomas Garnier <thgarnie@google.com>
> ---
> Based on next-20170410
> ---
>  arch/arm64/Kconfig        |  1 +
>  arch/arm64/kernel/entry.S | 13 +++++++++++++
>  2 files changed, 14 insertions(+)
> 
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 9b8fcab7da56..3f9e8e7d9376 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -24,6 +24,7 @@ config ARM64
>  	select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
>  	select ARCH_WANT_FRAME_POINTERS
>  	select ARCH_HAS_UBSAN_SANITIZE_ALL
> +	select ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
>  	select ARM_AMBA
>  	select ARM_ARCH_TIMER
>  	select ARM_GIC
> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> index 43512d4d7df2..bdd094c7837a 100644
> --- a/arch/arm64/kernel/entry.S
> +++ b/arch/arm64/kernel/entry.S
> @@ -744,6 +744,8 @@ ENDPROC(cpu_switch_to)
>  ret_fast_syscall:
>  	disable_irq				// disable interrupts
>  	str	x0, [sp, #S_X0]			// returned x0
> +	ldr	x2, [tsk, #TSK_TI_ADDR_LIMIT]	// check addr limit change
> +	tbnz	x2, #63, addr_limit_fail
>  	ldr	x1, [tsk, #TSK_TI_FLAGS]	// re-check for syscall tracing
>  	and	x2, x1, #_TIF_SYSCALL_WORK
>  	cbnz	x2, ret_fast_syscall_trace
> @@ -771,6 +773,9 @@ work_pending:
>   */
>  ret_to_user:
>  	disable_irq				// disable interrupts
> +	ldr	x2, [tsk, #TSK_TI_ADDR_LIMIT]	// check addr limit change
> +	tbnz	x2, #63, addr_limit_fail
> +

Nitpick: unnecessary empty line added.

>  	ldr	x1, [tsk, #TSK_TI_FLAGS]
>  	and	x2, x1, #_TIF_WORK_MASK
>  	cbnz	x2, work_pending
> @@ -780,6 +785,14 @@ finish_ret_to_user:
>  ENDPROC(ret_to_user)
>  
>  /*
> + * Address limit was incorrect before returning in user-mode which could be
> + * used to elevate privileges. Call the generic handler to stop the kernel on
> + * the appropriate check. This function does not return.
> + */
> +addr_limit_fail:
> +	bl address_limit_check_failed

You could do a "b address_limit_check_failed" rather than "bl" since we
don't care about returning here (and a nitpick: insert tab between "b"
and "address_limit_check_failed").

With the above:

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v7 4/4] arm64/syscalls: Architecture specific pre-usermode check
       [not found]     ` <20170410171240.GE30647-M2fw3Uu6cmfZROr8t4l/smS4ubULX0JqMm0uRHvK7Nw@public.gmane.org>
@ 2017-04-10 20:06       ` Thomas Garnier
       [not found]         ` <20170410200622.8654-1-thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
  2017-04-10 20:07       ` Thomas Garnier
  1 sibling, 1 reply; 15+ messages in thread
From: Thomas Garnier @ 2017-04-10 20:06 UTC (permalink / raw)
  To: Martin Schwidefsky, Heiko Carstens, Arnd Bergmann, Dave Hansen,
	Andrew Morton, Thomas Garnier, David Howells,
	René Nyffenegger, Paul E . McKenney, Ingo Molnar,
	Thomas Gleixner, Oleg Nesterov, Stephen Smalley,
	Pavel Tikhomirov, Ingo Molnar, H . Peter Anvin, Andy Lutomirski,
	Paolo Bonzini, Kees Cook
  Cc: linux-s390-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-api-u79uwXL29TY76Z2rM5mHXA, x86-DgEjT+Ai2ygdnm+yROfE0A,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	kernel-hardening-ZwoEplunGu1jrUoiu81ncdBPR1lH4CV8

Disable the generic pre-usermode check in favor of an optimized
implementation. This patch adds specific checks on user-mode return path
to make it faster and smaller.

The address limit is checked on each syscall return path to user-mode.
If it was changed, a generic handler is called to stop the kernel on an
explicit check.

Signed-off-by: Thomas Garnier <thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
---
Based on next-20170410
---
 arch/arm64/Kconfig        |  1 +
 arch/arm64/kernel/entry.S | 13 +++++++++++++
 2 files changed, 14 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9b8fcab7da56..3f9e8e7d9376 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -24,6 +24,7 @@ config ARM64
 	select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
 	select ARCH_WANT_FRAME_POINTERS
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
+	select ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
 	select ARM_AMBA
 	select ARM_ARCH_TIMER
 	select ARM_GIC
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 43512d4d7df2..bdd094c7837a 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -744,6 +744,8 @@ ENDPROC(cpu_switch_to)
 ret_fast_syscall:
 	disable_irq				// disable interrupts
 	str	x0, [sp, #S_X0]			// returned x0
+	ldr	x2, [tsk, #TSK_TI_ADDR_LIMIT]	// check addr limit change
+	tbnz	x2, #63, addr_limit_fail
 	ldr	x1, [tsk, #TSK_TI_FLAGS]	// re-check for syscall tracing
 	and	x2, x1, #_TIF_SYSCALL_WORK
 	cbnz	x2, ret_fast_syscall_trace
@@ -771,6 +773,9 @@ work_pending:
  */
 ret_to_user:
 	disable_irq				// disable interrupts
+	ldr	x2, [tsk, #TSK_TI_ADDR_LIMIT]	// check addr limit change
+	tbnz	x2, #63, addr_limit_fail
+
 	ldr	x1, [tsk, #TSK_TI_FLAGS]
 	and	x2, x1, #_TIF_WORK_MASK
 	cbnz	x2, work_pending
@@ -780,6 +785,14 @@ finish_ret_to_user:
 ENDPROC(ret_to_user)
 
 /*
+ * Address limit was incorrect before returning in user-mode which could be
+ * used to elevate privileges. Call the generic handler to stop the kernel on
+ * the appropriate check. This function does not return.
+ */
+addr_limit_fail:
+	bl address_limit_check_failed
+
+/*
  * This is how we return from a fork.
  */
 ENTRY(ret_from_fork)
-- 
2.12.2.715.g7642488e1d-goog

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v7 4/4] arm64/syscalls: Architecture specific pre-usermode check
       [not found]     ` <20170410171240.GE30647-M2fw3Uu6cmfZROr8t4l/smS4ubULX0JqMm0uRHvK7Nw@public.gmane.org>
  2017-04-10 20:06       ` Thomas Garnier
@ 2017-04-10 20:07       ` Thomas Garnier
  1 sibling, 0 replies; 15+ messages in thread
From: Thomas Garnier @ 2017-04-10 20:07 UTC (permalink / raw)
  To: Martin Schwidefsky, Heiko Carstens, Arnd Bergmann, Dave Hansen,
	Andrew Morton, Thomas Garnier, David Howells,
	René Nyffenegger, Paul E . McKenney, Ingo Molnar,
	Thomas Gleixner, Oleg Nesterov, Stephen Smalley,
	Pavel Tikhomirov, Ingo Molnar, H . Peter Anvin, Andy Lutomirski,
	Paolo Bonzini, Kees Cook
  Cc: linux-s390-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-api-u79uwXL29TY76Z2rM5mHXA, x86-DgEjT+Ai2ygdnm+yROfE0A,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	kernel-hardening-ZwoEplunGu1jrUoiu81ncdBPR1lH4CV8

Disable the generic pre-usermode check in favor of an optimized
implementation. This patch adds specific checks on user-mode return path
to make it faster and smaller.

The address limit is checked on each syscall return path to user-mode.
If it was changed, a generic handler is called to stop the kernel on an
explicit check.

Signed-off-by: Thomas Garnier <thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
Reviewed-by: Catalin Marinas <catalin.marinas-5wv7dgnIgG8@public.gmane.org>
---
Based on next-20170410
Fix comments from Catalin and add review-by in the message.
---
 arch/arm64/Kconfig        |  1 +
 arch/arm64/kernel/entry.S | 12 ++++++++++++
 2 files changed, 13 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9b8fcab7da56..3f9e8e7d9376 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -24,6 +24,7 @@ config ARM64
 	select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
 	select ARCH_WANT_FRAME_POINTERS
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
+	select ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
 	select ARM_AMBA
 	select ARM_ARCH_TIMER
 	select ARM_GIC
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 43512d4d7df2..04c44c6f41b1 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -744,6 +744,8 @@ ENDPROC(cpu_switch_to)
 ret_fast_syscall:
 	disable_irq				// disable interrupts
 	str	x0, [sp, #S_X0]			// returned x0
+	ldr	x2, [tsk, #TSK_TI_ADDR_LIMIT]	// check addr limit change
+	tbnz	x2, #63, addr_limit_fail
 	ldr	x1, [tsk, #TSK_TI_FLAGS]	// re-check for syscall tracing
 	and	x2, x1, #_TIF_SYSCALL_WORK
 	cbnz	x2, ret_fast_syscall_trace
@@ -771,6 +773,8 @@ work_pending:
  */
 ret_to_user:
 	disable_irq				// disable interrupts
+	ldr	x2, [tsk, #TSK_TI_ADDR_LIMIT]	// check addr limit change
+	tbnz	x2, #63, addr_limit_fail
 	ldr	x1, [tsk, #TSK_TI_FLAGS]
 	and	x2, x1, #_TIF_WORK_MASK
 	cbnz	x2, work_pending
@@ -780,6 +784,14 @@ finish_ret_to_user:
 ENDPROC(ret_to_user)
 
 /*
+ * Address limit was incorrect before returning in user-mode which could be
+ * used to elevate privileges. Call the generic handler to stop the kernel on
+ * the appropriate check. This function does not return.
+ */
+addr_limit_fail:
+	b	address_limit_check_failed
+
+/*
  * This is how we return from a fork.
  */
 ENTRY(ret_from_fork)
-- 
2.12.2.715.g7642488e1d-goog

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v7 4/4] arm64/syscalls: Architecture specific pre-usermode check
       [not found]         ` <20170410200622.8654-1-thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
@ 2017-04-10 20:09           ` Thomas Garnier
  0 siblings, 0 replies; 15+ messages in thread
From: Thomas Garnier @ 2017-04-10 20:09 UTC (permalink / raw)
  To: Martin Schwidefsky, Heiko Carstens, Arnd Bergmann, Dave Hansen,
	Andrew Morton, Thomas Garnier, David Howells,
	René Nyffenegger, Paul E . McKenney, Ingo Molnar,
	Thomas Gleixner, Oleg Nesterov, Stephen Smalley,
	Pavel Tikhomirov, Ingo Molnar, H . Peter Anvin, Andy Lutomirski,
	Paolo Bonzini, Kees Cook
  Cc: linux-s390, LKML, Linux API, the arch/x86 maintainers,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	Kernel Hardening

On Mon, Apr 10, 2017 at 1:06 PM, Thomas Garnier <thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> wrote:
> Disable the generic pre-usermode check in favor of an optimized
> implementation. This patch adds specific checks on user-mode return path
> to make it faster and smaller.
>
> The address limit is checked on each syscall return path to user-mode.
> If it was changed, a generic handler is called to stop the kernel on an
> explicit check.
>
> Signed-off-by: Thomas Garnier <thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
> ---
> Based on next-20170410
> ---
>  arch/arm64/Kconfig        |  1 +
>  arch/arm64/kernel/entry.S | 13 +++++++++++++
>  2 files changed, 14 insertions(+)
>
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 9b8fcab7da56..3f9e8e7d9376 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -24,6 +24,7 @@ config ARM64
>         select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
>         select ARCH_WANT_FRAME_POINTERS
>         select ARCH_HAS_UBSAN_SANITIZE_ALL
> +       select ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
>         select ARM_AMBA
>         select ARM_ARCH_TIMER
>         select ARM_GIC
> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> index 43512d4d7df2..bdd094c7837a 100644
> --- a/arch/arm64/kernel/entry.S
> +++ b/arch/arm64/kernel/entry.S
> @@ -744,6 +744,8 @@ ENDPROC(cpu_switch_to)
>  ret_fast_syscall:
>         disable_irq                             // disable interrupts
>         str     x0, [sp, #S_X0]                 // returned x0
> +       ldr     x2, [tsk, #TSK_TI_ADDR_LIMIT]   // check addr limit change
> +       tbnz    x2, #63, addr_limit_fail
>         ldr     x1, [tsk, #TSK_TI_FLAGS]        // re-check for syscall tracing
>         and     x2, x1, #_TIF_SYSCALL_WORK
>         cbnz    x2, ret_fast_syscall_trace
> @@ -771,6 +773,9 @@ work_pending:
>   */
>  ret_to_user:
>         disable_irq                             // disable interrupts
> +       ldr     x2, [tsk, #TSK_TI_ADDR_LIMIT]   // check addr limit change
> +       tbnz    x2, #63, addr_limit_fail
> +
>         ldr     x1, [tsk, #TSK_TI_FLAGS]
>         and     x2, x1, #_TIF_WORK_MASK
>         cbnz    x2, work_pending
> @@ -780,6 +785,14 @@ finish_ret_to_user:
>  ENDPROC(ret_to_user)
>
>  /*
> + * Address limit was incorrect before returning in user-mode which could be
> + * used to elevate privileges. Call the generic handler to stop the kernel on
> + * the appropriate check. This function does not return.
> + */
> +addr_limit_fail:
> +       bl address_limit_check_failed
> +
> +/*
>   * This is how we return from a fork.
>   */
>  ENTRY(ret_from_fork)
> --
> 2.12.2.715.g7642488e1d-goog
>

Ignore this revision, I sent the previous patch by mistake. The next
email is the changed patch.
Sorry about that.


-- 
Thomas

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v7 1/4] syscalls: Restore address limit after a syscall
       [not found] ` <20170410164420.64003-1-thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
  2017-04-10 16:44   ` [PATCH v7 2/4] x86/syscalls: " Thomas Garnier
@ 2017-04-24 23:57   ` Kees Cook
       [not found]     ` <CAGXu5j+J67zC9tcncacHTDvXKMF3HZuPCivk-Uz4J6h-cEv-qQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  2017-04-25  6:33   ` Ingo Molnar
  2 siblings, 1 reply; 15+ messages in thread
From: Kees Cook @ 2017-04-24 23:57 UTC (permalink / raw)
  To: Thomas Garnier, Ingo Molnar
  Cc: Martin Schwidefsky, Heiko Carstens, Arnd Bergmann, Dave Hansen,
	Andrew Morton, David Howells, René Nyffenegger,
	Paul E . McKenney, Thomas Gleixner, Oleg Nesterov,
	Stephen Smalley, Pavel Tikhomirov, Ingo Molnar, H . Peter Anvin,
	Andy Lutomirski, Paolo Bonzini, Rik van Riel, Josh Poimboeuf,
	Borislav Petkov

On Mon, Apr 10, 2017 at 9:44 AM, Thomas Garnier <thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> wrote:
> This patch ensures a syscall does not return to user-mode with a kernel
> address limit. If that happened, a process can corrupt kernel-mode
> memory and elevate privileges.
>
> For example, it would mitigation this bug:
>
> - https://bugs.chromium.org/p/project-zero/issues/detail?id=990
>
> The CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE option is also
> added so each architecture can optimize this change.
>
> Signed-off-by: Thomas Garnier <thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
> Tested-by: Kees Cook <keescook-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>

Ingo, I think this series is ready. Can you pull it? (And if not, what
should next steps be?)

-Kees

> ---
> Based on next-20170410
> ---
>  arch/s390/Kconfig        |  1 +
>  include/linux/syscalls.h | 26 +++++++++++++++++++++++++-
>  init/Kconfig             |  6 ++++++
>  kernel/sys.c             | 13 +++++++++++++
>  4 files changed, 45 insertions(+), 1 deletion(-)
>
> diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
> index d25435d94b6e..489a0cc6e46b 100644
> --- a/arch/s390/Kconfig
> +++ b/arch/s390/Kconfig
> @@ -103,6 +103,7 @@ config S390
>         select ARCH_INLINE_WRITE_UNLOCK_BH
>         select ARCH_INLINE_WRITE_UNLOCK_IRQ
>         select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
> +       select ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
>         select ARCH_SAVE_PAGE_KEYS if HIBERNATION
>         select ARCH_SUPPORTS_ATOMIC_RMW
>         select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
> diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
> index 980c3c9b06f8..801a7a74fe28 100644
> --- a/include/linux/syscalls.h
> +++ b/include/linux/syscalls.h
> @@ -191,6 +191,27 @@ extern struct trace_event_functions exit_syscall_print_funcs;
>         SYSCALL_METADATA(sname, x, __VA_ARGS__)                 \
>         __SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
>
> +
> +/*
> + * Called before coming back to user-mode. Returning to user-mode with an
> + * address limit different than USER_DS can allow to overwrite kernel memory.
> + */
> +static inline void verify_pre_usermode_state(void) {
> +       BUG_ON(!segment_eq(get_fs(), USER_DS));
> +}
> +
> +#ifndef CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
> +#define __CHECK_USER_CALLER() \
> +       bool user_caller = segment_eq(get_fs(), USER_DS)
> +#define __VERIFY_PRE_USERMODE_STATE() \
> +       if (user_caller) verify_pre_usermode_state()
> +#else
> +#define __CHECK_USER_CALLER()
> +#define __VERIFY_PRE_USERMODE_STATE()
> +asmlinkage void address_limit_check_failed(void);
> +#endif
> +
> +
>  #define __PROTECT(...) asmlinkage_protect(__VA_ARGS__)
>  #define __SYSCALL_DEFINEx(x, name, ...)                                        \
>         asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))       \
> @@ -199,7 +220,10 @@ extern struct trace_event_functions exit_syscall_print_funcs;
>         asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__));      \
>         asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__))       \
>         {                                                               \
> -               long ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__));  \
> +               long ret;                                               \
> +               __CHECK_USER_CALLER();                                  \
> +               ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__));       \
> +               __VERIFY_PRE_USERMODE_STATE();                          \
>                 __MAP(x,__SC_TEST,__VA_ARGS__);                         \
>                 __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__));       \
>                 return ret;                                             \
> diff --git a/init/Kconfig b/init/Kconfig
> index 7f7027817bce..e5fbd0becfa7 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -1958,6 +1958,12 @@ config PROFILING
>  config TRACEPOINTS
>         bool
>
> +config ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
> +       bool
> +       help
> +         Disable the generic pre-usermode state verification. Allow each
> +         architecture to optimize how and when the verification is done.
> +
>  source "arch/Kconfig"
>
>  endmenu                # General setup
> diff --git a/kernel/sys.c b/kernel/sys.c
> index 196c7134bee6..d30530ff8166 100644
> --- a/kernel/sys.c
> +++ b/kernel/sys.c
> @@ -2459,3 +2459,16 @@ COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info)
>         return 0;
>  }
>  #endif /* CONFIG_COMPAT */
> +
> +#ifdef CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
> +/*
> + * This function is called when an architecture specific implementation detected
> + * an invalid address limit. The generic user-mode state checker will finish on
> + * the appropriate BUG_ON.
> + */
> +asmlinkage void address_limit_check_failed(void)
> +{
> +       verify_pre_usermode_state();
> +       panic("address_limit_check_failed called with a valid user-mode state");
> +}
> +#endif
> --
> 2.12.2.715.g7642488e1d-goog
>



-- 
Kees Cook
Pixel Security

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v7 1/4] syscalls: Restore address limit after a syscall
       [not found]     ` <CAGXu5j+J67zC9tcncacHTDvXKMF3HZuPCivk-Uz4J6h-cEv-qQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2017-04-25  6:23       ` Ingo Molnar
       [not found]         ` <20170425062324.pdpi5v7ypobw74ki-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Ingo Molnar @ 2017-04-25  6:23 UTC (permalink / raw)
  To: Kees Cook
  Cc: Thomas Garnier, Martin Schwidefsky, Heiko Carstens,
	Arnd Bergmann, Dave Hansen, Andrew Morton, David Howells,
	René Nyffenegger, Paul E . McKenney, Thomas Gleixner,
	Oleg Nesterov, Stephen Smalley, Pavel Tikhomirov, Ingo Molnar,
	H . Peter Anvin, Andy Lutomirski, Paolo Bonzini, Rik van Riel,
	Josh Poimboeuf


* Kees Cook <keescook-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org> wrote:

> On Mon, Apr 10, 2017 at 9:44 AM, Thomas Garnier <thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> wrote:
> > This patch ensures a syscall does not return to user-mode with a kernel
> > address limit. If that happened, a process can corrupt kernel-mode
> > memory and elevate privileges.
> >
> > For example, it would mitigation this bug:
> >
> > - https://bugs.chromium.org/p/project-zero/issues/detail?id=990
> >
> > The CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE option is also
> > added so each architecture can optimize this change.
> >
> > Signed-off-by: Thomas Garnier <thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
> > Tested-by: Kees Cook <keescook-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>
> 
> Ingo, I think this series is ready. Can you pull it? (And if not, what
> should next steps be?)

I have some feedback for other patches in this series, plus for this one as well:

> > +/*
> > + * Called before coming back to user-mode. Returning to user-mode with an
> > + * address limit different than USER_DS can allow to overwrite kernel memory.
> > + */
> > +static inline void verify_pre_usermode_state(void) {
> > +       BUG_ON(!segment_eq(get_fs(), USER_DS));
> > +}

That's not standard kernel coding style.

Also, patch titles should start with a verb - 75% of the series doesn't.

> > +#ifndef CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
> > +#define __CHECK_USER_CALLER() \
> > +       bool user_caller = segment_eq(get_fs(), USER_DS)
> > +#define __VERIFY_PRE_USERMODE_STATE() \
> > +       if (user_caller) verify_pre_usermode_state()
> > +#else
> > +#define __CHECK_USER_CALLER()
> > +#define __VERIFY_PRE_USERMODE_STATE()
> > +asmlinkage void address_limit_check_failed(void);
> > +#endif

> > +#ifdef CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE

That Kconfig name is way too long.

Plus please don't put logical operations into Kconfig names.

> > +/*
> > + * This function is called when an architecture specific implementation detected
> > + * an invalid address limit. The generic user-mode state checker will finish on
> > + * the appropriate BUG_ON.
> > + */
> > +asmlinkage void address_limit_check_failed(void)
> > +{
> > +       verify_pre_usermode_state();
> > +       panic("address_limit_check_failed called with a valid user-mode state");
> > +}
> > +#endif

Awful naming all around:

	verify_pre_usermode_state()
	address_limit_check_failed()

Both names start with very common names that makes one read these again and again. 
(And yes, there's lots of bad names in the kernel, but we should not follow bad 
examples.)

Best practice for such functionality is to use a common prefix that is both easy 
to recognize and easy to skip. For example we could use 'addr_limit_check' as the 
prefix:

	addr_limit_check_failed()
	addr_limit_check_syscall()

No need to over-specify it that it's a "pre" check - it's obvious from existing 
implementation and should be documented in the function itself for new 
implementations.

Harmonize the Kconfig namespace to the common prefix as well, i.e. use something 
like:

	CONFIG_ADDR_LIMIT_CHECK

No need to add 'ARCH' I think - an architecture that enables this should get it 
unconditionally.

etc.

It's all cobbled together I'm afraid and will need more iterations.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v7 1/4] syscalls: Restore address limit after a syscall
       [not found] ` <20170410164420.64003-1-thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
  2017-04-10 16:44   ` [PATCH v7 2/4] x86/syscalls: " Thomas Garnier
  2017-04-24 23:57   ` [PATCH v7 1/4] syscalls: Restore address limit after a syscall Kees Cook
@ 2017-04-25  6:33   ` Ingo Molnar
       [not found]     ` <20170425063305.hwjuxupa37rwe6zj-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  2 siblings, 1 reply; 15+ messages in thread
From: Ingo Molnar @ 2017-04-25  6:33 UTC (permalink / raw)
  To: Thomas Garnier
  Cc: Martin Schwidefsky, Heiko Carstens, Arnd Bergmann, Dave Hansen,
	Andrew Morton, David Howells, René Nyffenegger,
	Paul E . McKenney, Thomas Gleixner, Oleg Nesterov,
	Stephen Smalley, Pavel Tikhomirov, Ingo Molnar, H . Peter Anvin,
	Andy Lutomirski, Paolo Bonzini, Kees Cook, Rik van Riel,
	Josh Poimboeuf


* Thomas Garnier <thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> wrote:

> This patch ensures a syscall does not return to user-mode with a kernel
> address limit. If that happened, a process can corrupt kernel-mode
> memory and elevate privileges.

Don't start changelogs with 'This patch' - it's obvious that we are talking about 
this patch. Writing:

   Ensure that a syscall does not return to user-mode with a kernel address limit. 
   If that happens, a process can corrupt kernel-mode memory and elevate 
   privileges.

also note the spelling fix I did. (There's another spelling error elsewhere in 
this changelog as well.)

Please read changelogs!

> For example, it would mitigation this bug:
> 
> - https://bugs.chromium.org/p/project-zero/issues/detail?id=990
> 
> The CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE option is also
> added so each architecture can optimize this change.

As I pointed it out in my previous reply this Kconfig name is awfully long - but 
it should have been obvious when this changelog was written ...

> Signed-off-by: Thomas Garnier <thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
> Tested-by: Kees Cook <keescook-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>
> ---
> Based on next-20170410
> ---
>  arch/s390/Kconfig        |  1 +
>  include/linux/syscalls.h | 26 +++++++++++++++++++++++++-
>  init/Kconfig             |  6 ++++++
>  kernel/sys.c             | 13 +++++++++++++
>  4 files changed, 45 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
> index d25435d94b6e..489a0cc6e46b 100644
> --- a/arch/s390/Kconfig
> +++ b/arch/s390/Kconfig
> @@ -103,6 +103,7 @@ config S390
>  	select ARCH_INLINE_WRITE_UNLOCK_BH
>  	select ARCH_INLINE_WRITE_UNLOCK_IRQ
>  	select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
> +	select ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
>  	select ARCH_SAVE_PAGE_KEYS if HIBERNATION
>  	select ARCH_SUPPORTS_ATOMIC_RMW
>  	select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
> diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
> index 980c3c9b06f8..801a7a74fe28 100644
> --- a/include/linux/syscalls.h
> +++ b/include/linux/syscalls.h
> @@ -191,6 +191,27 @@ extern struct trace_event_functions exit_syscall_print_funcs;
>  	SYSCALL_METADATA(sname, x, __VA_ARGS__)			\
>  	__SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
>  
> +
> +/*
> + * Called before coming back to user-mode. Returning to user-mode with an
> + * address limit different than USER_DS can allow to overwrite kernel memory.
> + */
> +static inline void verify_pre_usermode_state(void) {
> +	BUG_ON(!segment_eq(get_fs(), USER_DS));
> +}

Non-standard coding style.

> +
> +#ifndef CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
> +#define __CHECK_USER_CALLER() \
> +	bool user_caller = segment_eq(get_fs(), USER_DS)
> +#define __VERIFY_PRE_USERMODE_STATE() \
> +	if (user_caller) verify_pre_usermode_state()
> +#else
> +#define __CHECK_USER_CALLER()
> +#define __VERIFY_PRE_USERMODE_STATE()
> +asmlinkage void address_limit_check_failed(void);
> +#endif
> +
> +
>  #define __PROTECT(...) asmlinkage_protect(__VA_ARGS__)
>  #define __SYSCALL_DEFINEx(x, name, ...)					\
>  	asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))	\
> @@ -199,7 +220,10 @@ extern struct trace_event_functions exit_syscall_print_funcs;
>  	asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__));	\
>  	asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__))	\
>  	{								\
> -		long ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__));	\
> +		long ret;						\
> +		__CHECK_USER_CALLER();					\
> +		ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__));	\
> +		__VERIFY_PRE_USERMODE_STATE();				\
>  		__MAP(x,__SC_TEST,__VA_ARGS__);				\
>  		__PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__));	\
>  		return ret;						\

BTW., the '__VERIFY_PRE_USERMODE_STATE()' name is highly misleading: the 'pre' 
prefix suggests that this is done before a system call - while it's done 
afterwards.

The solution is to not try to specify the exact call placement in the name, just 
describe the functionality (and harmonize along the common prefix).

> +config ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
> +	bool
> +	help
> +	  Disable the generic pre-usermode state verification. Allow each
> +	  architecture to optimize how and when the verification is done.
> +

Please name the Kconfig symbols something like this:

	CONFIG_ADDR_LIMIT_CHECK
	CONFIG_ADDR_LIMIT_CHECK_ARCH

or so, which tells us whether the check is done by the architecture code, without 
breaking the col80 limit with a single Kconfig name.

BTW:

> +#ifdef CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
> +/*
> + * This function is called when an architecture specific implementation detected
> + * an invalid address limit. The generic user-mode state checker will finish on
> + * the appropriate BUG_ON.
> + */
> +asmlinkage void address_limit_check_failed(void)
> +{
> +	verify_pre_usermode_state();
> +	panic("address_limit_check_failed called with a valid user-mode state");

It's very unconstructive to unconditionally panic the system, just because some 
kernel code leaked the address limit! Do a warn-once printout and kill the current 
task (i.e. don't continue execution), but don't crash everything else!

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v7 1/4] syscalls: Restore address limit after a syscall
       [not found]         ` <20170425062324.pdpi5v7ypobw74ki-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-04-25 14:12           ` Thomas Garnier
  0 siblings, 0 replies; 15+ messages in thread
From: Thomas Garnier @ 2017-04-25 14:12 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Kees Cook, Martin Schwidefsky, Heiko Carstens, Arnd Bergmann,
	Dave Hansen, Andrew Morton, David Howells, René Nyffenegger,
	Paul E . McKenney, Thomas Gleixner, Oleg Nesterov,
	Stephen Smalley, Pavel Tikhomirov, Ingo Molnar, H . Peter Anvin,
	Andy Lutomirski, Paolo Bonzini, Rik van Riel, Josh Poimboeuf

On Mon, Apr 24, 2017 at 11:23 PM, Ingo Molnar <mingo-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org> wrote:
>
> * Kees Cook <keescook-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org> wrote:
>
>> On Mon, Apr 10, 2017 at 9:44 AM, Thomas Garnier <thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> wrote:
>> > This patch ensures a syscall does not return to user-mode with a kernel
>> > address limit. If that happened, a process can corrupt kernel-mode
>> > memory and elevate privileges.
>> >
>> > For example, it would mitigation this bug:
>> >
>> > - https://bugs.chromium.org/p/project-zero/issues/detail?id=990
>> >
>> > The CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE option is also
>> > added so each architecture can optimize this change.
>> >
>> > Signed-off-by: Thomas Garnier <thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
>> > Tested-by: Kees Cook <keescook-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>
>>
>> Ingo, I think this series is ready. Can you pull it? (And if not, what
>> should next steps be?)
>
> I have some feedback for other patches in this series, plus for this one as well:
>
>> > +/*
>> > + * Called before coming back to user-mode. Returning to user-mode with an
>> > + * address limit different than USER_DS can allow to overwrite kernel memory.
>> > + */
>> > +static inline void verify_pre_usermode_state(void) {
>> > +       BUG_ON(!segment_eq(get_fs(), USER_DS));
>> > +}
>
> That's not standard kernel coding style.
>
> Also, patch titles should start with a verb - 75% of the series doesn't.

Will fix both.

>
>> > +#ifndef CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
>> > +#define __CHECK_USER_CALLER() \
>> > +       bool user_caller = segment_eq(get_fs(), USER_DS)
>> > +#define __VERIFY_PRE_USERMODE_STATE() \
>> > +       if (user_caller) verify_pre_usermode_state()
>> > +#else
>> > +#define __CHECK_USER_CALLER()
>> > +#define __VERIFY_PRE_USERMODE_STATE()
>> > +asmlinkage void address_limit_check_failed(void);
>> > +#endif
>
>> > +#ifdef CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
>
> That Kconfig name is way too long.
>
> Plus please don't put logical operations into Kconfig names.
>
>> > +/*
>> > + * This function is called when an architecture specific implementation detected
>> > + * an invalid address limit. The generic user-mode state checker will finish on
>> > + * the appropriate BUG_ON.
>> > + */
>> > +asmlinkage void address_limit_check_failed(void)
>> > +{
>> > +       verify_pre_usermode_state();
>> > +       panic("address_limit_check_failed called with a valid user-mode state");
>> > +}
>> > +#endif
>
> Awful naming all around:
>
>         verify_pre_usermode_state()
>         address_limit_check_failed()
>
> Both names start with very common names that makes one read these again and again.
> (And yes, there's lots of bad names in the kernel, but we should not follow bad
> examples.)
>
> Best practice for such functionality is to use a common prefix that is both easy
> to recognize and easy to skip. For example we could use 'addr_limit_check' as the
> prefix:
>
>         addr_limit_check_failed()
>         addr_limit_check_syscall()
>
> No need to over-specify it that it's a "pre" check - it's obvious from existing
> implementation and should be documented in the function itself for new
> implementations.
>
> Harmonize the Kconfig namespace to the common prefix as well, i.e. use something
> like:
>
>         CONFIG_ADDR_LIMIT_CHECK
>
> No need to add 'ARCH' I think - an architecture that enables this should get it
> unconditionally.
>
> etc.
>
> It's all cobbled together I'm afraid and will need more iterations.

Make sense. Thanks for the feedback.

>
> Thanks,
>
>         Ingo



-- 
Thomas

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v7 1/4] syscalls: Restore address limit after a syscall
       [not found]     ` <20170425063305.hwjuxupa37rwe6zj-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-04-25 14:18       ` Thomas Garnier
  2017-04-26  8:12         ` Ingo Molnar
  0 siblings, 1 reply; 15+ messages in thread
From: Thomas Garnier @ 2017-04-25 14:18 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Martin Schwidefsky, Heiko Carstens, Arnd Bergmann, Dave Hansen,
	Andrew Morton, David Howells, René Nyffenegger,
	Paul E . McKenney, Thomas Gleixner, Oleg Nesterov,
	Stephen Smalley, Pavel Tikhomirov, Ingo Molnar, H . Peter Anvin,
	Andy Lutomirski, Paolo Bonzini, Kees Cook, Rik van Riel,
	Josh Poimboeuf

On Mon, Apr 24, 2017 at 11:33 PM, Ingo Molnar <mingo-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org> wrote:
>
> * Thomas Garnier <thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> wrote:
>
>> This patch ensures a syscall does not return to user-mode with a kernel
>> address limit. If that happened, a process can corrupt kernel-mode
>> memory and elevate privileges.
>
> Don't start changelogs with 'This patch' - it's obvious that we are talking about
> this patch. Writing:
>
>    Ensure that a syscall does not return to user-mode with a kernel address limit.
>    If that happens, a process can corrupt kernel-mode memory and elevate
>    privileges.
>
> also note the spelling fix I did. (There's another spelling error elsewhere in
> this changelog as well.)

I will take your change and go over other commits.

>
> Please read changelogs!
>
>> For example, it would mitigation this bug:
>>
>> - https://bugs.chromium.org/p/project-zero/issues/detail?id=990
>>
>> The CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE option is also
>> added so each architecture can optimize this change.
>
> As I pointed it out in my previous reply this Kconfig name is awfully long - but
> it should have been obvious when this changelog was written ...
>
>> Signed-off-by: Thomas Garnier <thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
>> Tested-by: Kees Cook <keescook-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>
>> ---
>> Based on next-20170410
>> ---
>>  arch/s390/Kconfig        |  1 +
>>  include/linux/syscalls.h | 26 +++++++++++++++++++++++++-
>>  init/Kconfig             |  6 ++++++
>>  kernel/sys.c             | 13 +++++++++++++
>>  4 files changed, 45 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
>> index d25435d94b6e..489a0cc6e46b 100644
>> --- a/arch/s390/Kconfig
>> +++ b/arch/s390/Kconfig
>> @@ -103,6 +103,7 @@ config S390
>>       select ARCH_INLINE_WRITE_UNLOCK_BH
>>       select ARCH_INLINE_WRITE_UNLOCK_IRQ
>>       select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
>> +     select ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
>>       select ARCH_SAVE_PAGE_KEYS if HIBERNATION
>>       select ARCH_SUPPORTS_ATOMIC_RMW
>>       select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
>> diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
>> index 980c3c9b06f8..801a7a74fe28 100644
>> --- a/include/linux/syscalls.h
>> +++ b/include/linux/syscalls.h
>> @@ -191,6 +191,27 @@ extern struct trace_event_functions exit_syscall_print_funcs;
>>       SYSCALL_METADATA(sname, x, __VA_ARGS__)                 \
>>       __SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
>>
>> +
>> +/*
>> + * Called before coming back to user-mode. Returning to user-mode with an
>> + * address limit different than USER_DS can allow to overwrite kernel memory.
>> + */
>> +static inline void verify_pre_usermode_state(void) {
>> +     BUG_ON(!segment_eq(get_fs(), USER_DS));
>> +}
>
> Non-standard coding style.
>
>> +
>> +#ifndef CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
>> +#define __CHECK_USER_CALLER() \
>> +     bool user_caller = segment_eq(get_fs(), USER_DS)
>> +#define __VERIFY_PRE_USERMODE_STATE() \
>> +     if (user_caller) verify_pre_usermode_state()
>> +#else
>> +#define __CHECK_USER_CALLER()
>> +#define __VERIFY_PRE_USERMODE_STATE()
>> +asmlinkage void address_limit_check_failed(void);
>> +#endif
>> +
>> +
>>  #define __PROTECT(...) asmlinkage_protect(__VA_ARGS__)
>>  #define __SYSCALL_DEFINEx(x, name, ...)                                      \
>>       asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))       \
>> @@ -199,7 +220,10 @@ extern struct trace_event_functions exit_syscall_print_funcs;
>>       asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__));      \
>>       asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__))       \
>>       {                                                               \
>> -             long ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__));  \
>> +             long ret;                                               \
>> +             __CHECK_USER_CALLER();                                  \
>> +             ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__));       \
>> +             __VERIFY_PRE_USERMODE_STATE();                          \
>>               __MAP(x,__SC_TEST,__VA_ARGS__);                         \
>>               __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__));       \
>>               return ret;                                             \
>
> BTW., the '__VERIFY_PRE_USERMODE_STATE()' name is highly misleading: the 'pre'
> prefix suggests that this is done before a system call - while it's done
> afterwards.
>
> The solution is to not try to specify the exact call placement in the name, just
> describe the functionality (and harmonize along the common prefix).

Yes, I think BEFORE would have made more sense but things are getting
really long so I am fine with your proposal in the previous thread.

>
>> +config ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
>> +     bool
>> +     help
>> +       Disable the generic pre-usermode state verification. Allow each
>> +       architecture to optimize how and when the verification is done.
>> +
>
> Please name the Kconfig symbols something like this:
>
>         CONFIG_ADDR_LIMIT_CHECK
>         CONFIG_ADDR_LIMIT_CHECK_ARCH
>
> or so, which tells us whether the check is done by the architecture code, without
> breaking the col80 limit with a single Kconfig name.
>
> BTW:
>
>> +#ifdef CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
>> +/*
>> + * This function is called when an architecture specific implementation detected
>> + * an invalid address limit. The generic user-mode state checker will finish on
>> + * the appropriate BUG_ON.
>> + */
>> +asmlinkage void address_limit_check_failed(void)
>> +{
>> +     verify_pre_usermode_state();
>> +     panic("address_limit_check_failed called with a valid user-mode state");
>
> It's very unconstructive to unconditionally panic the system, just because some
> kernel code leaked the address limit! Do a warn-once printout and kill the current
> task (i.e. don't continue execution), but don't crash everything else!

The original change did not crash the kernel for this exact reason.
Through reviews, there was an overall agreement that the kernel should
not continue in this state.

>
> Thanks,
>
>         Ingo



-- 
Thomas

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v7 1/4] syscalls: Restore address limit after a syscall
  2017-04-25 14:18       ` Thomas Garnier
@ 2017-04-26  8:12         ` Ingo Molnar
       [not found]           ` <20170426081229.6wnugrs7w3at4xry-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Ingo Molnar @ 2017-04-26  8:12 UTC (permalink / raw)
  To: Thomas Garnier
  Cc: Martin Schwidefsky, Heiko Carstens, Arnd Bergmann, Dave Hansen,
	Andrew Morton, David Howells, René Nyffenegger,
	Paul E . McKenney, Thomas Gleixner, Oleg Nesterov,
	Stephen Smalley, Pavel Tikhomirov, Ingo Molnar, H . Peter Anvin,
	Andy Lutomirski, Paolo Bonzini, Kees Cook, Rik van Riel,
	Josh Poimboeuf, Borislav Petkov, Brian Gerst,
	Kirill A . Shutemov


* Thomas Garnier <thgarnie@google.com> wrote:

> >> +#ifdef CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
> >> +/*
> >> + * This function is called when an architecture specific implementation detected
> >> + * an invalid address limit. The generic user-mode state checker will finish on
> >> + * the appropriate BUG_ON.
> >> + */
> >> +asmlinkage void address_limit_check_failed(void)
> >> +{
> >> +     verify_pre_usermode_state();
> >> +     panic("address_limit_check_failed called with a valid user-mode state");
> >
> > It's very unconstructive to unconditionally panic the system, just because some
> > kernel code leaked the address limit! Do a warn-once printout and kill the current
> > task (i.e. don't continue execution), but don't crash everything else!
> 
> The original change did not crash the kernel for this exact reason.
> Through reviews, there was an overall agreement that the kernel should
> not continue in this state.

Ok, I guess we can try that - but the panic message is still pretty misleading:

	panic("address_limit_check_failed called with a valid user-mode state");

... so it was called with a _valid_ user-mode state, and we crash due to something 
valid? Huh?

( Also, the style rule applies to kernel messages as well: function names should 
  be referred to as "function_name()". )

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v7 1/4] syscalls: Restore address limit after a syscall
       [not found]           ` <20170426081229.6wnugrs7w3at4xry-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-04-26 14:09             ` Thomas Garnier
  0 siblings, 0 replies; 15+ messages in thread
From: Thomas Garnier @ 2017-04-26 14:09 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Martin Schwidefsky, Heiko Carstens, Arnd Bergmann, Dave Hansen,
	Andrew Morton, David Howells, René Nyffenegger,
	Paul E . McKenney, Thomas Gleixner, Oleg Nesterov,
	Stephen Smalley, Pavel Tikhomirov, Ingo Molnar, H . Peter Anvin,
	Andy Lutomirski, Paolo Bonzini, Kees Cook, Rik van Riel,
	Josh Poimboeuf

On Wed, Apr 26, 2017 at 1:12 AM, Ingo Molnar <mingo-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org> wrote:
>
> * Thomas Garnier <thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> wrote:
>
>> >> +#ifdef CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
>> >> +/*
>> >> + * This function is called when an architecture specific implementation detected
>> >> + * an invalid address limit. The generic user-mode state checker will finish on
>> >> + * the appropriate BUG_ON.
>> >> + */
>> >> +asmlinkage void address_limit_check_failed(void)
>> >> +{
>> >> +     verify_pre_usermode_state();
>> >> +     panic("address_limit_check_failed called with a valid user-mode state");
>> >
>> > It's very unconstructive to unconditionally panic the system, just because some
>> > kernel code leaked the address limit! Do a warn-once printout and kill the current
>> > task (i.e. don't continue execution), but don't crash everything else!
>>
>> The original change did not crash the kernel for this exact reason.
>> Through reviews, there was an overall agreement that the kernel should
>> not continue in this state.
>
> Ok, I guess we can try that - but the panic message is still pretty misleading:
>
>         panic("address_limit_check_failed called with a valid user-mode state");
>
> ... so it was called with a _valid_ user-mode state, and we crash due to something
> valid? Huh?

Yes the message is accurate but I agree that it is misleading and I
will improve it. The address_limit_check_failed function is called by
assembly code on different architectures once the state was detected
as invalid. Instead of crashing at different places, we redirect to
the generic handler (verify_pre_usermode_state) that will crash on the
appropriate BUG_ON line. The address_limit_check_failed function is
not supposed to comeback, the panic call is just a safe guard.

>
> ( Also, the style rule applies to kernel messages as well: function names should
>   be referred to as "function_name()". )

Will change.

>
> Thanks,
>
>         Ingo



-- 
Thomas

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2017-04-26 14:09 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-04-10 16:44 [PATCH v7 1/4] syscalls: Restore address limit after a syscall Thomas Garnier
2017-04-10 16:44 ` [PATCH v7 3/4] arm/syscalls: Architecture specific pre-usermode check Thomas Garnier
2017-04-10 16:44 ` [PATCH v7 4/4] arm64/syscalls: " Thomas Garnier
2017-04-10 17:12   ` Catalin Marinas
     [not found]     ` <20170410171240.GE30647-M2fw3Uu6cmfZROr8t4l/smS4ubULX0JqMm0uRHvK7Nw@public.gmane.org>
2017-04-10 20:06       ` Thomas Garnier
     [not found]         ` <20170410200622.8654-1-thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
2017-04-10 20:09           ` Thomas Garnier
2017-04-10 20:07       ` Thomas Garnier
     [not found] ` <20170410164420.64003-1-thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
2017-04-10 16:44   ` [PATCH v7 2/4] x86/syscalls: " Thomas Garnier
2017-04-24 23:57   ` [PATCH v7 1/4] syscalls: Restore address limit after a syscall Kees Cook
     [not found]     ` <CAGXu5j+J67zC9tcncacHTDvXKMF3HZuPCivk-Uz4J6h-cEv-qQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-04-25  6:23       ` Ingo Molnar
     [not found]         ` <20170425062324.pdpi5v7ypobw74ki-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-04-25 14:12           ` Thomas Garnier
2017-04-25  6:33   ` Ingo Molnar
     [not found]     ` <20170425063305.hwjuxupa37rwe6zj-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-04-25 14:18       ` Thomas Garnier
2017-04-26  8:12         ` Ingo Molnar
     [not found]           ` <20170426081229.6wnugrs7w3at4xry-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-04-26 14:09             ` Thomas Garnier

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).