linux-api.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v4 1/4] syscalls: Restore address limit after a syscall
@ 2017-03-22 20:38 Thomas Garnier
  2017-03-22 20:38 ` [PATCH v4 2/4] x86/syscalls: Specific usage of verify_pre_usermode_state Thomas Garnier
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Thomas Garnier @ 2017-03-22 20:38 UTC (permalink / raw)
  To: Martin Schwidefsky, Heiko Carstens, Dave Hansen, David Howells,
	Al Viro, Arnd Bergmann, Thomas Garnier, René Nyffenegger,
	Andrew Morton, Paul E . McKenney, Ingo Molnar, Thomas Gleixner,
	Oleg Nesterov, Pavel Tikhomirov, Stephen Smalley, Ingo Molnar,
	H . Peter Anvin, Andy Lutomirski, Paolo Bonzini
  Cc: linux-s390-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-api-u79uwXL29TY76Z2rM5mHXA, x86-DgEjT+Ai2ygdnm+yROfE0A,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	kernel-hardening-ZwoEplunGu1jrUoiu81ncdBPR1lH4CV8

This patch ensures a syscall does not return to user-mode with a kernel
address limit. If that happened, a process can corrupt kernel-mode
memory and elevate privileges.

For example, it would mitigation this bug:

- https://bugs.chromium.org/p/project-zero/issues/detail?id=990

If the CONFIG_BUG_ON_DATA_CORRUPTION option is enabled, an incorrect
state will result in a BUG_ON.

The CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE option is also
added so each architecture can optimize this change.

Signed-off-by: Thomas Garnier <thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
---
Based on next-20170322
---
 arch/s390/Kconfig        |  1 +
 include/linux/syscalls.h | 18 +++++++++++++++++-
 init/Kconfig             |  7 +++++++
 kernel/sys.c             |  8 ++++++++
 4 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index a2dcef0aacc7..b73f5b87bc99 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -103,6 +103,7 @@ config S390
 	select ARCH_INLINE_WRITE_UNLOCK_BH
 	select ARCH_INLINE_WRITE_UNLOCK_IRQ
 	select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
+	select ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
 	select ARCH_SAVE_PAGE_KEYS if HIBERNATION
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_SUPPORTS_NUMA_BALANCING
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 980c3c9b06f8..e659076adf6c 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -191,6 +191,19 @@ extern struct trace_event_functions exit_syscall_print_funcs;
 	SYSCALL_METADATA(sname, x, __VA_ARGS__)			\
 	__SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
 
+asmlinkage void verify_pre_usermode_state(void);
+
+#ifndef CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
+#define __CHECK_USER_CALLER() \
+	bool user_caller = segment_eq(get_fs(), USER_DS)
+#define __VERIFY_PRE_USERMODE_STATE() \
+	if (user_caller) verify_pre_usermode_state()
+#else
+#define __CHECK_USER_CALLER()
+#define __VERIFY_PRE_USERMODE_STATE()
+#endif
+
+
 #define __PROTECT(...) asmlinkage_protect(__VA_ARGS__)
 #define __SYSCALL_DEFINEx(x, name, ...)					\
 	asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))	\
@@ -199,7 +212,10 @@ extern struct trace_event_functions exit_syscall_print_funcs;
 	asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__));	\
 	asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__))	\
 	{								\
-		long ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__));	\
+		long ret;						\
+		__CHECK_USER_CALLER();					\
+		ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__));	\
+		__VERIFY_PRE_USERMODE_STATE();				\
 		__MAP(x,__SC_TEST,__VA_ARGS__);				\
 		__PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__));	\
 		return ret;						\
diff --git a/init/Kconfig b/init/Kconfig
index c859c993c26f..c4efc3a95e4a 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1929,6 +1929,13 @@ config PROFILING
 config TRACEPOINTS
 	bool
 
+#
+# Set by each architecture that want to optimize how verify_pre_usermode_state
+# is called.
+#
+config ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
+	bool
+
 source "arch/Kconfig"
 
 endmenu		# General setup
diff --git a/kernel/sys.c b/kernel/sys.c
index 196c7134bee6..411163ac9dc3 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2459,3 +2459,11 @@ COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info)
 	return 0;
 }
 #endif /* CONFIG_COMPAT */
+
+/* Called before coming back to user-mode */
+asmlinkage void verify_pre_usermode_state(void)
+{
+	if (CHECK_DATA_CORRUPTION(!segment_eq(get_fs(), USER_DS),
+				  "incorrect get_fs() on user-mode return"))
+		set_fs(USER_DS);
+}
-- 
2.12.1.500.gab5fba24ee-goog

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v4 2/4] x86/syscalls: Specific usage of verify_pre_usermode_state
  2017-03-22 20:38 [PATCH v4 1/4] syscalls: Restore address limit after a syscall Thomas Garnier
@ 2017-03-22 20:38 ` Thomas Garnier
  2017-03-22 20:38 ` [PATCH v4 3/4] arm/syscalls: " Thomas Garnier
       [not found] ` <20170322203834.67556-1-thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
  2 siblings, 0 replies; 11+ messages in thread
From: Thomas Garnier @ 2017-03-22 20:38 UTC (permalink / raw)
  To: Martin Schwidefsky, Heiko Carstens, Dave Hansen, David Howells,
	Al Viro, Arnd Bergmann, Thomas Garnier, René Nyffenegger,
	Andrew Morton, Paul E . McKenney, Ingo Molnar, Thomas Gleixner,
	Oleg Nesterov, Pavel Tikhomirov, Stephen Smalley, Ingo Molnar,
	H . Peter Anvin, Andy Lutomirski, Paolo Bonzini, Rik van Riel,
	Kees Cook, Josh Poimboeuf, Borislav Petkov
  Cc: linux-s390, linux-kernel, linux-api, x86, linux-arm-kernel,
	kernel-hardening

Implement specific usage of verify_pre_usermode_state for user-mode
returns for x86.
---
Based on next-20170322
---
 arch/x86/Kconfig                        |  1 +
 arch/x86/entry/common.c                 |  3 +++
 arch/x86/entry/entry_64.S               |  8 ++++++++
 arch/x86/include/asm/pgtable_64_types.h | 11 +++++++++++
 arch/x86/include/asm/processor.h        | 11 -----------
 5 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0e1bdadc8222..f48c96b834b5 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -63,6 +63,7 @@ config X86
 	select ARCH_MIGHT_HAVE_ACPI_PDC		if ACPI
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select ARCH_MIGHT_HAVE_PC_SERIO
+	select ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
 	select ARCH_SUPPORTS_NUMA_BALANCING	if X86_64
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index cdefcfdd9e63..76ef050255c9 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -23,6 +23,7 @@
 #include <linux/user-return-notifier.h>
 #include <linux/uprobes.h>
 #include <linux/livepatch.h>
+#include <linux/syscalls.h>
 
 #include <asm/desc.h>
 #include <asm/traps.h>
@@ -183,6 +184,8 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
 	struct thread_info *ti = current_thread_info();
 	u32 cached_flags;
 
+	verify_pre_usermode_state();
+
 	if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
 		local_irq_disable();
 
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index d2b2a2948ffe..c079b010205c 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -218,6 +218,14 @@ entry_SYSCALL_64_fastpath:
 	testl	$_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
 	jnz	1f
 
+	/*
+	 * If address limit is not based on user-mode, jump to slow path for
+	 * additional security checks.
+	 */
+	movq	$TASK_SIZE_MAX, %rcx
+	cmp	%rcx, TASK_addr_limit(%r11)
+	jnz	1f
+
 	LOCKDEP_SYS_EXIT
 	TRACE_IRQS_ON		/* user mode is traced as IRQs on */
 	movq	RIP(%rsp), %rcx
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 516593e66bd6..12fa851c7fa8 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -78,4 +78,15 @@ typedef struct { pteval_t pte; } pte_t;
 
 #define EARLY_DYNAMIC_PAGE_TABLES	64
 
+/*
+ * User space process size. 47bits minus one guard page.  The guard
+ * page is necessary on Intel CPUs: if a SYSCALL instruction is at
+ * the highest possible canonical userspace address, then that
+ * syscall will enter the kernel with a non-canonical return
+ * address, and SYSRET will explode dangerously.  We avoid this
+ * particular problem by preventing anything from being mapped
+ * at the maximum canonical address.
+ */
+#define TASK_SIZE_MAX	((_AC(1, UL) << 47) - PAGE_SIZE)
+
 #endif /* _ASM_X86_PGTABLE_64_DEFS_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 3cada998a402..e80822582d3e 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -825,17 +825,6 @@ static inline void spin_lock_prefetch(const void *x)
 #define KSTK_ESP(task)		(task_pt_regs(task)->sp)
 
 #else
-/*
- * User space process size. 47bits minus one guard page.  The guard
- * page is necessary on Intel CPUs: if a SYSCALL instruction is at
- * the highest possible canonical userspace address, then that
- * syscall will enter the kernel with a non-canonical return
- * address, and SYSRET will explode dangerously.  We avoid this
- * particular problem by preventing anything from being mapped
- * at the maximum canonical address.
- */
-#define TASK_SIZE_MAX	((1UL << 47) - PAGE_SIZE)
-
 /* This decides where the kernel will search for a free chunk of vm
  * space during mmap's.
  */
-- 
2.12.1.500.gab5fba24ee-goog

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v4 3/4] arm/syscalls: Specific usage of verify_pre_usermode_state
  2017-03-22 20:38 [PATCH v4 1/4] syscalls: Restore address limit after a syscall Thomas Garnier
  2017-03-22 20:38 ` [PATCH v4 2/4] x86/syscalls: Specific usage of verify_pre_usermode_state Thomas Garnier
@ 2017-03-22 20:38 ` Thomas Garnier
       [not found] ` <20170322203834.67556-1-thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
  2 siblings, 0 replies; 11+ messages in thread
From: Thomas Garnier @ 2017-03-22 20:38 UTC (permalink / raw)
  To: Martin Schwidefsky, Heiko Carstens, Dave Hansen, David Howells,
	Al Viro, Arnd Bergmann, Thomas Garnier, René Nyffenegger,
	Andrew Morton, Paul E . McKenney, Ingo Molnar, Thomas Gleixner,
	Oleg Nesterov, Pavel Tikhomirov, Stephen Smalley, Ingo Molnar,
	H . Peter Anvin, Andy Lutomirski, Paolo Bonzini, Rik van Riel,
	Kees Cook, Josh Poimboeuf, Borislav Petkov
  Cc: linux-s390, linux-kernel, linux-api, x86, linux-arm-kernel,
	kernel-hardening

Implement specific usage of verify_pre_usermode_state for user-mode
returns for arm.
---
Based on next-20170322
---
 arch/arm/Kconfig               |  1 +
 arch/arm/include/asm/domain.h  |  3 +++
 arch/arm/kernel/entry-common.S | 32 +++++++++++++++++++++++++++++++-
 3 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index fcbc5ef1ec69..10c6dc3dfff9 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -12,6 +12,7 @@ config ARM
 	select ARCH_HAVE_CUSTOM_GPIO_H
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_MIGHT_HAVE_PC_PARPORT
+	select ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
 	select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
 	select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7
 	select ARCH_SUPPORTS_ATOMIC_RMW
diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h
index 99d9f630d6b6..fd1cd24ca9fe 100644
--- a/arch/arm/include/asm/domain.h
+++ b/arch/arm/include/asm/domain.h
@@ -58,6 +58,9 @@
 #define domain_mask(dom)	((3) << (2 * (dom)))
 #define domain_val(dom,type)	((type) << (2 * (dom)))
 
+#define DOMAIN_KERNEL_MASK	domain_mask(DOMAIN_KERNEL)
+#define DOMAIN_CLIENT_VALUE	domain_val(DOMAIN_KERNEL, DOMAIN_CLIENT)
+
 #ifdef CONFIG_CPU_SW_DOMAIN_PAN
 #define DACR_INIT \
 	(domain_val(DOMAIN_USER, DOMAIN_NOACCESS) | \
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index eb5cd77bf1d8..8d9bfe9ff313 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -12,6 +12,7 @@
 #include <asm/unistd.h>
 #include <asm/ftrace.h>
 #include <asm/unwind.h>
+#include <asm/memory.h>
 #ifdef CONFIG_AEABI
 #include <asm/unistd-oabi.h>
 #endif
@@ -27,7 +28,6 @@
 
 #include "entry-header.S"
 
-
 	.align	5
 #if !(IS_ENABLED(CONFIG_TRACE_IRQFLAGS) || IS_ENABLED(CONFIG_CONTEXT_TRACKING))
 /*
@@ -40,9 +40,12 @@ ret_fast_syscall:
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)
 	disable_irq_notrace			@ disable interrupts
+	ldr	r2, [tsk, #TI_ADDR_LIMIT]
 	ldr	r1, [tsk, #TI_FLAGS]		@ re-check for syscall tracing
 	tst	r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
 	bne	fast_work_pending
+	cmp	r2, #TASK_SIZE
+	blne	addr_limit_fail
 
 	/* perform architecture specific actions before user return */
 	arch_ret_to_user r1, lr
@@ -66,6 +69,7 @@ ret_fast_syscall:
  UNWIND(.cantunwind	)
 	str	r0, [sp, #S_R0 + S_OFF]!	@ save returned r0
 	disable_irq_notrace			@ disable interrupts
+	ldr	r2, [tsk, #TI_ADDR_LIMIT]
 	ldr	r1, [tsk, #TI_FLAGS]		@ re-check for syscall tracing
 	tst	r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
 	beq	no_work_pending
@@ -82,6 +86,7 @@ slow_work_pending:
 	mov	r2, why				@ 'syscall'
 	bl	do_work_pending
 	cmp	r0, #0
+	ldreq   r2, [tsk, #TI_ADDR_LIMIT]
 	beq	no_work_pending
 	movlt	scno, #(__NR_restart_syscall - __NR_SYSCALL_BASE)
 	ldmia	sp, {r0 - r6}			@ have to reload r0 - r6
@@ -99,9 +104,12 @@ ret_slow_syscall:
 	disable_irq_notrace			@ disable interrupts
 ENTRY(ret_to_user_from_irq)
 	ldr	r1, [tsk, #TI_FLAGS]
+	ldr	r2, [tsk, #TI_ADDR_LIMIT]
 	tst	r1, #_TIF_WORK_MASK
 	bne	slow_work_pending
 no_work_pending:
+	cmp	r2, #TASK_SIZE
+	blne	addr_limit_fail
 	asm_trace_hardirqs_on save = 0
 
 	/* perform architecture specific actions before user return */
@@ -125,6 +133,28 @@ ENTRY(ret_from_fork)
 	b	ret_slow_syscall
 ENDPROC(ret_from_fork)
 
+addr_limit_fail:
+#ifdef CONFIG_BUG_ON_DATA_CORRUPTION
+	stmfd	sp!, {r0, lr}
+	bl	verify_pre_usermode_state
+	ldmfd	sp!, {r0, lr}
+#else
+	/*
+	 * Similar to set_fs(USER_DS) in verify_pre_usermode_state without a
+	 * warning.
+	 */
+	mov	r2, #TASK_SIZE
+	str	r2, [tsk, #TI_ADDR_LIMIT]
+#ifdef CONFIG_CPU_USE_DOMAINS
+	/* Switch domain like modify_domain(DOMAIN_KERNEL, DOMAIN_CLIENT) */
+	mrc	p15, 0, r2, c3, c0, 0		@ Get domain register
+	bic	r2, r2, #DOMAIN_KERNEL_MASK	@ Clear the domain part
+	orr	r2, r2, #DOMAIN_CLIENT_VALUE	@ Enforce DOMAIN_CLIENT
+	mcr	p15, 0, r2, c3, c0, 0		@ Set domain register
+#endif
+#endif
+	ret     lr
+
 /*=============================================================================
  * SWI handler
  *-----------------------------------------------------------------------------
-- 
2.12.1.500.gab5fba24ee-goog

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v4 4/4] arm64/syscalls: Specific usage of verify_pre_usermode_state
       [not found] ` <20170322203834.67556-1-thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
@ 2017-03-22 20:38   ` Thomas Garnier
  2017-03-22 20:44   ` [PATCH v4 1/4] syscalls: Restore address limit after a syscall Andy Lutomirski
  1 sibling, 0 replies; 11+ messages in thread
From: Thomas Garnier @ 2017-03-22 20:38 UTC (permalink / raw)
  To: Martin Schwidefsky, Heiko Carstens, Dave Hansen, David Howells,
	Al Viro, Arnd Bergmann, Thomas Garnier, René Nyffenegger,
	Andrew Morton, Paul E . McKenney, Ingo Molnar, Thomas Gleixner,
	Oleg Nesterov, Pavel Tikhomirov, Stephen Smalley, Ingo Molnar,
	H . Peter Anvin, Andy Lutomirski, Paolo Bonzini
  Cc: linux-s390-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-api-u79uwXL29TY76Z2rM5mHXA, x86-DgEjT+Ai2ygdnm+yROfE0A,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	kernel-hardening-ZwoEplunGu1jrUoiu81ncdBPR1lH4CV8

Implement specific usage of verify_pre_usermode_state for user-mode
returns for arm64.
---
Based on next-20170322
---
 arch/arm64/Kconfig        |  1 +
 arch/arm64/kernel/entry.S | 25 +++++++++++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index f2b0b528037d..0e86d87259f4 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -24,6 +24,7 @@ config ARM64
 	select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
 	select ARCH_WANT_FRAME_POINTERS
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
+	select ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
 	select ARM_AMBA
 	select ARM_ARCH_TIMER
 	select ARM_GIC
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 43512d4d7df2..685b43771ac9 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -744,6 +744,10 @@ ENDPROC(cpu_switch_to)
 ret_fast_syscall:
 	disable_irq				// disable interrupts
 	str	x0, [sp, #S_X0]			// returned x0
+	ldr	x2, [tsk, #TSK_TI_ADDR_LIMIT]	// check addr limit change
+	mov	x1, #TASK_SIZE_64
+	cmp	x2, x1
+	b.ne	addr_limit_fail
 	ldr	x1, [tsk, #TSK_TI_FLAGS]	// re-check for syscall tracing
 	and	x2, x1, #_TIF_SYSCALL_WORK
 	cbnz	x2, ret_fast_syscall_trace
@@ -771,6 +775,11 @@ work_pending:
  */
 ret_to_user:
 	disable_irq				// disable interrupts
+	ldr	x2, [tsk, #TSK_TI_ADDR_LIMIT]	// check addr limit change
+	mov	x1, #TASK_SIZE_64
+	cmp	x2, x1
+	b.ne	addr_limit_fail
+
 	ldr	x1, [tsk, #TSK_TI_FLAGS]
 	and	x2, x1, #_TIF_WORK_MASK
 	cbnz	x2, work_pending
@@ -779,6 +788,22 @@ finish_ret_to_user:
 	kernel_exit 0
 ENDPROC(ret_to_user)
 
+addr_limit_fail:
+#ifdef CONFIG_BUG_ON_DATA_CORRUPTION
+	stp	x0, lr, [sp,#-16]!
+	bl	verify_pre_usermode_state
+	ldp	x0, lr, [sp],#16
+#else
+	/*
+	 * Similar to set_fs(USER_DS) in verify_pre_usermode_state without a
+	 * warning.
+	 */
+	mov	x2, #TASK_SIZE_64
+	str	x2, [tsk, #TSK_TI_ADDR_LIMIT]
+	ALTERNATIVE(nop, SET_PSTATE_UAO(0), ARM64_HAS_UAO, CONFIG_ARM64_UAO)
+#endif
+	ret	lr
+
 /*
  * This is how we return from a fork.
  */
-- 
2.12.1.500.gab5fba24ee-goog

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH v4 1/4] syscalls: Restore address limit after a syscall
       [not found] ` <20170322203834.67556-1-thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
  2017-03-22 20:38   ` [PATCH v4 4/4] arm64/syscalls: " Thomas Garnier
@ 2017-03-22 20:44   ` Andy Lutomirski
  2017-03-22 20:49     ` Thomas Garnier
  2017-03-22 20:54     ` H. Peter Anvin
  1 sibling, 2 replies; 11+ messages in thread
From: Andy Lutomirski @ 2017-03-22 20:44 UTC (permalink / raw)
  To: Thomas Garnier
  Cc: Martin Schwidefsky, Heiko Carstens, Dave Hansen, David Howells,
	Al Viro, Arnd Bergmann, René Nyffenegger, Andrew Morton,
	Paul E . McKenney, Ingo Molnar, Thomas Gleixner, Oleg Nesterov,
	Pavel Tikhomirov, Stephen Smalley, Ingo Molnar, H . Peter Anvin,
	Andy Lutomirski, Paolo Bonzini, Rik van Riel

On Wed, Mar 22, 2017 at 1:38 PM, Thomas Garnier <thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> wrote:
> This patch ensures a syscall does not return to user-mode with a kernel
> address limit. If that happened, a process can corrupt kernel-mode
> memory and elevate privileges.
>
> For example, it would mitigation this bug:
>
> - https://bugs.chromium.org/p/project-zero/issues/detail?id=990
>
> If the CONFIG_BUG_ON_DATA_CORRUPTION option is enabled, an incorrect
> state will result in a BUG_ON.

I'm a bit confused about this choice of configurability.  I can see
two sensible choices:

1. Enable this hardening feature: BUG if there's an exploitable bug.

2. Don't enable it at all.

While it's possible that silently papering over the bug is slightly
faster than BUGging, it will allow bugs to continue to exist
undetected.

--Andy

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v4 1/4] syscalls: Restore address limit after a syscall
  2017-03-22 20:44   ` [PATCH v4 1/4] syscalls: Restore address limit after a syscall Andy Lutomirski
@ 2017-03-22 20:49     ` Thomas Garnier
  2017-03-22 20:54       ` H. Peter Anvin
  2017-03-22 20:54     ` H. Peter Anvin
  1 sibling, 1 reply; 11+ messages in thread
From: Thomas Garnier @ 2017-03-22 20:49 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Martin Schwidefsky, Heiko Carstens, Dave Hansen, David Howells,
	Al Viro, Arnd Bergmann, René Nyffenegger, Andrew Morton,
	Paul E . McKenney, Ingo Molnar, Thomas Gleixner, Oleg Nesterov,
	Pavel Tikhomirov, Stephen Smalley, Ingo Molnar, H . Peter Anvin,
	Andy Lutomirski, Paolo Bonzini, Rik van Riel, Kees Cook,
	Josh Poimboeuf, Borislav Petkov, Brian Gerst

On Wed, Mar 22, 2017 at 1:44 PM, Andy Lutomirski <luto@amacapital.net> wrote:
> On Wed, Mar 22, 2017 at 1:38 PM, Thomas Garnier <thgarnie@google.com> wrote:
>> This patch ensures a syscall does not return to user-mode with a kernel
>> address limit. If that happened, a process can corrupt kernel-mode
>> memory and elevate privileges.
>>
>> For example, it would mitigation this bug:
>>
>> - https://bugs.chromium.org/p/project-zero/issues/detail?id=990
>>
>> If the CONFIG_BUG_ON_DATA_CORRUPTION option is enabled, an incorrect
>> state will result in a BUG_ON.
>
> I'm a bit confused about this choice of configurability.  I can see
> two sensible choices:
>
> 1. Enable this hardening feature: BUG if there's an exploitable bug.
>
> 2. Don't enable it at all.
>
> While it's possible that silently papering over the bug is slightly
> faster than BUGging, it will allow bugs to continue to exist
> undetected.

We can default to BUGging. I think my approach was avoiding doing a
BUG_ON just to avoid breaking people.

>
> --Andy



-- 
Thomas

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v4 1/4] syscalls: Restore address limit after a syscall
  2017-03-22 20:44   ` [PATCH v4 1/4] syscalls: Restore address limit after a syscall Andy Lutomirski
  2017-03-22 20:49     ` Thomas Garnier
@ 2017-03-22 20:54     ` H. Peter Anvin
  1 sibling, 0 replies; 11+ messages in thread
From: H. Peter Anvin @ 2017-03-22 20:54 UTC (permalink / raw)
  To: Andy Lutomirski, Thomas Garnier
  Cc: Mark Rutland, kernel-hardening, Catalin Marinas, Heiko Carstens,
	linux-kernel, David Howells, Dave Hansen, René Nyffenegger,
	Pavel Tikhomirov, linux-s390, X86 ML, Russell King, Will Deacon,
	Ingo Molnar, Christian Borntraeger, Ingo Molnar,
	Paul E . McKenney, Stephen Smalley, Rik van Riel,
	Vladimir Murzin

On 03/22/17 13:44, Andy Lutomirski wrote:
> 
> While it's possible that silently papering over the bug is slightly
> faster than BUGging, it will allow bugs to continue to exist
> undetected.
> 

It would also allow the test to be inlined (at least on architectures
which have a one-site implementation) and have only the failure case out
of line, with a __noreturn annotation (which allows it to be jumped to
rather than called, which is usually available as a conditional
operation whereas call often isn't.)

That is...

extern void __noreturn __pre_usermode_state_invalid(void);

static void verify_pre_usermode_state(void)
{
	if (unlikely(!segment_eq(get_fs(), USER_DS))
		__pre_usermode_state_invalid();
}

	-hpa

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v4 1/4] syscalls: Restore address limit after a syscall
  2017-03-22 20:49     ` Thomas Garnier
@ 2017-03-22 20:54       ` H. Peter Anvin
  2017-03-23 15:14         ` Thomas Garnier
  0 siblings, 1 reply; 11+ messages in thread
From: H. Peter Anvin @ 2017-03-22 20:54 UTC (permalink / raw)
  To: Thomas Garnier, Andy Lutomirski
  Cc: Mark Rutland, kernel-hardening, Catalin Marinas, Heiko Carstens,
	linux-kernel, David Howells, Dave Hansen, René Nyffenegger,
	Pavel Tikhomirov, linux-s390, X86 ML, Russell King, Will Deacon,
	Ingo Molnar, Christian Borntraeger, Ingo Molnar,
	Paul E . McKenney, Stephen Smalley, Rik van Riel,
	Vladimir Murzin

On 03/22/17 13:49, Thomas Garnier wrote:
> 
> We can default to BUGging. I think my approach was avoiding doing a
> BUG_ON just to avoid breaking people.
> 

Breaking on a potentially-exploitable bug is a feature.

	-hpa

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v4 1/4] syscalls: Restore address limit after a syscall
  2017-03-22 20:54       ` H. Peter Anvin
@ 2017-03-23 15:14         ` Thomas Garnier
  2017-03-23 15:32           ` Borislav Petkov
  0 siblings, 1 reply; 11+ messages in thread
From: Thomas Garnier @ 2017-03-23 15:14 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Andy Lutomirski, Martin Schwidefsky, Heiko Carstens, Dave Hansen,
	David Howells, Al Viro, Arnd Bergmann, René Nyffenegger,
	Andrew Morton, Paul E . McKenney, Ingo Molnar, Thomas Gleixner,
	Oleg Nesterov, Pavel Tikhomirov, Stephen Smalley, Ingo Molnar,
	Andy Lutomirski, Paolo Bonzini, Rik van Riel

Okay well then people are fine with a BUG_ON approach. I will do a
next iteration tailored to that. I will also try to add the static
inline suggestion from Peter.

On Wed, Mar 22, 2017 at 1:54 PM, H. Peter Anvin <hpa@zytor.com> wrote:
> On 03/22/17 13:49, Thomas Garnier wrote:
>>
>> We can default to BUGging. I think my approach was avoiding doing a
>> BUG_ON just to avoid breaking people.
>>
>
> Breaking on a potentially-exploitable bug is a feature.
>
>         -hpa
>
>



-- 
Thomas

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v4 1/4] syscalls: Restore address limit after a syscall
  2017-03-23 15:14         ` Thomas Garnier
@ 2017-03-23 15:32           ` Borislav Petkov
       [not found]             ` <20170323153204.4lxglq6jlvvxp6ev-fF5Pk5pvG8Y@public.gmane.org>
  0 siblings, 1 reply; 11+ messages in thread
From: Borislav Petkov @ 2017-03-23 15:32 UTC (permalink / raw)
  To: Thomas Garnier
  Cc: H. Peter Anvin, Andy Lutomirski, Martin Schwidefsky,
	Heiko Carstens, Dave Hansen, David Howells, Al Viro,
	Arnd Bergmann, René Nyffenegger, Andrew Morton,
	Paul E . McKenney, Ingo Molnar, Thomas Gleixner, Oleg Nesterov,
	Pavel Tikhomirov, Stephen Smalley, Ingo Molnar, Andy Lutomirski,
	Paolo Bonzini

On Thu, Mar 23, 2017 at 08:14:44AM -0700, Thomas Garnier wrote:
> Okay well then people are fine with a BUG_ON approach. I will do a
> next iteration tailored to that. I will also try to add the static
> inline suggestion from Peter.

Would it be possible, please, to refrain from top-posting when replying
on the ML?

You sometimes reply inline and after the text and sometimes at the top.
This subthread has both variants and it is really annoying to people
like me who try to follow the discussion.

Thanks.

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v4 1/4] syscalls: Restore address limit after a syscall
       [not found]             ` <20170323153204.4lxglq6jlvvxp6ev-fF5Pk5pvG8Y@public.gmane.org>
@ 2017-03-23 15:40               ` Thomas Garnier
  0 siblings, 0 replies; 11+ messages in thread
From: Thomas Garnier @ 2017-03-23 15:40 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: H. Peter Anvin, Andy Lutomirski, Martin Schwidefsky,
	Heiko Carstens, Dave Hansen, David Howells, Al Viro,
	Arnd Bergmann, René Nyffenegger, Andrew Morton,
	Paul E . McKenney, Ingo Molnar, Thomas Gleixner, Oleg Nesterov,
	Pavel Tikhomirov, Stephen Smalley, Ingo Molnar, Andy Lutomirski,
	Paolo Bonzini

On Thu, Mar 23, 2017 at 8:32 AM, Borislav Petkov <bp-Gina5bIWoIWzQB+pC5nmwQ@public.gmane.org> wrote:
> On Thu, Mar 23, 2017 at 08:14:44AM -0700, Thomas Garnier wrote:
>> Okay well then people are fine with a BUG_ON approach. I will do a
>> next iteration tailored to that. I will also try to add the static
>> inline suggestion from Peter.
>
> Would it be possible, please, to refrain from top-posting when replying
> on the ML?
>
> You sometimes reply inline and after the text and sometimes at the top.
> This subthread has both variants and it is really annoying to people
> like me who try to follow the discussion.

Sure, I will try to always reply inline. Sorry bad habits.

-- 
Thomas

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2017-03-23 15:40 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-03-22 20:38 [PATCH v4 1/4] syscalls: Restore address limit after a syscall Thomas Garnier
2017-03-22 20:38 ` [PATCH v4 2/4] x86/syscalls: Specific usage of verify_pre_usermode_state Thomas Garnier
2017-03-22 20:38 ` [PATCH v4 3/4] arm/syscalls: " Thomas Garnier
     [not found] ` <20170322203834.67556-1-thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
2017-03-22 20:38   ` [PATCH v4 4/4] arm64/syscalls: " Thomas Garnier
2017-03-22 20:44   ` [PATCH v4 1/4] syscalls: Restore address limit after a syscall Andy Lutomirski
2017-03-22 20:49     ` Thomas Garnier
2017-03-22 20:54       ` H. Peter Anvin
2017-03-23 15:14         ` Thomas Garnier
2017-03-23 15:32           ` Borislav Petkov
     [not found]             ` <20170323153204.4lxglq6jlvvxp6ev-fF5Pk5pvG8Y@public.gmane.org>
2017-03-23 15:40               ` Thomas Garnier
2017-03-22 20:54     ` H. Peter Anvin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).