* [PATCH v4 1/4] syscalls: Restore address limit after a syscall @ 2017-03-22 20:38 Thomas Garnier 2017-03-22 20:38 ` [PATCH v4 2/4] x86/syscalls: Specific usage of verify_pre_usermode_state Thomas Garnier ` (2 more replies) 0 siblings, 3 replies; 11+ messages in thread From: Thomas Garnier @ 2017-03-22 20:38 UTC (permalink / raw) To: Martin Schwidefsky, Heiko Carstens, Dave Hansen, David Howells, Al Viro, Arnd Bergmann, Thomas Garnier, René Nyffenegger, Andrew Morton, Paul E . McKenney, Ingo Molnar, Thomas Gleixner, Oleg Nesterov, Pavel Tikhomirov, Stephen Smalley, Ingo Molnar, H . Peter Anvin, Andy Lutomirski, Paolo Bonzini Cc: linux-s390-u79uwXL29TY76Z2rM5mHXA, linux-kernel-u79uwXL29TY76Z2rM5mHXA, linux-api-u79uwXL29TY76Z2rM5mHXA, x86-DgEjT+Ai2ygdnm+yROfE0A, linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r, kernel-hardening-ZwoEplunGu1jrUoiu81ncdBPR1lH4CV8 This patch ensures a syscall does not return to user-mode with a kernel address limit. If that happened, a process can corrupt kernel-mode memory and elevate privileges. For example, it would mitigation this bug: - https://bugs.chromium.org/p/project-zero/issues/detail?id=990 If the CONFIG_BUG_ON_DATA_CORRUPTION option is enabled, an incorrect state will result in a BUG_ON. The CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE option is also added so each architecture can optimize this change. Signed-off-by: Thomas Garnier <thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> --- Based on next-20170322 --- arch/s390/Kconfig | 1 + include/linux/syscalls.h | 18 +++++++++++++++++- init/Kconfig | 7 +++++++ kernel/sys.c | 8 ++++++++ 4 files changed, 33 insertions(+), 1 deletion(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index a2dcef0aacc7..b73f5b87bc99 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -103,6 +103,7 @@ config S390 select ARCH_INLINE_WRITE_UNLOCK_BH select ARCH_INLINE_WRITE_UNLOCK_IRQ select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE + select ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE select ARCH_SAVE_PAGE_KEYS if HIBERNATION select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_NUMA_BALANCING diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 980c3c9b06f8..e659076adf6c 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -191,6 +191,19 @@ extern struct trace_event_functions exit_syscall_print_funcs; SYSCALL_METADATA(sname, x, __VA_ARGS__) \ __SYSCALL_DEFINEx(x, sname, __VA_ARGS__) +asmlinkage void verify_pre_usermode_state(void); + +#ifndef CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE +#define __CHECK_USER_CALLER() \ + bool user_caller = segment_eq(get_fs(), USER_DS) +#define __VERIFY_PRE_USERMODE_STATE() \ + if (user_caller) verify_pre_usermode_state() +#else +#define __CHECK_USER_CALLER() +#define __VERIFY_PRE_USERMODE_STATE() +#endif + + #define __PROTECT(...) asmlinkage_protect(__VA_ARGS__) #define __SYSCALL_DEFINEx(x, name, ...) \ asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ @@ -199,7 +212,10 @@ extern struct trace_event_functions exit_syscall_print_funcs; asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ { \ - long ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \ + long ret; \ + __CHECK_USER_CALLER(); \ + ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \ + __VERIFY_PRE_USERMODE_STATE(); \ __MAP(x,__SC_TEST,__VA_ARGS__); \ __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \ return ret; \ diff --git a/init/Kconfig b/init/Kconfig index c859c993c26f..c4efc3a95e4a 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1929,6 +1929,13 @@ config PROFILING config TRACEPOINTS bool +# +# Set by each architecture that want to optimize how verify_pre_usermode_state +# is called. +# +config ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE + bool + source "arch/Kconfig" endmenu # General setup diff --git a/kernel/sys.c b/kernel/sys.c index 196c7134bee6..411163ac9dc3 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2459,3 +2459,11 @@ COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info) return 0; } #endif /* CONFIG_COMPAT */ + +/* Called before coming back to user-mode */ +asmlinkage void verify_pre_usermode_state(void) +{ + if (CHECK_DATA_CORRUPTION(!segment_eq(get_fs(), USER_DS), + "incorrect get_fs() on user-mode return")) + set_fs(USER_DS); +} -- 2.12.1.500.gab5fba24ee-goog ^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v4 2/4] x86/syscalls: Specific usage of verify_pre_usermode_state 2017-03-22 20:38 [PATCH v4 1/4] syscalls: Restore address limit after a syscall Thomas Garnier @ 2017-03-22 20:38 ` Thomas Garnier 2017-03-22 20:38 ` [PATCH v4 3/4] arm/syscalls: " Thomas Garnier [not found] ` <20170322203834.67556-1-thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> 2 siblings, 0 replies; 11+ messages in thread From: Thomas Garnier @ 2017-03-22 20:38 UTC (permalink / raw) To: Martin Schwidefsky, Heiko Carstens, Dave Hansen, David Howells, Al Viro, Arnd Bergmann, Thomas Garnier, René Nyffenegger, Andrew Morton, Paul E . McKenney, Ingo Molnar, Thomas Gleixner, Oleg Nesterov, Pavel Tikhomirov, Stephen Smalley, Ingo Molnar, H . Peter Anvin, Andy Lutomirski, Paolo Bonzini, Rik van Riel, Kees Cook, Josh Poimboeuf, Borislav Petkov Cc: linux-s390, linux-kernel, linux-api, x86, linux-arm-kernel, kernel-hardening Implement specific usage of verify_pre_usermode_state for user-mode returns for x86. --- Based on next-20170322 --- arch/x86/Kconfig | 1 + arch/x86/entry/common.c | 3 +++ arch/x86/entry/entry_64.S | 8 ++++++++ arch/x86/include/asm/pgtable_64_types.h | 11 +++++++++++ arch/x86/include/asm/processor.h | 11 ----------- 5 files changed, 23 insertions(+), 11 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0e1bdadc8222..f48c96b834b5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -63,6 +63,7 @@ config X86 select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO + select ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT select ARCH_SUPPORTS_NUMA_BALANCING if X86_64 diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index cdefcfdd9e63..76ef050255c9 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -23,6 +23,7 @@ #include <linux/user-return-notifier.h> #include <linux/uprobes.h> #include <linux/livepatch.h> +#include <linux/syscalls.h> #include <asm/desc.h> #include <asm/traps.h> @@ -183,6 +184,8 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) struct thread_info *ti = current_thread_info(); u32 cached_flags; + verify_pre_usermode_state(); + if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled())) local_irq_disable(); diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index d2b2a2948ffe..c079b010205c 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -218,6 +218,14 @@ entry_SYSCALL_64_fastpath: testl $_TIF_ALLWORK_MASK, TASK_TI_flags(%r11) jnz 1f + /* + * If address limit is not based on user-mode, jump to slow path for + * additional security checks. + */ + movq $TASK_SIZE_MAX, %rcx + cmp %rcx, TASK_addr_limit(%r11) + jnz 1f + LOCKDEP_SYS_EXIT TRACE_IRQS_ON /* user mode is traced as IRQs on */ movq RIP(%rsp), %rcx diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 516593e66bd6..12fa851c7fa8 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -78,4 +78,15 @@ typedef struct { pteval_t pte; } pte_t; #define EARLY_DYNAMIC_PAGE_TABLES 64 +/* + * User space process size. 47bits minus one guard page. The guard + * page is necessary on Intel CPUs: if a SYSCALL instruction is at + * the highest possible canonical userspace address, then that + * syscall will enter the kernel with a non-canonical return + * address, and SYSRET will explode dangerously. We avoid this + * particular problem by preventing anything from being mapped + * at the maximum canonical address. + */ +#define TASK_SIZE_MAX ((_AC(1, UL) << 47) - PAGE_SIZE) + #endif /* _ASM_X86_PGTABLE_64_DEFS_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 3cada998a402..e80822582d3e 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -825,17 +825,6 @@ static inline void spin_lock_prefetch(const void *x) #define KSTK_ESP(task) (task_pt_regs(task)->sp) #else -/* - * User space process size. 47bits minus one guard page. The guard - * page is necessary on Intel CPUs: if a SYSCALL instruction is at - * the highest possible canonical userspace address, then that - * syscall will enter the kernel with a non-canonical return - * address, and SYSRET will explode dangerously. We avoid this - * particular problem by preventing anything from being mapped - * at the maximum canonical address. - */ -#define TASK_SIZE_MAX ((1UL << 47) - PAGE_SIZE) - /* This decides where the kernel will search for a free chunk of vm * space during mmap's. */ -- 2.12.1.500.gab5fba24ee-goog ^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v4 3/4] arm/syscalls: Specific usage of verify_pre_usermode_state 2017-03-22 20:38 [PATCH v4 1/4] syscalls: Restore address limit after a syscall Thomas Garnier 2017-03-22 20:38 ` [PATCH v4 2/4] x86/syscalls: Specific usage of verify_pre_usermode_state Thomas Garnier @ 2017-03-22 20:38 ` Thomas Garnier [not found] ` <20170322203834.67556-1-thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> 2 siblings, 0 replies; 11+ messages in thread From: Thomas Garnier @ 2017-03-22 20:38 UTC (permalink / raw) To: Martin Schwidefsky, Heiko Carstens, Dave Hansen, David Howells, Al Viro, Arnd Bergmann, Thomas Garnier, René Nyffenegger, Andrew Morton, Paul E . McKenney, Ingo Molnar, Thomas Gleixner, Oleg Nesterov, Pavel Tikhomirov, Stephen Smalley, Ingo Molnar, H . Peter Anvin, Andy Lutomirski, Paolo Bonzini, Rik van Riel, Kees Cook, Josh Poimboeuf, Borislav Petkov Cc: linux-s390, linux-kernel, linux-api, x86, linux-arm-kernel, kernel-hardening Implement specific usage of verify_pre_usermode_state for user-mode returns for arm. --- Based on next-20170322 --- arch/arm/Kconfig | 1 + arch/arm/include/asm/domain.h | 3 +++ arch/arm/kernel/entry-common.S | 32 +++++++++++++++++++++++++++++++- 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index fcbc5ef1ec69..10c6dc3dfff9 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -12,6 +12,7 @@ config ARM select ARCH_HAVE_CUSTOM_GPIO_H select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_MIGHT_HAVE_PC_PARPORT + select ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7 select ARCH_SUPPORTS_ATOMIC_RMW diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index 99d9f630d6b6..fd1cd24ca9fe 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -58,6 +58,9 @@ #define domain_mask(dom) ((3) << (2 * (dom))) #define domain_val(dom,type) ((type) << (2 * (dom))) +#define DOMAIN_KERNEL_MASK domain_mask(DOMAIN_KERNEL) +#define DOMAIN_CLIENT_VALUE domain_val(DOMAIN_KERNEL, DOMAIN_CLIENT) + #ifdef CONFIG_CPU_SW_DOMAIN_PAN #define DACR_INIT \ (domain_val(DOMAIN_USER, DOMAIN_NOACCESS) | \ diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index eb5cd77bf1d8..8d9bfe9ff313 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -12,6 +12,7 @@ #include <asm/unistd.h> #include <asm/ftrace.h> #include <asm/unwind.h> +#include <asm/memory.h> #ifdef CONFIG_AEABI #include <asm/unistd-oabi.h> #endif @@ -27,7 +28,6 @@ #include "entry-header.S" - .align 5 #if !(IS_ENABLED(CONFIG_TRACE_IRQFLAGS) || IS_ENABLED(CONFIG_CONTEXT_TRACKING)) /* @@ -40,9 +40,12 @@ ret_fast_syscall: UNWIND(.fnstart ) UNWIND(.cantunwind ) disable_irq_notrace @ disable interrupts + ldr r2, [tsk, #TI_ADDR_LIMIT] ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK bne fast_work_pending + cmp r2, #TASK_SIZE + blne addr_limit_fail /* perform architecture specific actions before user return */ arch_ret_to_user r1, lr @@ -66,6 +69,7 @@ ret_fast_syscall: UNWIND(.cantunwind ) str r0, [sp, #S_R0 + S_OFF]! @ save returned r0 disable_irq_notrace @ disable interrupts + ldr r2, [tsk, #TI_ADDR_LIMIT] ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK beq no_work_pending @@ -82,6 +86,7 @@ slow_work_pending: mov r2, why @ 'syscall' bl do_work_pending cmp r0, #0 + ldreq r2, [tsk, #TI_ADDR_LIMIT] beq no_work_pending movlt scno, #(__NR_restart_syscall - __NR_SYSCALL_BASE) ldmia sp, {r0 - r6} @ have to reload r0 - r6 @@ -99,9 +104,12 @@ ret_slow_syscall: disable_irq_notrace @ disable interrupts ENTRY(ret_to_user_from_irq) ldr r1, [tsk, #TI_FLAGS] + ldr r2, [tsk, #TI_ADDR_LIMIT] tst r1, #_TIF_WORK_MASK bne slow_work_pending no_work_pending: + cmp r2, #TASK_SIZE + blne addr_limit_fail asm_trace_hardirqs_on save = 0 /* perform architecture specific actions before user return */ @@ -125,6 +133,28 @@ ENTRY(ret_from_fork) b ret_slow_syscall ENDPROC(ret_from_fork) +addr_limit_fail: +#ifdef CONFIG_BUG_ON_DATA_CORRUPTION + stmfd sp!, {r0, lr} + bl verify_pre_usermode_state + ldmfd sp!, {r0, lr} +#else + /* + * Similar to set_fs(USER_DS) in verify_pre_usermode_state without a + * warning. + */ + mov r2, #TASK_SIZE + str r2, [tsk, #TI_ADDR_LIMIT] +#ifdef CONFIG_CPU_USE_DOMAINS + /* Switch domain like modify_domain(DOMAIN_KERNEL, DOMAIN_CLIENT) */ + mrc p15, 0, r2, c3, c0, 0 @ Get domain register + bic r2, r2, #DOMAIN_KERNEL_MASK @ Clear the domain part + orr r2, r2, #DOMAIN_CLIENT_VALUE @ Enforce DOMAIN_CLIENT + mcr p15, 0, r2, c3, c0, 0 @ Set domain register +#endif +#endif + ret lr + /*============================================================================= * SWI handler *----------------------------------------------------------------------------- -- 2.12.1.500.gab5fba24ee-goog ^ permalink raw reply related [flat|nested] 11+ messages in thread
[parent not found: <20170322203834.67556-1-thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>]
* [PATCH v4 4/4] arm64/syscalls: Specific usage of verify_pre_usermode_state [not found] ` <20170322203834.67556-1-thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> @ 2017-03-22 20:38 ` Thomas Garnier 2017-03-22 20:44 ` [PATCH v4 1/4] syscalls: Restore address limit after a syscall Andy Lutomirski 1 sibling, 0 replies; 11+ messages in thread From: Thomas Garnier @ 2017-03-22 20:38 UTC (permalink / raw) To: Martin Schwidefsky, Heiko Carstens, Dave Hansen, David Howells, Al Viro, Arnd Bergmann, Thomas Garnier, René Nyffenegger, Andrew Morton, Paul E . McKenney, Ingo Molnar, Thomas Gleixner, Oleg Nesterov, Pavel Tikhomirov, Stephen Smalley, Ingo Molnar, H . Peter Anvin, Andy Lutomirski, Paolo Bonzini Cc: linux-s390-u79uwXL29TY76Z2rM5mHXA, linux-kernel-u79uwXL29TY76Z2rM5mHXA, linux-api-u79uwXL29TY76Z2rM5mHXA, x86-DgEjT+Ai2ygdnm+yROfE0A, linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r, kernel-hardening-ZwoEplunGu1jrUoiu81ncdBPR1lH4CV8 Implement specific usage of verify_pre_usermode_state for user-mode returns for arm64. --- Based on next-20170322 --- arch/arm64/Kconfig | 1 + arch/arm64/kernel/entry.S | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index f2b0b528037d..0e86d87259f4 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -24,6 +24,7 @@ config ARM64 select ARCH_WANT_COMPAT_IPC_PARSE_VERSION select ARCH_WANT_FRAME_POINTERS select ARCH_HAS_UBSAN_SANITIZE_ALL + select ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE select ARM_AMBA select ARM_ARCH_TIMER select ARM_GIC diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 43512d4d7df2..685b43771ac9 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -744,6 +744,10 @@ ENDPROC(cpu_switch_to) ret_fast_syscall: disable_irq // disable interrupts str x0, [sp, #S_X0] // returned x0 + ldr x2, [tsk, #TSK_TI_ADDR_LIMIT] // check addr limit change + mov x1, #TASK_SIZE_64 + cmp x2, x1 + b.ne addr_limit_fail ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for syscall tracing and x2, x1, #_TIF_SYSCALL_WORK cbnz x2, ret_fast_syscall_trace @@ -771,6 +775,11 @@ work_pending: */ ret_to_user: disable_irq // disable interrupts + ldr x2, [tsk, #TSK_TI_ADDR_LIMIT] // check addr limit change + mov x1, #TASK_SIZE_64 + cmp x2, x1 + b.ne addr_limit_fail + ldr x1, [tsk, #TSK_TI_FLAGS] and x2, x1, #_TIF_WORK_MASK cbnz x2, work_pending @@ -779,6 +788,22 @@ finish_ret_to_user: kernel_exit 0 ENDPROC(ret_to_user) +addr_limit_fail: +#ifdef CONFIG_BUG_ON_DATA_CORRUPTION + stp x0, lr, [sp,#-16]! + bl verify_pre_usermode_state + ldp x0, lr, [sp],#16 +#else + /* + * Similar to set_fs(USER_DS) in verify_pre_usermode_state without a + * warning. + */ + mov x2, #TASK_SIZE_64 + str x2, [tsk, #TSK_TI_ADDR_LIMIT] + ALTERNATIVE(nop, SET_PSTATE_UAO(0), ARM64_HAS_UAO, CONFIG_ARM64_UAO) +#endif + ret lr + /* * This is how we return from a fork. */ -- 2.12.1.500.gab5fba24ee-goog ^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH v4 1/4] syscalls: Restore address limit after a syscall [not found] ` <20170322203834.67556-1-thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> 2017-03-22 20:38 ` [PATCH v4 4/4] arm64/syscalls: " Thomas Garnier @ 2017-03-22 20:44 ` Andy Lutomirski 2017-03-22 20:49 ` Thomas Garnier 2017-03-22 20:54 ` H. Peter Anvin 1 sibling, 2 replies; 11+ messages in thread From: Andy Lutomirski @ 2017-03-22 20:44 UTC (permalink / raw) To: Thomas Garnier Cc: Martin Schwidefsky, Heiko Carstens, Dave Hansen, David Howells, Al Viro, Arnd Bergmann, René Nyffenegger, Andrew Morton, Paul E . McKenney, Ingo Molnar, Thomas Gleixner, Oleg Nesterov, Pavel Tikhomirov, Stephen Smalley, Ingo Molnar, H . Peter Anvin, Andy Lutomirski, Paolo Bonzini, Rik van Riel On Wed, Mar 22, 2017 at 1:38 PM, Thomas Garnier <thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> wrote: > This patch ensures a syscall does not return to user-mode with a kernel > address limit. If that happened, a process can corrupt kernel-mode > memory and elevate privileges. > > For example, it would mitigation this bug: > > - https://bugs.chromium.org/p/project-zero/issues/detail?id=990 > > If the CONFIG_BUG_ON_DATA_CORRUPTION option is enabled, an incorrect > state will result in a BUG_ON. I'm a bit confused about this choice of configurability. I can see two sensible choices: 1. Enable this hardening feature: BUG if there's an exploitable bug. 2. Don't enable it at all. While it's possible that silently papering over the bug is slightly faster than BUGging, it will allow bugs to continue to exist undetected. --Andy ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v4 1/4] syscalls: Restore address limit after a syscall 2017-03-22 20:44 ` [PATCH v4 1/4] syscalls: Restore address limit after a syscall Andy Lutomirski @ 2017-03-22 20:49 ` Thomas Garnier 2017-03-22 20:54 ` H. Peter Anvin 2017-03-22 20:54 ` H. Peter Anvin 1 sibling, 1 reply; 11+ messages in thread From: Thomas Garnier @ 2017-03-22 20:49 UTC (permalink / raw) To: Andy Lutomirski Cc: Martin Schwidefsky, Heiko Carstens, Dave Hansen, David Howells, Al Viro, Arnd Bergmann, René Nyffenegger, Andrew Morton, Paul E . McKenney, Ingo Molnar, Thomas Gleixner, Oleg Nesterov, Pavel Tikhomirov, Stephen Smalley, Ingo Molnar, H . Peter Anvin, Andy Lutomirski, Paolo Bonzini, Rik van Riel, Kees Cook, Josh Poimboeuf, Borislav Petkov, Brian Gerst On Wed, Mar 22, 2017 at 1:44 PM, Andy Lutomirski <luto@amacapital.net> wrote: > On Wed, Mar 22, 2017 at 1:38 PM, Thomas Garnier <thgarnie@google.com> wrote: >> This patch ensures a syscall does not return to user-mode with a kernel >> address limit. If that happened, a process can corrupt kernel-mode >> memory and elevate privileges. >> >> For example, it would mitigation this bug: >> >> - https://bugs.chromium.org/p/project-zero/issues/detail?id=990 >> >> If the CONFIG_BUG_ON_DATA_CORRUPTION option is enabled, an incorrect >> state will result in a BUG_ON. > > I'm a bit confused about this choice of configurability. I can see > two sensible choices: > > 1. Enable this hardening feature: BUG if there's an exploitable bug. > > 2. Don't enable it at all. > > While it's possible that silently papering over the bug is slightly > faster than BUGging, it will allow bugs to continue to exist > undetected. We can default to BUGging. I think my approach was avoiding doing a BUG_ON just to avoid breaking people. > > --Andy -- Thomas ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v4 1/4] syscalls: Restore address limit after a syscall 2017-03-22 20:49 ` Thomas Garnier @ 2017-03-22 20:54 ` H. Peter Anvin 2017-03-23 15:14 ` Thomas Garnier 0 siblings, 1 reply; 11+ messages in thread From: H. Peter Anvin @ 2017-03-22 20:54 UTC (permalink / raw) To: Thomas Garnier, Andy Lutomirski Cc: Mark Rutland, kernel-hardening, Catalin Marinas, Heiko Carstens, linux-kernel, David Howells, Dave Hansen, René Nyffenegger, Pavel Tikhomirov, linux-s390, X86 ML, Russell King, Will Deacon, Ingo Molnar, Christian Borntraeger, Ingo Molnar, Paul E . McKenney, Stephen Smalley, Rik van Riel, Vladimir Murzin On 03/22/17 13:49, Thomas Garnier wrote: > > We can default to BUGging. I think my approach was avoiding doing a > BUG_ON just to avoid breaking people. > Breaking on a potentially-exploitable bug is a feature. -hpa ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v4 1/4] syscalls: Restore address limit after a syscall 2017-03-22 20:54 ` H. Peter Anvin @ 2017-03-23 15:14 ` Thomas Garnier 2017-03-23 15:32 ` Borislav Petkov 0 siblings, 1 reply; 11+ messages in thread From: Thomas Garnier @ 2017-03-23 15:14 UTC (permalink / raw) To: H. Peter Anvin Cc: Andy Lutomirski, Martin Schwidefsky, Heiko Carstens, Dave Hansen, David Howells, Al Viro, Arnd Bergmann, René Nyffenegger, Andrew Morton, Paul E . McKenney, Ingo Molnar, Thomas Gleixner, Oleg Nesterov, Pavel Tikhomirov, Stephen Smalley, Ingo Molnar, Andy Lutomirski, Paolo Bonzini, Rik van Riel Okay well then people are fine with a BUG_ON approach. I will do a next iteration tailored to that. I will also try to add the static inline suggestion from Peter. On Wed, Mar 22, 2017 at 1:54 PM, H. Peter Anvin <hpa@zytor.com> wrote: > On 03/22/17 13:49, Thomas Garnier wrote: >> >> We can default to BUGging. I think my approach was avoiding doing a >> BUG_ON just to avoid breaking people. >> > > Breaking on a potentially-exploitable bug is a feature. > > -hpa > > -- Thomas ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v4 1/4] syscalls: Restore address limit after a syscall 2017-03-23 15:14 ` Thomas Garnier @ 2017-03-23 15:32 ` Borislav Petkov [not found] ` <20170323153204.4lxglq6jlvvxp6ev-fF5Pk5pvG8Y@public.gmane.org> 0 siblings, 1 reply; 11+ messages in thread From: Borislav Petkov @ 2017-03-23 15:32 UTC (permalink / raw) To: Thomas Garnier Cc: H. Peter Anvin, Andy Lutomirski, Martin Schwidefsky, Heiko Carstens, Dave Hansen, David Howells, Al Viro, Arnd Bergmann, René Nyffenegger, Andrew Morton, Paul E . McKenney, Ingo Molnar, Thomas Gleixner, Oleg Nesterov, Pavel Tikhomirov, Stephen Smalley, Ingo Molnar, Andy Lutomirski, Paolo Bonzini On Thu, Mar 23, 2017 at 08:14:44AM -0700, Thomas Garnier wrote: > Okay well then people are fine with a BUG_ON approach. I will do a > next iteration tailored to that. I will also try to add the static > inline suggestion from Peter. Would it be possible, please, to refrain from top-posting when replying on the ML? You sometimes reply inline and after the text and sometimes at the top. This subthread has both variants and it is really annoying to people like me who try to follow the discussion. Thanks. -- Regards/Gruss, Boris. Good mailing practices for 400: avoid top-posting and trim the reply. ^ permalink raw reply [flat|nested] 11+ messages in thread
[parent not found: <20170323153204.4lxglq6jlvvxp6ev-fF5Pk5pvG8Y@public.gmane.org>]
* Re: [PATCH v4 1/4] syscalls: Restore address limit after a syscall [not found] ` <20170323153204.4lxglq6jlvvxp6ev-fF5Pk5pvG8Y@public.gmane.org> @ 2017-03-23 15:40 ` Thomas Garnier 0 siblings, 0 replies; 11+ messages in thread From: Thomas Garnier @ 2017-03-23 15:40 UTC (permalink / raw) To: Borislav Petkov Cc: H. Peter Anvin, Andy Lutomirski, Martin Schwidefsky, Heiko Carstens, Dave Hansen, David Howells, Al Viro, Arnd Bergmann, René Nyffenegger, Andrew Morton, Paul E . McKenney, Ingo Molnar, Thomas Gleixner, Oleg Nesterov, Pavel Tikhomirov, Stephen Smalley, Ingo Molnar, Andy Lutomirski, Paolo Bonzini On Thu, Mar 23, 2017 at 8:32 AM, Borislav Petkov <bp-Gina5bIWoIWzQB+pC5nmwQ@public.gmane.org> wrote: > On Thu, Mar 23, 2017 at 08:14:44AM -0700, Thomas Garnier wrote: >> Okay well then people are fine with a BUG_ON approach. I will do a >> next iteration tailored to that. I will also try to add the static >> inline suggestion from Peter. > > Would it be possible, please, to refrain from top-posting when replying > on the ML? > > You sometimes reply inline and after the text and sometimes at the top. > This subthread has both variants and it is really annoying to people > like me who try to follow the discussion. Sure, I will try to always reply inline. Sorry bad habits. -- Thomas ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v4 1/4] syscalls: Restore address limit after a syscall 2017-03-22 20:44 ` [PATCH v4 1/4] syscalls: Restore address limit after a syscall Andy Lutomirski 2017-03-22 20:49 ` Thomas Garnier @ 2017-03-22 20:54 ` H. Peter Anvin 1 sibling, 0 replies; 11+ messages in thread From: H. Peter Anvin @ 2017-03-22 20:54 UTC (permalink / raw) To: Andy Lutomirski, Thomas Garnier Cc: Mark Rutland, kernel-hardening, Catalin Marinas, Heiko Carstens, linux-kernel, David Howells, Dave Hansen, René Nyffenegger, Pavel Tikhomirov, linux-s390, X86 ML, Russell King, Will Deacon, Ingo Molnar, Christian Borntraeger, Ingo Molnar, Paul E . McKenney, Stephen Smalley, Rik van Riel, Vladimir Murzin On 03/22/17 13:44, Andy Lutomirski wrote: > > While it's possible that silently papering over the bug is slightly > faster than BUGging, it will allow bugs to continue to exist > undetected. > It would also allow the test to be inlined (at least on architectures which have a one-site implementation) and have only the failure case out of line, with a __noreturn annotation (which allows it to be jumped to rather than called, which is usually available as a conditional operation whereas call often isn't.) That is... extern void __noreturn __pre_usermode_state_invalid(void); static void verify_pre_usermode_state(void) { if (unlikely(!segment_eq(get_fs(), USER_DS)) __pre_usermode_state_invalid(); } -hpa ^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2017-03-23 15:40 UTC | newest] Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2017-03-22 20:38 [PATCH v4 1/4] syscalls: Restore address limit after a syscall Thomas Garnier 2017-03-22 20:38 ` [PATCH v4 2/4] x86/syscalls: Specific usage of verify_pre_usermode_state Thomas Garnier 2017-03-22 20:38 ` [PATCH v4 3/4] arm/syscalls: " Thomas Garnier [not found] ` <20170322203834.67556-1-thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> 2017-03-22 20:38 ` [PATCH v4 4/4] arm64/syscalls: " Thomas Garnier 2017-03-22 20:44 ` [PATCH v4 1/4] syscalls: Restore address limit after a syscall Andy Lutomirski 2017-03-22 20:49 ` Thomas Garnier 2017-03-22 20:54 ` H. Peter Anvin 2017-03-23 15:14 ` Thomas Garnier 2017-03-23 15:32 ` Borislav Petkov [not found] ` <20170323153204.4lxglq6jlvvxp6ev-fF5Pk5pvG8Y@public.gmane.org> 2017-03-23 15:40 ` Thomas Garnier 2017-03-22 20:54 ` H. Peter Anvin
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).