All of lore.kernel.org
* [PATCH v3 1/4] syscalls: Restore address limit after a syscall
@ 2017-03-11  0:04 ` Thomas Garnier
  0 siblings, 0 replies; 46+ messages in thread
From: Thomas Garnier @ 2017-03-11  0:04 UTC (permalink / raw)
  To: Martin Schwidefsky, Heiko Carstens, David Howells, Arnd Bergmann,
	Al Viro, Dave Hansen, Thomas Garnier, René Nyffenegger,
	Andrew Morton, Kees Cook, Paul E . McKenney, Andy Lutomirski,
	Ard Biesheuvel, Nicolas Pitre, Petr Mladek,
	Sebastian Andrzej Siewior, Sergey Senozhatsky, Helge Deller,
	Rik van Riel, Ingo Molnar, John Stultz, Thomas Gleixner
  Cc: linux-s390, linux-kernel, linux-api, x86, linux-arm-kernel,
	kernel-hardening

This patch ensures a syscall does not return to user-mode with a kernel
address limit. If it did, a process could corrupt kernel-mode memory and
elevate its privileges.

For example, it would mitigate this bug:

- https://bugs.chromium.org/p/project-zero/issues/detail?id=990

If the CONFIG_BUG_ON_DATA_CORRUPTION option is enabled, an incorrect
state will result in a BUG_ON.

The CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE option is also
added so each architecture can opt out of the generic per-syscall check
and implement an optimized, architecture-specific version.

Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
Based on next-20170308
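
A minimal sketch of the bug class this series mitigates (hypothetical
driver code, not taken from this patch; example_ioctl_handler() and
example_do_work() are made-up names): a kernel path widens the address
limit with set_fs(KERNEL_DS) and an early error return skips the
restore, so the task goes back to user-mode with user-pointer checks
effectively disabled.

#include <linux/uaccess.h>

static int example_ioctl_handler(void __user *arg)
{
	mm_segment_t old_fs = get_fs();
	int ret;

	set_fs(KERNEL_DS);		/* widen the limit for a kernel buffer */
	ret = example_do_work(arg);	/* hypothetical helper */
	if (ret)
		return ret;		/* BUG: KERNEL_DS leaks to user-mode */
	set_fs(old_fs);			/* only reached on success */
	return 0;
}

With this series, the leaked KERNEL_DS is detected and reset to USER_DS
before the syscall returns to user-mode.
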
---
 arch/s390/Kconfig        |  1 +
 include/linux/syscalls.h | 18 +++++++++++++++++-
 init/Kconfig             |  7 +++++++
 kernel/sys.c             |  8 ++++++++
 4 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index a2dcef0aacc7..b73f5b87bc99 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -103,6 +103,7 @@ config S390
 	select ARCH_INLINE_WRITE_UNLOCK_BH
 	select ARCH_INLINE_WRITE_UNLOCK_IRQ
 	select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
+	select ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
 	select ARCH_SAVE_PAGE_KEYS if HIBERNATION
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_SUPPORTS_NUMA_BALANCING
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 980c3c9b06f8..e659076adf6c 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -191,6 +191,19 @@ extern struct trace_event_functions exit_syscall_print_funcs;
 	SYSCALL_METADATA(sname, x, __VA_ARGS__)			\
 	__SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
 
+asmlinkage void verify_pre_usermode_state(void);
+
+#ifndef CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
+#define __CHECK_USER_CALLER() \
+	bool user_caller = segment_eq(get_fs(), USER_DS)
+#define __VERIFY_PRE_USERMODE_STATE() \
+	if (user_caller) verify_pre_usermode_state()
+#else
+#define __CHECK_USER_CALLER()
+#define __VERIFY_PRE_USERMODE_STATE()
+#endif
+
+
 #define __PROTECT(...) asmlinkage_protect(__VA_ARGS__)
 #define __SYSCALL_DEFINEx(x, name, ...)					\
 	asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))	\
@@ -199,7 +212,10 @@ extern struct trace_event_functions exit_syscall_print_funcs;
 	asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__));	\
 	asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__))	\
 	{								\
-		long ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__));	\
+		long ret;						\
+		__CHECK_USER_CALLER();					\
+		ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__));	\
+		__VERIFY_PRE_USERMODE_STATE();				\
 		__MAP(x,__SC_TEST,__VA_ARGS__);				\
 		__PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__));	\
 		return ret;						\
diff --git a/init/Kconfig b/init/Kconfig
index c859c993c26f..c4efc3a95e4a 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1929,6 +1929,13 @@ config PROFILING
 config TRACEPOINTS
 	bool
 
+#
+# Set by each architecture that wants to optimize how verify_pre_usermode_state
+# is called.
+#
+config ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
+	bool
+
 source "arch/Kconfig"
 
 endmenu		# General setup
diff --git a/kernel/sys.c b/kernel/sys.c
index 196c7134bee6..411163ac9dc3 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2459,3 +2459,11 @@ COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info)
 	return 0;
 }
 #endif /* CONFIG_COMPAT */
+
+/* Called before coming back to user-mode */
+asmlinkage void verify_pre_usermode_state(void)
+{
+	if (CHECK_DATA_CORRUPTION(!segment_eq(get_fs(), USER_DS),
+				  "incorrect get_fs() on user-mode return"))
+		set_fs(USER_DS);
+}
-- 
2.12.0.246.ga2ecc84866-goog

* [PATCH v3 2/4] x86/syscalls: Specific usage of verify_pre_usermode_state
  2017-03-11  0:04 ` [kernel-hardening] " Thomas Garnier
@ 2017-03-11  0:04   ` Thomas Garnier
  -1 siblings, 0 replies; 46+ messages in thread
From: Thomas Garnier @ 2017-03-11  0:04 UTC (permalink / raw)
  To: Martin Schwidefsky, Heiko Carstens, David Howells, Arnd Bergmann,
	Al Viro, Dave Hansen, Thomas Garnier, René Nyffenegger,
	Andrew Morton, Kees Cook, Paul E . McKenney, Andy Lutomirski,
	Ard Biesheuvel, Nicolas Pitre, Petr Mladek,
	Sebastian Andrzej Siewior, Sergey Senozhatsky, Helge Deller,
	Rik van Riel, Ingo Molnar, John Stultz, Thomas Gleixner
  Cc: linux-s390, linux-kernel, linux-api, x86, linux-arm-kernel,
	kernel-hardening

Implement specific usage of verify_pre_usermode_state for user-mode
returns on x86.
---
Based on next-20170308
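
For reference, a rough C equivalent of the fast-path check added to
entry_64.S below, written with the generic helpers from patch 1/4 (a
sketch only; the patch keeps it in assembly to avoid the cost of an
extra call on the fast path):

static inline void check_addr_limit_on_user_return(void)
{
#ifdef CONFIG_BUG_ON_DATA_CORRUPTION
	verify_pre_usermode_state();	/* WARN/BUG, then fix up */
#else
	if (!segment_eq(get_fs(), USER_DS))
		set_fs(USER_DS);	/* silent fix-up, no warning */
#endif
}
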
---
 arch/x86/Kconfig                        |  1 +
 arch/x86/entry/common.c                 |  3 +++
 arch/x86/entry/entry_64.S               | 19 +++++++++++++++++++
 arch/x86/include/asm/pgtable_64_types.h | 11 +++++++++++
 arch/x86/include/asm/processor.h        | 11 -----------
 5 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 005df7c825f5..6d48e18e6f09 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -63,6 +63,7 @@ config X86
 	select ARCH_MIGHT_HAVE_ACPI_PDC		if ACPI
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select ARCH_MIGHT_HAVE_PC_SERIO
+	select ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
 	select ARCH_SUPPORTS_NUMA_BALANCING	if X86_64
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 370c42c7f046..525edbb77f03 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -22,6 +22,7 @@
 #include <linux/context_tracking.h>
 #include <linux/user-return-notifier.h>
 #include <linux/uprobes.h>
+#include <linux/syscalls.h>
 
 #include <asm/desc.h>
 #include <asm/traps.h>
@@ -180,6 +181,8 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
 	struct thread_info *ti = current_thread_info();
 	u32 cached_flags;
 
+	verify_pre_usermode_state();
+
 	if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
 		local_irq_disable();
 
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index d2b2a2948ffe..04db589be466 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -218,6 +218,25 @@ entry_SYSCALL_64_fastpath:
 	testl	$_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
 	jnz	1f
 
+	/*
+	 * Check user-mode state on fast path return, the same check is done
+	 * under the slow path through syscall_return_slowpath.
+	 */
+#ifdef CONFIG_BUG_ON_DATA_CORRUPTION
+	call	verify_pre_usermode_state
+#else
+	/*
+	 * Similar to set_fs(USER_DS) in verify_pre_usermode_state without a
+	 * warning.
+	 */
+	movq	PER_CPU_VAR(current_task), %rax
+	movq	$TASK_SIZE_MAX, %rcx
+	cmp	%rcx, TASK_addr_limit(%rax)
+	jz	1f
+	movq	%rcx, TASK_addr_limit(%rax)
+1:
+#endif
+
 	LOCKDEP_SYS_EXIT
 	TRACE_IRQS_ON		/* user mode is traced as IRQs on */
 	movq	RIP(%rsp), %rcx
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 3a264200c62f..0fbbb79d058c 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -76,4 +76,15 @@ typedef struct { pteval_t pte; } pte_t;
 
 #define EARLY_DYNAMIC_PAGE_TABLES	64
 
+/*
+ * User space process size. 47bits minus one guard page.  The guard
+ * page is necessary on Intel CPUs: if a SYSCALL instruction is at
+ * the highest possible canonical userspace address, then that
+ * syscall will enter the kernel with a non-canonical return
+ * address, and SYSRET will explode dangerously.  We avoid this
+ * particular problem by preventing anything from being mapped
+ * at the maximum canonical address.
+ */
+#define TASK_SIZE_MAX	((_AC(1, UL) << 47) - PAGE_SIZE)
+
 #endif /* _ASM_X86_PGTABLE_64_DEFS_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index f385eca5407a..9bc99d37133e 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -829,17 +829,6 @@ static inline void spin_lock_prefetch(const void *x)
 #define KSTK_ESP(task)		(task_pt_regs(task)->sp)
 
 #else
-/*
- * User space process size. 47bits minus one guard page.  The guard
- * page is necessary on Intel CPUs: if a SYSCALL instruction is at
- * the highest possible canonical userspace address, then that
- * syscall will enter the kernel with a non-canonical return
- * address, and SYSRET will explode dangerously.  We avoid this
- * particular problem by preventing anything from being mapped
- * at the maximum canonical address.
- */
-#define TASK_SIZE_MAX	((1UL << 47) - PAGE_SIZE)
-
 /* This decides where the kernel will search for a free chunk of vm
  * space during mmap's.
  */
-- 
2.12.0.246.ga2ecc84866-goog

* [PATCH v3 3/4] arm/syscalls: Specific usage of verify_pre_usermode_state
  2017-03-11  0:04 ` [kernel-hardening] " Thomas Garnier
@ 2017-03-11  0:05   ` Thomas Garnier
  -1 siblings, 0 replies; 46+ messages in thread
From: Thomas Garnier @ 2017-03-11  0:05 UTC (permalink / raw)
  To: Martin Schwidefsky, Heiko Carstens, David Howells, Arnd Bergmann,
	Al Viro, Dave Hansen, Thomas Garnier, René Nyffenegger,
	Andrew Morton, Kees Cook, Paul E . McKenney, Andy Lutomirski,
	Ard Biesheuvel, Nicolas Pitre, Petr Mladek,
	Sebastian Andrzej Siewior, Sergey Senozhatsky, Helge Deller,
	Rik van Riel, Ingo Molnar, John Stultz, Thomas Gleixner
  Cc: linux-s390, linux-kernel, linux-api, x86, linux-arm-kernel,
	kernel-hardening

Implement specific usage of verify_pre_usermode_state for user-mode
returns on arm.
---
Based on next-20170308
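
A rough C rendering of the addr_limit_fail path added to entry-common.S
below (a sketch only; modify_domain() is the helper named in the
assembly comment):

static void addr_limit_fail_c(void)
{
#ifdef CONFIG_BUG_ON_DATA_CORRUPTION
	verify_pre_usermode_state();
#else
	set_fs(USER_DS);	/* restore the TASK_SIZE limit, no warning */
#ifdef CONFIG_CPU_USE_DOMAINS
	modify_domain(DOMAIN_KERNEL, DOMAIN_CLIENT);
#endif
#endif
}
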
---
 arch/arm/Kconfig               |  1 +
 arch/arm/include/asm/domain.h  |  3 +++
 arch/arm/kernel/entry-common.S | 32 +++++++++++++++++++++++++++++++-
 3 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 0d4e71b42c77..704fd8f197fa 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -12,6 +12,7 @@ config ARM
 	select ARCH_HAVE_CUSTOM_GPIO_H
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_MIGHT_HAVE_PC_PARPORT
+	select ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
 	select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
 	select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7
 	select ARCH_SUPPORTS_ATOMIC_RMW
diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h
index 99d9f630d6b6..fd1cd24ca9fe 100644
--- a/arch/arm/include/asm/domain.h
+++ b/arch/arm/include/asm/domain.h
@@ -58,6 +58,9 @@
 #define domain_mask(dom)	((3) << (2 * (dom)))
 #define domain_val(dom,type)	((type) << (2 * (dom)))
 
+#define DOMAIN_KERNEL_MASK	domain_mask(DOMAIN_KERNEL)
+#define DOMAIN_CLIENT_VALUE	domain_val(DOMAIN_KERNEL, DOMAIN_CLIENT)
+
 #ifdef CONFIG_CPU_SW_DOMAIN_PAN
 #define DACR_INIT \
 	(domain_val(DOMAIN_USER, DOMAIN_NOACCESS) | \
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index eb5cd77bf1d8..8d9bfe9ff313 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -12,6 +12,7 @@
 #include <asm/unistd.h>
 #include <asm/ftrace.h>
 #include <asm/unwind.h>
+#include <asm/memory.h>
 #ifdef CONFIG_AEABI
 #include <asm/unistd-oabi.h>
 #endif
@@ -27,7 +28,6 @@
 
 #include "entry-header.S"
 
-
 	.align	5
 #if !(IS_ENABLED(CONFIG_TRACE_IRQFLAGS) || IS_ENABLED(CONFIG_CONTEXT_TRACKING))
 /*
@@ -40,9 +40,12 @@ ret_fast_syscall:
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)
 	disable_irq_notrace			@ disable interrupts
+	ldr	r2, [tsk, #TI_ADDR_LIMIT]
 	ldr	r1, [tsk, #TI_FLAGS]		@ re-check for syscall tracing
 	tst	r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
 	bne	fast_work_pending
+	cmp	r2, #TASK_SIZE
+	blne	addr_limit_fail
 
 	/* perform architecture specific actions before user return */
 	arch_ret_to_user r1, lr
@@ -66,6 +69,7 @@ ret_fast_syscall:
  UNWIND(.cantunwind	)
 	str	r0, [sp, #S_R0 + S_OFF]!	@ save returned r0
 	disable_irq_notrace			@ disable interrupts
+	ldr	r2, [tsk, #TI_ADDR_LIMIT]
 	ldr	r1, [tsk, #TI_FLAGS]		@ re-check for syscall tracing
 	tst	r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
 	beq	no_work_pending
@@ -82,6 +86,7 @@ slow_work_pending:
 	mov	r2, why				@ 'syscall'
 	bl	do_work_pending
 	cmp	r0, #0
+	ldreq   r2, [tsk, #TI_ADDR_LIMIT]
 	beq	no_work_pending
 	movlt	scno, #(__NR_restart_syscall - __NR_SYSCALL_BASE)
 	ldmia	sp, {r0 - r6}			@ have to reload r0 - r6
@@ -99,9 +104,12 @@ ret_slow_syscall:
 	disable_irq_notrace			@ disable interrupts
 ENTRY(ret_to_user_from_irq)
 	ldr	r1, [tsk, #TI_FLAGS]
+	ldr	r2, [tsk, #TI_ADDR_LIMIT]
 	tst	r1, #_TIF_WORK_MASK
 	bne	slow_work_pending
 no_work_pending:
+	cmp	r2, #TASK_SIZE
+	blne	addr_limit_fail
 	asm_trace_hardirqs_on save = 0
 
 	/* perform architecture specific actions before user return */
@@ -125,6 +133,28 @@ ENTRY(ret_from_fork)
 	b	ret_slow_syscall
 ENDPROC(ret_from_fork)
 
+addr_limit_fail:
+#ifdef CONFIG_BUG_ON_DATA_CORRUPTION
+	stmfd	sp!, {r0, lr}
+	bl	verify_pre_usermode_state
+	ldmfd	sp!, {r0, lr}
+#else
+	/*
+	 * Similar to set_fs(USER_DS) in verify_pre_usermode_state without a
+	 * warning.
+	 */
+	mov	r2, #TASK_SIZE
+	str	r2, [tsk, #TI_ADDR_LIMIT]
+#ifdef CONFIG_CPU_USE_DOMAINS
+	/* Switch domain like modify_domain(DOMAIN_KERNEL, DOMAIN_CLIENT) */
+	mrc	p15, 0, r2, c3, c0, 0		@ Get domain register
+	bic	r2, r2, #DOMAIN_KERNEL_MASK	@ Clear the domain part
+	orr	r2, r2, #DOMAIN_CLIENT_VALUE	@ Enforce DOMAIN_CLIENT
+	mcr	p15, 0, r2, c3, c0, 0		@ Set domain register
+#endif
+#endif
+	ret     lr
+
 /*=============================================================================
  * SWI handler
  *-----------------------------------------------------------------------------
-- 
2.12.0.246.ga2ecc84866-goog

* [PATCH v3 4/4] arm64/syscalls: Specific usage of verify_pre_usermode_state
  2017-03-11  0:04 ` [kernel-hardening] " Thomas Garnier
@ 2017-03-11  0:05   ` Thomas Garnier
  -1 siblings, 0 replies; 46+ messages in thread
From: Thomas Garnier @ 2017-03-11  0:05 UTC (permalink / raw)
  To: Martin Schwidefsky, Heiko Carstens, David Howells, Arnd Bergmann,
	Al Viro, Dave Hansen, Thomas Garnier, René Nyffenegger,
	Andrew Morton, Kees Cook, Paul E . McKenney, Andy Lutomirski,
	Ard Biesheuvel, Nicolas Pitre, Petr Mladek,
	Sebastian Andrzej Siewior, Sergey Senozhatsky, Helge Deller,
	Rik van Riel, Ingo Molnar, John Stultz, Thomas Gleixner
  Cc: linux-s390, linux-kernel, linux-api, x86, linux-arm-kernel,
	kernel-hardening

Implement specific usage of verify_pre_usermode_state for user-mode
returns on arm64.
---
Based on next-20170308
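
The entry.S hunks below compare TSK_TI_ADDR_LIMIT against TASK_SIZE_64
before each user-mode return and branch to addr_limit_fail on a
mismatch; in rough C that path is (a sketch only; on arm64, set_fs()
itself updates PSTATE.UAO when CONFIG_ARM64_UAO is enabled, which the
ALTERNATIVE in the assembly mirrors):

static void addr_limit_fail_c(void)
{
#ifdef CONFIG_BUG_ON_DATA_CORRUPTION
	verify_pre_usermode_state();
#else
	set_fs(USER_DS);	/* TASK_SIZE_64 limit, no warning */
#endif
}
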
---
 arch/arm64/Kconfig        |  1 +
 arch/arm64/kernel/entry.S | 25 +++++++++++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 896eba61e5ed..da54774838d8 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -24,6 +24,7 @@ config ARM64
 	select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
 	select ARCH_WANT_FRAME_POINTERS
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
+	select ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
 	select ARM_AMBA
 	select ARM_ARCH_TIMER
 	select ARM_GIC
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 43512d4d7df2..685b43771ac9 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -744,6 +744,10 @@ ENDPROC(cpu_switch_to)
 ret_fast_syscall:
 	disable_irq				// disable interrupts
 	str	x0, [sp, #S_X0]			// returned x0
+	ldr	x2, [tsk, #TSK_TI_ADDR_LIMIT]	// check addr limit change
+	mov	x1, #TASK_SIZE_64
+	cmp	x2, x1
+	b.ne	addr_limit_fail
 	ldr	x1, [tsk, #TSK_TI_FLAGS]	// re-check for syscall tracing
 	and	x2, x1, #_TIF_SYSCALL_WORK
 	cbnz	x2, ret_fast_syscall_trace
@@ -771,6 +775,11 @@ work_pending:
  */
 ret_to_user:
 	disable_irq				// disable interrupts
+	ldr	x2, [tsk, #TSK_TI_ADDR_LIMIT]	// check addr limit change
+	mov	x1, #TASK_SIZE_64
+	cmp	x2, x1
+	b.ne	addr_limit_fail
+
 	ldr	x1, [tsk, #TSK_TI_FLAGS]
 	and	x2, x1, #_TIF_WORK_MASK
 	cbnz	x2, work_pending
@@ -779,6 +788,22 @@ finish_ret_to_user:
 	kernel_exit 0
 ENDPROC(ret_to_user)
 
+addr_limit_fail:
+#ifdef CONFIG_BUG_ON_DATA_CORRUPTION
+	stp	x0, lr, [sp,#-16]!
+	bl	verify_pre_usermode_state
+	ldp	x0, lr, [sp],#16
+#else
+	/*
+	 * Similar to set_fs(USER_DS) in verify_pre_usermode_state without a
+	 * warning.
+	 */
+	mov	x2, #TASK_SIZE_64
+	str	x2, [tsk, #TSK_TI_ADDR_LIMIT]
+	ALTERNATIVE(nop, SET_PSTATE_UAO(0), ARM64_HAS_UAO, CONFIG_ARM64_UAO)
+#endif
+	ret	lr
+
 /*
  * This is how we return from a fork.
  */
-- 
2.12.0.246.ga2ecc84866-goog

* Re: [PATCH v3 2/4] x86/syscalls: Specific usage of verify_pre_usermode_state
  2017-03-11  0:04   ` [kernel-hardening] " Thomas Garnier
@ 2017-03-11  9:42     ` Ingo Molnar
  -1 siblings, 0 replies; 46+ messages in thread
From: Ingo Molnar @ 2017-03-11  9:42 UTC (permalink / raw)
  To: Thomas Garnier
  Cc: Martin Schwidefsky, Heiko Carstens, David Howells, Arnd Bergmann,
	Al Viro, Dave Hansen, René Nyffenegger, Andrew Morton,
	Kees Cook, Paul E . McKenney, Andy Lutomirski, Ard Biesheuvel,
	Nicolas Pitre, Petr Mladek, Sebastian Andrzej Siewior,
	Sergey Senozhatsky, Helge Deller, Rik van Riel, John Stultz,
	Thomas Gleixner, Oleg Nesterov, Stephen Smalley


* Thomas Garnier <thgarnie@google.com> wrote:

> Implement specific usage of verify_pre_usermode_state for user-mode
> returns on x86.
> ---
> Based on next-20170308
> ---
>  arch/x86/Kconfig                        |  1 +
>  arch/x86/entry/common.c                 |  3 +++
>  arch/x86/entry/entry_64.S               | 19 +++++++++++++++++++
>  arch/x86/include/asm/pgtable_64_types.h | 11 +++++++++++
>  arch/x86/include/asm/processor.h        | 11 -----------
>  5 files changed, 34 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 005df7c825f5..6d48e18e6f09 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -63,6 +63,7 @@ config X86
>  	select ARCH_MIGHT_HAVE_ACPI_PDC		if ACPI
>  	select ARCH_MIGHT_HAVE_PC_PARPORT
>  	select ARCH_MIGHT_HAVE_PC_SERIO
> +	select ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
>  	select ARCH_SUPPORTS_ATOMIC_RMW
>  	select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
>  	select ARCH_SUPPORTS_NUMA_BALANCING	if X86_64
> diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
> index 370c42c7f046..525edbb77f03 100644
> --- a/arch/x86/entry/common.c
> +++ b/arch/x86/entry/common.c
> @@ -22,6 +22,7 @@
>  #include <linux/context_tracking.h>
>  #include <linux/user-return-notifier.h>
>  #include <linux/uprobes.h>
> +#include <linux/syscalls.h>
>  
>  #include <asm/desc.h>
>  #include <asm/traps.h>
> @@ -180,6 +181,8 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
>  	struct thread_info *ti = current_thread_info();
>  	u32 cached_flags;
>  
> +	verify_pre_usermode_state();
> +
>  	if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
>  		local_irq_disable();
>  
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index d2b2a2948ffe..04db589be466 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -218,6 +218,25 @@ entry_SYSCALL_64_fastpath:
>  	testl	$_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
>  	jnz	1f
>  
> +	/*
> +	 * Check user-mode state on fast path return, the same check is done
> +	 * under the slow path through syscall_return_slowpath.
> +	 */
> +#ifdef CONFIG_BUG_ON_DATA_CORRUPTION
> +	call	verify_pre_usermode_state
> +#else
> +	/*
> +	 * Similar to set_fs(USER_DS) in verify_pre_usermode_state without a
> +	 * warning.
> +	 */
> +	movq	PER_CPU_VAR(current_task), %rax
> +	movq	$TASK_SIZE_MAX, %rcx
> +	cmp	%rcx, TASK_addr_limit(%rax)
> +	jz	1f
> +	movq	%rcx, TASK_addr_limit(%rax)
> +1:
> +#endif
> +
>  	LOCKDEP_SYS_EXIT
>  	TRACE_IRQS_ON		/* user mode is traced as IRQs on */
>  	movq	RIP(%rsp), %rcx

Ugh, so you call an assembly function just to ... call another function.

Plus why is it in assembly to begin with? Is this some older code that got
written when the x86 entry code was in assembly, and never properly
converted to C?

Thanks,

	Ingo

* Re: [PATCH v3 2/4] x86/syscalls: Specific usage of verify_pre_usermode_state
  2017-03-11  9:42     ` [kernel-hardening] " Ingo Molnar
@ 2017-03-13 15:53       ` Thomas Garnier
  -1 siblings, 0 replies; 46+ messages in thread
From: Thomas Garnier @ 2017-03-13 15:53 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Martin Schwidefsky, Heiko Carstens, David Howells, Arnd Bergmann,
	Al Viro, Dave Hansen, René Nyffenegger, Andrew Morton,
	Kees Cook, Paul E . McKenney, Andy Lutomirski, Ard Biesheuvel,
	Nicolas Pitre, Petr Mladek, Sebastian Andrzej Siewior,
	Sergey Senozhatsky, Helge Deller, Rik van Riel, John Stultz,
	Thomas Gleixner, Oleg Nesterov, Stephen Smalley

On Sat, Mar 11, 2017 at 1:42 AM, Ingo Molnar <mingo@kernel.org> wrote:
>
> Ugh, so you call an assembly function just to ... call another function.
>

The verify_pre_usermode_state function is the architecture-independent
checker. By default it is added to each syscall handler, so calling it
here saves code size.
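
Concretely, with the generic path each wrapper generated by
__SYSCALL_DEFINEx expands to roughly the following (simplified sketch
based on patch 1/4, using close(2) as the example and omitting the
__SC_TEST/__PROTECT steps):

asmlinkage long SyS_close(long fd)
{
	long ret;
	bool user_caller = segment_eq(get_fs(), USER_DS);

	ret = SYSC_close((unsigned int)fd);
	if (user_caller)
		verify_pre_usermode_state();
	return ret;
}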

> Plus why is it in assembly to begin with? Is this some older code that got
> written when the x86 entry code was in assembly, and never properly
> converted to C?

I wrote the assembly to make it faster and save a call on each syscall
return. I can just call verify_pre_usermode_state if you prefer.

-- 
Thomas

* Re: [PATCH v3 2/4] x86/syscalls: Specific usage of verify_pre_usermode_state
@ 2017-03-13 21:48       ` H. Peter Anvin
  0 siblings, 0 replies; 46+ messages in thread
From: H. Peter Anvin @ 2017-03-13 21:48 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Garnier
  Cc: Martin Schwidefsky, Heiko Carstens, David Howells, Arnd Bergmann,
	Al Viro, Dave Hansen, René Nyffenegger, Andrew Morton,
	Kees Cook, Paul E . McKenney, Andy Lutomirski, Ard Biesheuvel,
	Nicolas Pitre, Petr Mladek, Sebastian Andrzej Siewior,
	Sergey Senozhatsky, Helge Deller, Rik van Riel, John Stultz,
	Thomas Gleixner, Oleg Nesterov, Stephen Smalley,
	Pavel Tikhomirov, Frederic Weisbecker, Stanislav Kinsburskiy,
	Ingo Molnar, Paolo Bonzini, Dmitry Safonov, Borislav Petkov,
	Josh Poimboeuf, Brian Gerst, Jan Beulich, Christian Borntraeger,
	Fenghua Yu, He Chen, Russell King, Vladimir Murzin, Will Deacon,
	Catalin Marinas, Mark Rutland, James Morse, David A . Long,
	Pratyush Anand, Laura Abbott, Andre Przywara, Chris Metcalf,
	linux-s390, linux-kernel, linux-api, x86, linux-arm-kernel,
	kernel-hardening

On March 11, 2017 1:42:00 AM PST, Ingo Molnar <mingo@kernel.org> wrote:
>
>* Thomas Garnier <thgarnie@google.com> wrote:
>
>> Implement specific usage of verify_pre_usermode_state for user-mode
>> returns on x86.
>> ---
>> Based on next-20170308
>> ---
>>  arch/x86/Kconfig                        |  1 +
>>  arch/x86/entry/common.c                 |  3 +++
>>  arch/x86/entry/entry_64.S               | 19 +++++++++++++++++++
>>  arch/x86/include/asm/pgtable_64_types.h | 11 +++++++++++
>>  arch/x86/include/asm/processor.h        | 11 -----------
>>  5 files changed, 34 insertions(+), 11 deletions(-)
>> 
>> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
>> index 005df7c825f5..6d48e18e6f09 100644
>> --- a/arch/x86/Kconfig
>> +++ b/arch/x86/Kconfig
>> @@ -63,6 +63,7 @@ config X86
>>  	select ARCH_MIGHT_HAVE_ACPI_PDC		if ACPI
>>  	select ARCH_MIGHT_HAVE_PC_PARPORT
>>  	select ARCH_MIGHT_HAVE_PC_SERIO
>> +	select ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
>>  	select ARCH_SUPPORTS_ATOMIC_RMW
>>  	select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
>>  	select ARCH_SUPPORTS_NUMA_BALANCING	if X86_64
>> diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
>> index 370c42c7f046..525edbb77f03 100644
>> --- a/arch/x86/entry/common.c
>> +++ b/arch/x86/entry/common.c
>> @@ -22,6 +22,7 @@
>>  #include <linux/context_tracking.h>
>>  #include <linux/user-return-notifier.h>
>>  #include <linux/uprobes.h>
>> +#include <linux/syscalls.h>
>>  
>>  #include <asm/desc.h>
>>  #include <asm/traps.h>
>> @@ -180,6 +181,8 @@ __visible inline void
>prepare_exit_to_usermode(struct pt_regs *regs)
>>  	struct thread_info *ti = current_thread_info();
>>  	u32 cached_flags;
>>  
>> +	verify_pre_usermode_state();
>> +
>>  	if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
>>  		local_irq_disable();
>>  
>> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
>> index d2b2a2948ffe..04db589be466 100644
>> --- a/arch/x86/entry/entry_64.S
>> +++ b/arch/x86/entry/entry_64.S
>> @@ -218,6 +218,25 @@ entry_SYSCALL_64_fastpath:
>>  	testl	$_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
>>  	jnz	1f
>>  
>> +	/*
>> +	 * Check user-mode state on fast path return, the same check is
>done
>> +	 * under the slow path through syscall_return_slowpath.
>> +	 */
>> +#ifdef CONFIG_BUG_ON_DATA_CORRUPTION
>> +	call	verify_pre_usermode_state
>> +#else
>> +	/*
>> +	 * Similar to set_fs(USER_DS) in verify_pre_usermode_state without
>a
>> +	 * warning.
>> +	 */
>> +	movq	PER_CPU_VAR(current_task), %rax
>> +	movq	$TASK_SIZE_MAX, %rcx
>> +	cmp	%rcx, TASK_addr_limit(%rax)
>> +	jz	1f
>> +	movq	%rcx, TASK_addr_limit(%rax)
>> +1:
>> +#endif
>> +
>>  	LOCKDEP_SYS_EXIT
>>  	TRACE_IRQS_ON		/* user mode is traced as IRQs on */
>>  	movq	RIP(%rsp), %rcx
>
>Ugh, so you call an assembly function just to ... call another
>function.
>
>Plus why is it in assembly to begin with? Is this some older code that
>got
>written when the x86 entry code was in assembly, and never properly
>converted to C?
>
>Thanks,
>
>	Ingo

The code does a compare to jump around a store.  It would be much cleaner and faster to simply clobber the value unconditionally.  If there is a test it should be to avoid the function call, not (only) the assignment.
-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH v3 2/4] x86/syscalls: Specific usage of verify_pre_usermode_state
@ 2017-03-13 21:48       ` H. Peter Anvin
  0 siblings, 0 replies; 46+ messages in thread
From: H. Peter Anvin @ 2017-03-13 21:48 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Garnier
  Cc: Martin Schwidefsky, Heiko Carstens, David Howells, Arnd Bergmann,
	Al Viro, Dave Hansen, René Nyffenegger, Andrew Morton,
	Kees Cook, Paul E . McKenney, Andy Lutomirski, Ard Biesheuvel,
	Nicolas Pitre, Petr Mladek, Sebastian Andrzej Siewior,
	Sergey Senozhatsky, Helge Deller, Rik van Riel

<skinsbursky-5HdwGun5lf+gSpxsJD1C4w@public.gmane.org>,Ingo Molnar <mingo-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>,Paolo Bonzini <pbonzini-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>,Dmitry Safonov <dsafonov-5HdwGun5lf+gSpxsJD1C4w@public.gmane.org>,Borislav Petkov <bp-Gina5bIWoIWzQB+pC5nmwQ@public.gmane.org>,Josh Poimboeuf <jpoimboe-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>,Brian Gerst <brgerst-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>,Jan Beulich <JBeulich-IBi9RG/b67k@public.gmane.org>,Christian Borntraeger <borntraeger-tA70FqPdS9bQT0dZR+AlfA@public.gmane.org>,Fenghua Yu <fenghua.yu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>,He Chen <he.chen-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>,Russell King <linux-I+IVW8TIWO2tmTQ+vhA3Yw@public.gmane.org>,Vladimir Murzin <vladimir.murzin-5wv7dgnIgG8@public.gmane.org>,Will Deacon <will.deacon-5wv7dgnIgG8@public.gmane.org>,Catalin Marinas <catalin.marinas-5wv7dgnIgG8@public.gmane.org>,Mark Rutland <mark.rutland-5wv7dgnIgG8@public.gmane.org>,James Morse <james.morse-5wv7dgnIgG8@public.gmane.org>,"David A . Long" <dave.long-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>,Pratyush Anand <panand-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>,Laura Abbott <labbott-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>,Andre Przywara <andre.przywara-5wv7dgnIgG8@public.gmane.org>,Chris Metcalf <cmetcalf-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>,linux-
 s390-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,linux-api-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,x86-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org,linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r@public.gmane.org,kernel-hardening-ZwoEplunGu1jrUoiu81ncdBPR1lH4CV8@public.gmane.org
From: hpa-YMNOUZJC4hwAvxtiuMwx3w@public.gmane.org
Message-ID: <BB78ABF9-382E-43E8-BAC6-1EA6416A30DB-YMNOUZJC4hwAvxtiuMwx3w@public.gmane.org>

On March 11, 2017 1:42:00 AM PST, Ingo Molnar <mingo-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org> wrote:
>
>* Thomas Garnier <thgarnie-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> wrote:
>
>> Implement specific usage of verify_pre_usermode_state for user-mode
>> returns for x86.
>> ---
>> Based on next-20170308
>> ---
>>  arch/x86/Kconfig                        |  1 +
>>  arch/x86/entry/common.c                 |  3 +++
>>  arch/x86/entry/entry_64.S               | 19 +++++++++++++++++++
>>  arch/x86/include/asm/pgtable_64_types.h | 11 +++++++++++
>>  arch/x86/include/asm/processor.h        | 11 -----------
>>  5 files changed, 34 insertions(+), 11 deletions(-)
>> 
>> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
>> index 005df7c825f5..6d48e18e6f09 100644
>> --- a/arch/x86/Kconfig
>> +++ b/arch/x86/Kconfig
>> @@ -63,6 +63,7 @@ config X86
>>  	select ARCH_MIGHT_HAVE_ACPI_PDC		if ACPI
>>  	select ARCH_MIGHT_HAVE_PC_PARPORT
>>  	select ARCH_MIGHT_HAVE_PC_SERIO
>> +	select ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
>>  	select ARCH_SUPPORTS_ATOMIC_RMW
>>  	select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
>>  	select ARCH_SUPPORTS_NUMA_BALANCING	if X86_64
>> diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
>> index 370c42c7f046..525edbb77f03 100644
>> --- a/arch/x86/entry/common.c
>> +++ b/arch/x86/entry/common.c
>> @@ -22,6 +22,7 @@
>>  #include <linux/context_tracking.h>
>>  #include <linux/user-return-notifier.h>
>>  #include <linux/uprobes.h>
>> +#include <linux/syscalls.h>
>>  
>>  #include <asm/desc.h>
>>  #include <asm/traps.h>
>> @@ -180,6 +181,8 @@ __visible inline void
>prepare_exit_to_usermode(struct pt_regs *regs)
>>  	struct thread_info *ti = current_thread_info();
>>  	u32 cached_flags;
>>  
>> +	verify_pre_usermode_state();
>> +
>>  	if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
>>  		local_irq_disable();
>>  
>> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
>> index d2b2a2948ffe..04db589be466 100644
>> --- a/arch/x86/entry/entry_64.S
>> +++ b/arch/x86/entry/entry_64.S
>> @@ -218,6 +218,25 @@ entry_SYSCALL_64_fastpath:
>>  	testl	$_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
>>  	jnz	1f
>>  
>> +	/*
>> +	 * Check user-mode state on fast path return, the same check is
>done
>> +	 * under the slow path through syscall_return_slowpath.
>> +	 */
>> +#ifdef CONFIG_BUG_ON_DATA_CORRUPTION
>> +	call	verify_pre_usermode_state
>> +#else
>> +	/*
>> +	 * Similar to set_fs(USER_DS) in verify_pre_usermode_state without
>a
>> +	 * warning.
>> +	 */
>> +	movq	PER_CPU_VAR(current_task), %rax
>> +	movq	$TASK_SIZE_MAX, %rcx
>> +	cmp	%rcx, TASK_addr_limit(%rax)
>> +	jz	1f
>> +	movq	%rcx, TASK_addr_limit(%rax)
>> +1:
>> +#endif
>> +
>>  	LOCKDEP_SYS_EXIT
>>  	TRACE_IRQS_ON		/* user mode is traced as IRQs on */
>>  	movq	RIP(%rsp), %rcx
>
>Ugh, so you call an assembly function just to ... call another
>function.
>
>Plus why is it in assembly to begin with? Is this some older code that got
>written when the x86 entry code was in assembly, and never properly
>converted to C?
>
>Thanks,
>
>	Ingo

The code does a compare to jump around a store.  It would be much cleaner and faster to simply clobber the value unconditionally.  If there is a test, it should be to avoid the function call, not (only) the assignment.
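
Roughly, in C terms (a sketch using the generic get_fs()/set_fs()
helpers; the function names here are illustrative, not the patch's
actual code):

#include <linux/uaccess.h>	/* get_fs(), set_fs(), USER_DS, segment_eq() */

/* What the #else branch above does: compare, then store only on mismatch. */
static inline void reset_addr_limit_branchy(void)
{
	if (!segment_eq(get_fs(), USER_DS))
		set_fs(USER_DS);
}

/* The suggestion: store unconditionally; no branch on the hot path. */
static inline void reset_addr_limit_clobber(void)
{
	set_fs(USER_DS);
}
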
-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH v3 2/4] x86/syscalls: Specific usage of verify_pre_usermode_state
  2017-03-11  9:42     ` [kernel-hardening] " Ingo Molnar
@ 2017-03-14  0:04       ` H. Peter Anvin
  -1 siblings, 0 replies; 46+ messages in thread
From: H. Peter Anvin @ 2017-03-14  0:04 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Garnier
  Cc: Martin Schwidefsky, Heiko Carstens, David Howells, Arnd Bergmann,
	Al Viro, Dave Hansen, René Nyffenegger, Andrew Morton,
	Kees Cook, Paul E . McKenney, Andy Lutomirski, Ard Biesheuvel,
	Nicolas Pitre, Petr Mladek, Sebastian Andrzej Siewior,
	Sergey Senozhatsky, Helge Deller, Rik van Riel, John Stultz

On 03/11/17 01:42, Ingo Molnar wrote:
>>  
>> +	/*
>> +	 * Check user-mode state on fast path return, the same check is done
>> +	 * under the slow path through syscall_return_slowpath.
>> +	 */
>> +#ifdef CONFIG_BUG_ON_DATA_CORRUPTION
>> +	call	verify_pre_usermode_state
>> +#else
>> +	/*
>> +	 * Similar to set_fs(USER_DS) in verify_pre_usermode_state without a
>> +	 * warning.
>> +	 */
>> +	movq	PER_CPU_VAR(current_task), %rax
>> +	movq	$TASK_SIZE_MAX, %rcx
>> +	cmp	%rcx, TASK_addr_limit(%rax)
>> +	jz	1f
>> +	movq	%rcx, TASK_addr_limit(%rax)
>> +1:
>> +#endif
>> +

How about simply doing...

	movq	PER_CPU_VAR(current_task), %rax
	movq	$TASK_SIZE_MAX, %rcx
#ifdef CONFIG_BUG_ON_DATA_CORRUPTION
	cmpq	%rcx, TASK_addr_limit(%rax)
	jne	syscall_return_slowpath
#else
	movq	%rcx, TASK_addr_limit(%rax)
#endif

... and let the slow path take care of BUG.  This should be much faster,
even with the BUG, and is simpler to boot.

	-hpa

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH v3 2/4] x86/syscalls: Specific usage of verify_pre_usermode_state
  2017-03-14  0:04       ` [kernel-hardening] " H. Peter Anvin
@ 2017-03-14  9:40         ` H. Peter Anvin
  -1 siblings, 0 replies; 46+ messages in thread
From: H. Peter Anvin @ 2017-03-14  9:40 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Garnier
  Cc: Martin Schwidefsky, Heiko Carstens, David Howells, Arnd Bergmann,
	Al Viro, Dave Hansen, René Nyffenegger, Andrew Morton,
	Kees Cook, Paul E . McKenney, Andy Lutomirski, Ard Biesheuvel,
	Nicolas Pitre, Petr Mladek, Sebastian Andrzej Siewior,
	Sergey Senozhatsky, Helge Deller, Rik van Riel, John Stultz

On 03/13/17 17:04, H. Peter Anvin wrote:
> On 03/11/17 01:42, Ingo Molnar wrote:
>>>  
>>> +	/*
>>> +	 * Check user-mode state on fast path return, the same check is done
>>> +	 * under the slow path through syscall_return_slowpath.
>>> +	 */
>>> +#ifdef CONFIG_BUG_ON_DATA_CORRUPTION
>>> +	call	verify_pre_usermode_state
>>> +#else
>>> +	/*
>>> +	 * Similar to set_fs(USER_DS) in verify_pre_usermode_state without a
>>> +	 * warning.
>>> +	 */
>>> +	movq	PER_CPU_VAR(current_task), %rax
>>> +	movq	$TASK_SIZE_MAX, %rcx
>>> +	cmp	%rcx, TASK_addr_limit(%rax)
>>> +	jz	1f
>>> +	movq	%rcx, TASK_addr_limit(%rax)
>>> +1:
>>> +#endif
>>> +
> 
> How about simply doing...
> 
> 	movq	PER_CPU_VAR(current_task), %rax
> 	movq	$TASK_SIZE_MAX, %rcx
> #ifdef CONFIG_BUG_ON_DATA_CORRUPTION
> 	cmpq	%rcx, TASK_addr_limit(%rax)
> 	jne	syscall_return_slowpath
> #else
> 	movq	%rcx, TASK_addr_limit(%rax)
> #endif
> 
> ... and let the slow path take care of BUG.  This should be much faster,
> even with the BUG, and is simpler to boot.
> 

In fact, we could even do the cmpq/jne unconditionally.  I'm guessing
the occasional branch mispredict will be offset by occasionally touching
a clean cacheline in the case of an unconditional store.

Since this is something that should never happen, performance doesn't
matter.

	-hpa

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH v3 2/4] x86/syscalls: Specific usage of verify_pre_usermode_state
  2017-03-14  9:40         ` [kernel-hardening] " H. Peter Anvin
@ 2017-03-14 15:17           ` Thomas Garnier
  -1 siblings, 0 replies; 46+ messages in thread
From: Thomas Garnier @ 2017-03-14 15:17 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Ingo Molnar, Martin Schwidefsky, Heiko Carstens, David Howells,
	Arnd Bergmann, Al Viro, Dave Hansen, René Nyffenegger,
	Andrew Morton, Kees Cook, Paul E . McKenney, Andy Lutomirski,
	Ard Biesheuvel, Nicolas Pitre, Petr Mladek,
	Sebastian Andrzej Siewior, Sergey Senozhatsky, Helge Deller,
	Rik van Riel, John Stultz, Thomas Gleixner, Oleg Nesterov

On Tue, Mar 14, 2017 at 2:40 AM, H. Peter Anvin <hpa@zytor.com> wrote:
> On 03/13/17 17:04, H. Peter Anvin wrote:
>> On 03/11/17 01:42, Ingo Molnar wrote:
>>>>
>>>> +   /*
>>>> +    * Check user-mode state on fast path return, the same check is done
>>>> +    * under the slow path through syscall_return_slowpath.
>>>> +    */
>>>> +#ifdef CONFIG_BUG_ON_DATA_CORRUPTION
>>>> +   call    verify_pre_usermode_state
>>>> +#else
>>>> +   /*
>>>> +    * Similar to set_fs(USER_DS) in verify_pre_usermode_state without a
>>>> +    * warning.
>>>> +    */
>>>> +   movq    PER_CPU_VAR(current_task), %rax
>>>> +   movq    $TASK_SIZE_MAX, %rcx
>>>> +   cmp     %rcx, TASK_addr_limit(%rax)
>>>> +   jz      1f
>>>> +   movq    %rcx, TASK_addr_limit(%rax)
>>>> +1:
>>>> +#endif
>>>> +
>>
>> How about simply doing...
>>
>>       movq    PER_CPU_VAR(current_task), %rax
>>       movq    $TASK_SIZE_MAX, %rcx
>> #ifdef CONFIG_BUG_ON_DATA_CORRUPTION
>>       cmpq    %rcx, TASK_addr_limit(%rax)
>>       jne     syscall_return_slowpath
>> #else
>>       movq    %rcx, TASK_addr_limit(%rax)
>> #endif
>>
>> ... and let the slow path take care of BUG.  This should be much faster,
>> even with the BUG, and is simpler to boot.
>>
>
> In fact, we could even do the cmpq/jne unconditionally.  I'm guessing
> the occasional branch mispredict will be offset by occasionally touching
> a clean cacheline in the case of an unconditional store.
>
> Since this is something that should never happen, performance doesn't
> matter.

Ingo: Which approach do you favor? I want to keep the fast path as
fast as possible obviously.

>
>         -hpa
>
>



-- 
Thomas

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH v3 2/4] x86/syscalls: Specific usage of verify_pre_usermode_state
  2017-03-14 15:17           ` [kernel-hardening] " Thomas Garnier
@ 2017-03-14 15:39             ` Andy Lutomirski
  -1 siblings, 0 replies; 46+ messages in thread
From: Andy Lutomirski @ 2017-03-14 15:39 UTC (permalink / raw)
  To: Thomas Garnier
  Cc: H. Peter Anvin, Ingo Molnar, Martin Schwidefsky, Heiko Carstens,
	David Howells, Arnd Bergmann, Al Viro, Dave Hansen,
	René Nyffenegger, Andrew Morton, Kees Cook,
	Paul E . McKenney, Andy Lutomirski, Ard Biesheuvel,
	Nicolas Pitre, Petr Mladek, Sebastian Andrzej Siewior,
	Sergey Senozhatsky, Helge Deller, Rik van Riel, John Stultz,
	Thomas Gleixner

On Tue, Mar 14, 2017 at 8:17 AM, Thomas Garnier <thgarnie@google.com> wrote:
> On Tue, Mar 14, 2017 at 2:40 AM, H. Peter Anvin <hpa@zytor.com> wrote:
>> On 03/13/17 17:04, H. Peter Anvin wrote:
>>> On 03/11/17 01:42, Ingo Molnar wrote:
>>>>>
>>>>> +   /*
>>>>> +    * Check user-mode state on fast path return, the same check is done
>>>>> +    * under the slow path through syscall_return_slowpath.
>>>>> +    */
>>>>> +#ifdef CONFIG_BUG_ON_DATA_CORRUPTION
>>>>> +   call    verify_pre_usermode_state
>>>>> +#else
>>>>> +   /*
>>>>> +    * Similar to set_fs(USER_DS) in verify_pre_usermode_state without a
>>>>> +    * warning.
>>>>> +    */
>>>>> +   movq    PER_CPU_VAR(current_task), %rax
>>>>> +   movq    $TASK_SIZE_MAX, %rcx
>>>>> +   cmp     %rcx, TASK_addr_limit(%rax)
>>>>> +   jz      1f
>>>>> +   movq    %rcx, TASK_addr_limit(%rax)
>>>>> +1:
>>>>> +#endif
>>>>> +
>>>
>>> How about simply doing...
>>>
>>>       movq    PER_CPU_VAR(current_task), %rax
>>>       movq    $TASK_SIZE_MAX, %rcx
>>> #ifdef CONFIG_BUG_ON_DATA_CORRUPTION
>>>       cmpq    %rcx, TASK_addr_limit(%rax)
>>>       jne     syscall_return_slowpath
>>> #else
>>>       movq    %rcx, TASK_addr_limit(%rax)
>>> #endif
>>>
>>> ... and let the slow path take care of BUG.  This should be much faster,
>>> even with the BUG, and is simpler to boot.
>>>
>>
>> In fact, we could even do the cmpq/jne unconditionally.  I'm guessing
>> the occasional branch mispredict will be offset by occasionally touching
>> a clean cacheline in the case of an unconditional store.
>>
>> Since this is something that should never happen, performance doesn't
>> matter.
>
> Ingo: Which approach do you favor? I want to keep the fast path as
> fast as possible obviously.

Even though my name isn't Ingo, Linus keeps trying to get me to be the
actual maintainer of this file.  :)  How about (sorry about whitespace
damage):

#ifdef CONFIG_BUG_ON_DATA_CORRUPTION
       movq    PER_CPU_VAR(current_task), %rax
       bt $63, TASK_addr_limit(%rax)
       jc     syscall_return_slowpath
#endif

Now the kernel is totally unchanged if the config option is off and
it's fast and simple if the option is on.
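
The bit-63 test works because KERNEL_DS sets the address limit to -1UL
(all bits set) while the USER_DS limit, TASK_SIZE_MAX, is far below
2^63, so the sign bit alone distinguishes the two.  A C sketch of the
same predicate (assuming the x86-64 mm_segment_t layout, a struct
wrapping a single unsigned long):

#include <linux/uaccess.h>	/* get_fs(), KERNEL_DS, USER_DS */

/* Sketch: true when the current address limit is a kernel one. */
static inline bool addr_limit_is_kernel(void)
{
	/* KERNEL_DS: seg == -1UL, bit 63 set; USER_DS: bit 63 clear. */
	return (long)get_fs().seg < 0;
}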

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH v3 2/4] x86/syscalls: Specific usage of verify_pre_usermode_state
  2017-03-14 15:39             ` [kernel-hardening] " Andy Lutomirski
@ 2017-03-14 16:29               ` Thomas Garnier
  -1 siblings, 0 replies; 46+ messages in thread
From: Thomas Garnier @ 2017-03-14 16:29 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: H. Peter Anvin, Ingo Molnar, Martin Schwidefsky, Heiko Carstens,
	David Howells, Arnd Bergmann, Al Viro, Dave Hansen,
	René Nyffenegger, Andrew Morton, Kees Cook,
	Paul E . McKenney, Andy Lutomirski, Ard Biesheuvel,
	Nicolas Pitre, Petr Mladek, Sebastian Andrzej Siewior,
	Sergey Senozhatsky, Helge Deller, Rik van Riel, John Stultz,
	Thomas Gleixner

On Tue, Mar 14, 2017 at 8:39 AM, Andy Lutomirski <luto@amacapital.net> wrote:
> Even though my name isn't Ingo, Linus keeps trying to get me to be the
> actual maintainer of this file.  :)  How about (sorry about whitespace
> damage):

:D

>
> #ifdef CONFIG_BUG_ON_DATA_CORRUPTION
>        movq    PER_CPU_VAR(current_task), %rax
>        bt $63, TASK_addr_limit(%rax)
>        jc     syscall_return_slowpath
> #endif
>
> Now the kernel is totally unchanged if the config option is off and
> it's fast and simple if the option is on.

I like using bt for fast comparison.

We want to enforce the address limit by default, not only when
CONFIG_BUG_ON_DATA_CORRUPTION is enabled. I tested this one:

/* Check user-mode state on fast path return. */
movq PER_CPU_VAR(current_task), %rax
btq $63, TASK_addr_limit(%rax)
jnc 1f
#ifdef CONFIG_BUG_ON_DATA_CORRUPTION
call syscall_return_slowpath
jmp return_from_SYSCALL_64
#else
movq $TASK_SIZE_MAX, %rcx
movq %rcx, TASK_addr_limit(%rax)
#endif
1:

I saw that syscall_return_slowpath is supposed to be called, not jumped
to. I could just call verify_pre_usermode_state, which would be about
the same.

If we want to avoid the #ifdef, then I guess this one is the best I can think of:

/* Check user-mode state on fast path return. */
movq PER_CPU_VAR(current_task), %rax
btq $63, TASK_addr_limit(%rax)
jnc 1f
call verify_pre_usermode_state
1:

The check is fast and the call will happen only on corruption.
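
In C terms, that last variant is just (a sketch; addr_limit_is_kernel()
stands in for the btq $63 test and is not a real kernel helper):

#include <linux/compiler.h>	/* unlikely() */

static inline void fastpath_addr_limit_check(void)
{
	/* Only a corrupted limit pays for the out-of-line call. */
	if (unlikely(addr_limit_is_kernel()))
		verify_pre_usermode_state();
}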

What do you think?

-- 
Thomas

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH v3 2/4] x86/syscalls: Specific usage of verify_pre_usermode_state
  2017-03-14 15:39             ` [kernel-hardening] " Andy Lutomirski
@ 2017-03-14 16:30               ` H. Peter Anvin
  -1 siblings, 0 replies; 46+ messages in thread
From: H. Peter Anvin @ 2017-03-14 16:30 UTC (permalink / raw)
  To: Andy Lutomirski, Thomas Garnier
  Cc: Ingo Molnar, Martin Schwidefsky, Heiko Carstens, David Howells,
	Arnd Bergmann, Al Viro, Dave Hansen, René Nyffenegger,
	Andrew Morton, Kees Cook, Paul E . McKenney, Andy Lutomirski,
	Ard Biesheuvel, Nicolas Pitre, Petr Mladek,
	Sebastian Andrzej Siewior, Sergey Senozhatsky, Helge Deller,
	Rik van Riel

On 03/14/17 08:39, Andy Lutomirski wrote:
>>
>> Ingo: Which approach do you favor? I want to keep the fast path as
>> fast as possible obviously.
> 
> Even though my name isn't Ingo, Linus keeps trying to get me to be the
> actual maintainer of this file.  :)  How about (sorry about whitespace
> damage):
> 
> #ifdef CONFIG_BUG_ON_DATA_CORRUPTION
>        movq    PER_CPU_VAR(current_task), %rax
>        bt $63, TASK_addr_limit(%rax)
>        jc     syscall_return_slowpath
> #endif
> 
> Now the kernel is totally unchanged if the config option is off and
> it's fast and simple if the option is on.
> 

The idea, as far as I understand, was that the option chooses whether to
clobber the broken value or BUG on it, not whether to do the check at all.
My point, though, was that we can bail out to the slow path if there is
a discrepancy and worry about BUG or not there; performance doesn't
matter one iota if this triggers regardless of the remediation.

It isn't clear that using bt would be faster, though; although it saves
an instruction, that instruction can be hoisted arbitrarily and so is
extremely likely to be hidden in the pipeline.  cmp (which is really a
variant of sub) is one of the basic ALU instructions that are
super-optimized on every CPU, whereas bt is substantially slower on some
implementations.

This version is also "slightly less secure" since it would make it
possible to overwrite the guard page at the end of TASK_SIZE_MAX if one
could figure out a way to put an arbitrary value into this variable, but
I doubt that matters in any way.
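
Concretely, the two checks accept different sets of values; a sketch
(the helper names are illustrative, not real kernel code):

#include <linux/types.h>	/* bool */

static bool passes_bt_check(unsigned long limit)
{
	return !(limit & (1UL << 63));		/* btq $63: sign bit only */
}

static bool passes_cmp_check(unsigned long limit)
{
	return limit == TASK_SIZE_MAX;		/* cmpq: exact match */
}

/*
 * A corrupted limit of, say, TASK_SIZE_MAX + PAGE_SIZE passes the bt
 * check but fails the cmp check -- that is the guard-page exposure
 * described above.
 */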

	-hpa

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH v3 2/4] x86/syscalls: Specific usage of verify_pre_usermode_state
  2017-03-14 16:29               ` [kernel-hardening] " Thomas Garnier
@ 2017-03-14 16:44                 ` H. Peter Anvin
  -1 siblings, 0 replies; 46+ messages in thread
From: H. Peter Anvin @ 2017-03-14 16:44 UTC (permalink / raw)
  To: Thomas Garnier, Andy Lutomirski
  Cc: Ingo Molnar, Martin Schwidefsky, Heiko Carstens, David Howells,
	Arnd Bergmann, Al Viro, Dave Hansen, René Nyffenegger,
	Andrew Morton, Kees Cook, Paul E . McKenney, Andy Lutomirski,
	Ard Biesheuvel, Nicolas Pitre, Petr Mladek,
	Sebastian Andrzej Siewior, Sergey Senozhatsky, Helge Deller,
	Rik van Riel

On 03/14/17 09:29, Thomas Garnier wrote:
> 
> We want to enforce the address limit by default, not only when
> CONFIG_BUG_ON_DATA_CORRUPTION is enabled. I tested this one:
> 
> /* Check user-mode state on fast path return. */
> movq PER_CPU_VAR(current_task), %rax
> btq $63, TASK_addr_limit(%rax)
> jnc 1f
> #ifdef CONFIG_BUG_ON_DATA_CORRUPTION
> call syscall_return_slowpath
> jmp return_from_SYSCALL_64
> #else
> movq $TASK_SIZE_MAX, %rcx
> movq %rcx, TASK_addr_limit(%rax)
> #endif
> 1:
> 
> I saw that syscall_return_slowpath is supposed to be called, not jumped
> to. I could just call verify_pre_usermode_state, which would be about
> the same.

I wanted to comment on that thing: why on earth isn't
verify_pre_usermode_state() an inline?  Making it an out-of-line
function adds pointless extra overhead to the C code when we are talking
about a few instructions.
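
For reference, an inline version would be on the order of a few
instructions (a sketch only -- the real helper lives in kernel/sys.c and
its exact body may differ):

#include <linux/bug.h>		/* CHECK_DATA_CORRUPTION() */
#include <linux/uaccess.h>	/* get_fs(), set_fs(), USER_DS, segment_eq() */

static inline void verify_pre_usermode_state(void)
{
	/* A syscall must never return to user mode with KERNEL_DS. */
	if (CHECK_DATA_CORRUPTION(!segment_eq(get_fs(), USER_DS),
				  "incorrect address limit on user-mode return"))
		set_fs(USER_DS);
}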

Second, you never do a branch-around to handle an exceptional condition
on the fast path: you jump *out of line* to handle the special
condition; a forward branch is preferred since it is slightly more
likely to be predicted not taken.

Now, I finally had a chance to actually look at the full file (I was
preoccupied yesterday), and am a bit disappointed, to say the least.

First of all, the jump target you need is only a handful of instructions
further down the code path; you need to do *exactly* what is done when
the test of _TIF_ALLWORK_MASK right above fires!  Not only that, but
you already have PER_CPU_VAR(current_task) in %r11 just ready to be
used!  This was all in the three instructions immediately prior to the
code you modified...

So, all you'd need would be:

	movq $TASK_SIZE_MAX, %rcx
	cmpq %rcx, TASK_addr_limit(%r11)
	jne 1f

We even get a short jump instruction!

(Using bt saves one more instruction, but see previous caveats about it.)

	-hpa

	

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH v3 2/4] x86/syscalls: Specific usage of verify_pre_usermode_state
  2017-03-14 16:44                 ` [kernel-hardening] " H. Peter Anvin
@ 2017-03-14 16:51                   ` Thomas Garnier
  -1 siblings, 0 replies; 46+ messages in thread
From: Thomas Garnier @ 2017-03-14 16:51 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Andy Lutomirski, Ingo Molnar, Martin Schwidefsky, Heiko Carstens,
	David Howells, Arnd Bergmann, Al Viro, Dave Hansen,
	René Nyffenegger, Andrew Morton, Kees Cook,
	Paul E . McKenney, Andy Lutomirski, Ard Biesheuvel,
	Nicolas Pitre, Petr Mladek, Sebastian Andrzej Siewior,
	Sergey Senozhatsky, Helge Deller, Rik van Riel, John Stultz,
	Thomas Gleixner

On Tue, Mar 14, 2017 at 9:44 AM, H. Peter Anvin <hpa@zytor.com> wrote:
> On 03/14/17 09:29, Thomas Garnier wrote:
>>
>> We want to enforce the address limit by default, not only when
>> CONFIG_BUG_ON_DATA_CORRUPTION is enabled. I tested this one:
>>
>> /* Check user-mode state on fast path return. */
>> movq PER_CPU_VAR(current_task), %rax
>> btq $63, TASK_addr_limit(%rax)
>> jnc 1f
>> #ifdef CONFIG_BUG_ON_DATA_CORRUPTION
>> call syscall_return_slowpath
>> jmp return_from_SYSCALL_64
>> #else
>> movq $TASK_SIZE_MAX, %rcx
>> movq %rcx, TASK_addr_limit(%rax)
>> #endif
>> 1:
>>
>> I saw that syscall_return_slowpath is supposed to be called, not jumped
>> to. I could just call verify_pre_usermode_state, which would be about
>> the same.
>
> I wanted to comment on that thing: why on earth isn't
> verify_pre_usermode_state() an inline?  Making it an out-of-line
> function adds pointless extra overhead to the C code when we are talking
> about a few instructions.

Because, outside of the arch-specific implementation, it is called by each
syscall handler; inlining it would increase the code size a lot.

>
> Second, you never do a branch-around to handle an exceptional condition
> on the fast path: you jump *out of line* to handle the special
> condition; a forward branch is preferred since it is slightly more
> likely to be predicted not taken.
>
> Now, I finally had a chance to actually look at the full file (I was
> preoccupied yesterday), and am a bit disappointed, to say the least.
>
> First of all, the jump target you need is only a handful of instructions
> further down the code path; you need to do *exactly* what is done when
> the test of _TIF_ALLWORK_MASK right above fires!  Not only that, but
> you already have PER_CPU_VAR(current_task) in %r11 just ready to be
> used!  This was all in the three instructions immediately prior to the
> code you modified...

Correct.

>
> So, all you'd need would be:
>
>         movq $TASK_SIZE_MAX, %rcx
>         cmpq %rcx, TASK_addr_limit(%r11)
>         jne 1f
>
> We even get a short jump instruction!
>
> (Using bt saves one more instruction, but see previous caveats about it.)

Okay, that seems fair to me. Andy, what do you think? (Given you
suggested the bt.) Ingo?

Thanks for the feedback.

>
>         -hpa
>
>
>
>



-- 
Thomas

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH v3 2/4] x86/syscalls: Specific usage of verify_pre_usermode_state
  2017-03-14 16:51                   ` [kernel-hardening] " Thomas Garnier
@ 2017-03-14 17:53                     ` H. Peter Anvin
  -1 siblings, 0 replies; 46+ messages in thread
From: H. Peter Anvin @ 2017-03-14 17:53 UTC (permalink / raw)
  To: Thomas Garnier
  Cc: Andy Lutomirski, Ingo Molnar, Martin Schwidefsky, Heiko Carstens,
	David Howells, Arnd Bergmann, Al Viro, Dave Hansen,
	René Nyffenegger, Andrew Morton, Kees Cook,
	Paul E . McKenney, Andy Lutomirski, Ard Biesheuvel,
	Nicolas Pitre, Petr Mladek, Sebastian Andrzej Siewior,
	Sergey Senozhatsky, Helge Deller

On 03/14/17 09:51, Thomas Garnier wrote:
>>
>> I wanted to comment on that thing: why on earth isn't
>> verify_pre_usermode_state() an inline?  Making it an out-of-line
>> function adds pointless extra overhead to the C code when we are talking
>> about a few instructions.
> 
> Because, outside of the arch-specific implementation, it is called by each
> syscall handler; inlining it would increase the code size a lot.
> 

Don't assume that.  On a lot of architectures a function call can be
more expensive than a simple compare and branch, because the compiler
has to assume a whole bunch of registers are lost at that point.

Either way, don't penalize the common architectures for it.  Not okay.

	-hpa

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH v3 2/4] x86/syscalls: Specific usage of verify_pre_usermode_state
  2017-03-14 17:53                     ` [kernel-hardening] " H. Peter Anvin
@ 2017-03-15 17:43                       ` Thomas Garnier
  -1 siblings, 0 replies; 46+ messages in thread
From: Thomas Garnier @ 2017-03-15 17:43 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Andy Lutomirski, Ingo Molnar, Martin Schwidefsky, Heiko Carstens,
	David Howells, Arnd Bergmann, Al Viro, Dave Hansen,
	René Nyffenegger, Andrew Morton, Kees Cook,
	Paul E . McKenney, Andy Lutomirski, Ard Biesheuvel,
	Nicolas Pitre, Petr Mladek, Sebastian Andrzej Siewior,
	Sergey Senozhatsky, Helge Deller, Rik van Riel, John Stultz,
	Thomas Gleixner

Thanks for the feedback. I will look into inlining by default (looking
at code size on different arches); in the meantime, here is the updated
patch for x86:
===========

Implement specific usage of verify_pre_usermode_state for user-mode
returns for x86.
---
Based on next-20170308
---
 arch/x86/Kconfig                        |  1 +
 arch/x86/entry/common.c                 |  3 +++
 arch/x86/entry/entry_64.S               |  8 ++++++++
 arch/x86/include/asm/pgtable_64_types.h | 11 +++++++++++
 arch/x86/include/asm/processor.h        | 11 -----------
 5 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 005df7c825f5..6d48e18e6f09 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -63,6 +63,7 @@ config X86
 	select ARCH_MIGHT_HAVE_ACPI_PDC		if ACPI
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select ARCH_MIGHT_HAVE_PC_SERIO
+	select ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
 	select ARCH_SUPPORTS_NUMA_BALANCING	if X86_64
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 370c42c7f046..525edbb77f03 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -22,6 +22,7 @@
 #include <linux/context_tracking.h>
 #include <linux/user-return-notifier.h>
 #include <linux/uprobes.h>
+#include <linux/syscalls.h>
 
 #include <asm/desc.h>
 #include <asm/traps.h>
@@ -180,6 +181,8 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
 	struct thread_info *ti = current_thread_info();
 	u32 cached_flags;
 
+	verify_pre_usermode_state();
+
 	if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
 		local_irq_disable();
 
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index d2b2a2948ffe..c079b010205c 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -218,6 +218,14 @@ entry_SYSCALL_64_fastpath:
 	testl	$_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
 	jnz	1f
 
+	/*
+	 * If address limit is not based on user-mode, jump to slow path for
+	 * additional security checks.
+	 */
+	movq	$TASK_SIZE_MAX, %rcx
+	cmp	%rcx, TASK_addr_limit(%r11)
+	jnz	1f
+
 	LOCKDEP_SYS_EXIT
 	TRACE_IRQS_ON		/* user mode is traced as IRQs on */
 	movq	RIP(%rsp), %rcx
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 3a264200c62f..0fbbb79d058c 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -76,4 +76,15 @@ typedef struct { pteval_t pte; } pte_t;
 
 #define EARLY_DYNAMIC_PAGE_TABLES	64
 
+/*
+ * User space process size. 47bits minus one guard page.  The guard
+ * page is necessary on Intel CPUs: if a SYSCALL instruction is at
+ * the highest possible canonical userspace address, then that
+ * syscall will enter the kernel with a non-canonical return
+ * address, and SYSRET will explode dangerously.  We avoid this
+ * particular problem by preventing anything from being mapped
+ * at the maximum canonical address.
+ */
+#define TASK_SIZE_MAX	((_AC(1, UL) << 47) - PAGE_SIZE)
+
 #endif /* _ASM_X86_PGTABLE_64_DEFS_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index f385eca5407a..9bc99d37133e 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -829,17 +829,6 @@ static inline void spin_lock_prefetch(const void *x)
 #define KSTK_ESP(task)		(task_pt_regs(task)->sp)
 
 #else
-/*
- * User space process size. 47bits minus one guard page.  The guard
- * page is necessary on Intel CPUs: if a SYSCALL instruction is at
- * the highest possible canonical userspace address, then that
- * syscall will enter the kernel with a non-canonical return
- * address, and SYSRET will explode dangerously.  We avoid this
- * particular problem by preventing anything from being mapped
- * at the maximum canonical address.
- */
-#define TASK_SIZE_MAX	((1UL << 47) - PAGE_SIZE)
-
 /* This decides where the kernel will search for a free chunk of vm
  * space during mmap's.
  */
-- 
2.12.0.367.g23dc2f6d3c-goog
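
One detail in the move worth calling out: TASK_SIZE_MAX is now spelled
with _AC(1, UL) instead of a bare 1UL because the constant is referenced
from entry_64.S as well as from C, and _AC() drops the type suffix when
included from assembly.  From include/uapi/linux/const.h (paraphrased):

#ifdef __ASSEMBLY__
#define _AC(X, Y)	X		/* bare constant for .S files */
#else
#define _AC(X, Y)	(X##Y)		/* typed constant, e.g. 1UL, for C */
#endif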


On Tue, Mar 14, 2017 at 10:53 AM, H. Peter Anvin <hpa@zytor.com> wrote:
> On 03/14/17 09:51, Thomas Garnier wrote:
>>>
>>> I wanted to comment on that thing: why on earth isn't
>>> verify_pre_usermode_state() an inline?  Making it an out-of-line
>>> function adds pointless extra overhead to the C code when we are talking
>>> about a few instructions.
>>
>> Because, outside of the arch-specific implementation, it is called by each
>> syscall handler; inlining it would increase the code size a lot.
>>
>
> Don't assume that.  On a lot of architectures a function call can be
> more expensive than a simple compare and branch, because the compiler
> has to assume a whole bunch of registers are lost at that point.
>
> Either way, don't penalize the common architectures for it.  Not okay.
>
>         -hpa
>



-- 
Thomas

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [kernel-hardening] Re: [PATCH v3 2/4] x86/syscalls: Specific usage of verify_pre_usermode_state
@ 2017-03-15 17:43                       ` Thomas Garnier
  0 siblings, 0 replies; 46+ messages in thread
From: Thomas Garnier @ 2017-03-15 17:43 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Andy Lutomirski, Ingo Molnar, Martin Schwidefsky, Heiko Carstens,
	David Howells, Arnd Bergmann, Al Viro, Dave Hansen,
	René Nyffenegger, Andrew Morton, Kees Cook,
	Paul E . McKenney, Andy Lutomirski, Ard Biesheuvel,
	Nicolas Pitre, Petr Mladek, Sebastian Andrzej Siewior,
	Sergey Senozhatsky, Helge Deller, Rik van Riel, John Stultz,
	Thomas Gleixner, Oleg Nesterov, Stephen Smalley,
	Pavel Tikhomirov, Frederic Weisbecker, Stanislav Kinsburskiy,
	Ingo Molnar, Paolo Bonzini, Dmitry Safonov, Borislav Petkov,
	Josh Poimboeuf, Brian Gerst, Jan Beulich, Christian Borntraeger,
	Fenghua Yu, He Chen, Russell King, Vladimir Murzin, Will Deacon,
	Catalin Marinas, Mark Rutland, James Morse, David A . Long,
	Pratyush Anand, Laura Abbott, Andre Przywara, Chris Metcalf,
	linux-s390, LKML, Linux API, the arch/x86 maintainers,
	linux-arm-kernel, Kernel Hardening

Thanks for the feedback. I will look into inlining by default (looking
at code size on different arch), the updated patch for x86 in the
meantime:
===========

Implement specific usage of verify_pre_usermode_state for user-mode
returns for x86.
---
Based on next-20170308
---
 arch/x86/Kconfig                        |  1 +
 arch/x86/entry/common.c                 |  3 +++
 arch/x86/entry/entry_64.S               |  8 ++++++++
 arch/x86/include/asm/pgtable_64_types.h | 11 +++++++++++
 arch/x86/include/asm/processor.h        | 11 -----------
 5 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 005df7c825f5..6d48e18e6f09 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -63,6 +63,7 @@ config X86
  select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI
  select ARCH_MIGHT_HAVE_PC_PARPORT
  select ARCH_MIGHT_HAVE_PC_SERIO
+ select ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
  select ARCH_SUPPORTS_ATOMIC_RMW
  select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
  select ARCH_SUPPORTS_NUMA_BALANCING if X86_64
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 370c42c7f046..525edbb77f03 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -22,6 +22,7 @@
 #include <linux/context_tracking.h>
 #include <linux/user-return-notifier.h>
 #include <linux/uprobes.h>
+#include <linux/syscalls.h>

 #include <asm/desc.h>
 #include <asm/traps.h>
@@ -180,6 +181,8 @@ __visible inline void
prepare_exit_to_usermode(struct pt_regs *regs)
  struct thread_info *ti = current_thread_info();
  u32 cached_flags;

+ verify_pre_usermode_state();
+
  if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
  local_irq_disable();

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index d2b2a2948ffe..c079b010205c 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -218,6 +218,14 @@ entry_SYSCALL_64_fastpath:
  testl $_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
  jnz 1f

+ /*
+ * If address limit is not based on user-mode, jump to slow path for
+ * additional security checks.
+ */
+ movq $TASK_SIZE_MAX, %rcx
+ cmp %rcx, TASK_addr_limit(%r11)
+ jnz 1f
+
  LOCKDEP_SYS_EXIT
  TRACE_IRQS_ON /* user mode is traced as IRQs on */
  movq RIP(%rsp), %rcx
diff --git a/arch/x86/include/asm/pgtable_64_types.h
b/arch/x86/include/asm/pgtable_64_types.h
index 3a264200c62f..0fbbb79d058c 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -76,4 +76,15 @@ typedef struct { pteval_t pte; } pte_t;

 #define EARLY_DYNAMIC_PAGE_TABLES 64

+/*
+ * User space process size. 47bits minus one guard page.  The guard
+ * page is necessary on Intel CPUs: if a SYSCALL instruction is at
+ * the highest possible canonical userspace address, then that
+ * syscall will enter the kernel with a non-canonical return
+ * address, and SYSRET will explode dangerously.  We avoid this
+ * particular problem by preventing anything from being mapped
+ * at the maximum canonical address.
+ */
+#define TASK_SIZE_MAX ((_AC(1, UL) << 47) - PAGE_SIZE)
+
 #endif /* _ASM_X86_PGTABLE_64_DEFS_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index f385eca5407a..9bc99d37133e 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -829,17 +829,6 @@ static inline void spin_lock_prefetch(const void *x)
 #define KSTK_ESP(task) (task_pt_regs(task)->sp)

 #else
-/*
- * User space process size. 47bits minus one guard page.  The guard
- * page is necessary on Intel CPUs: if a SYSCALL instruction is at
- * the highest possible canonical userspace address, then that
- * syscall will enter the kernel with a non-canonical return
- * address, and SYSRET will explode dangerously.  We avoid this
- * particular problem by preventing anything from being mapped
- * at the maximum canonical address.
- */
-#define TASK_SIZE_MAX ((1UL << 47) - PAGE_SIZE)
-
 /* This decides where the kernel will search for a free chunk of vm
  * space during mmap's.
  */
-- 
2.12.0.367.g23dc2f6d3c-goog


On Tue, Mar 14, 2017 at 10:53 AM, H. Peter Anvin <hpa@zytor.com> wrote:
> On 03/14/17 09:51, Thomas Garnier wrote:
>>>
>>> I wanted to comment on that thing: why on earth isn't
>>> verify_pre_usermode_state() an inline?  Making it an out-of-line
>>> function adds pointless extra overhead to the C code when we are talking
>>> about a few instructions.
>>
>> Because outside of the arch-specific implementation it is called by each
>> syscall handler. It will increase the code size a lot.
>>
>
> Don't assume that.  On a lot of architectures a function call can be
> more expensive than a simple compare and branch, because the compiler
> has to assume a whole bunch of registers are lost at that point.
>
> Either way, don't penalize the common architectures for it.  Not okay.
>
>         -hpa
>
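
(Aside, to make the failure mode concrete: the bug class this check
catches is an unbalanced set_fs(). A minimal sketch, with some_op() a
made-up helper, not code from any real driver:)

	mm_segment_t old_fs = get_fs();
	int err;

	set_fs(KERNEL_DS);	/* widen the limit for a kernel-space access */
	err = some_op();	/* hypothetical operation that can fail */
	if (err)
		return err;	/* bug: the limit is still KERNEL_DS here */
	set_fs(old_fs);

If that early return is taken, the syscall goes back to user-mode with a
kernel address limit, which is exactly what verify_pre_usermode_state()
exists to catch.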



-- 
Thomas

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* Re: [PATCH v3 2/4] x86/syscalls: Specific usage of verify_pre_usermode_state
  2017-03-15 17:43                       ` [kernel-hardening] " Thomas Garnier
@ 2017-03-22 19:15                         ` Thomas Garnier
  -1 siblings, 0 replies; 46+ messages in thread
From: Thomas Garnier @ 2017-03-22 19:15 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Andy Lutomirski, Ingo Molnar, Martin Schwidefsky, Heiko Carstens,
	David Howells, Arnd Bergmann, Al Viro, Dave Hansen,
	René Nyffenegger, Andrew Morton, Kees Cook,
	Paul E . McKenney, Andy Lutomirski, Ard Biesheuvel,
	Nicolas Pitre, Petr Mladek, Sebastian Andrzej Siewior,
	Sergey Senozhatsky, Helge Deller, Rik van Riel, John Stultz,
	Thomas Gleixner

On Wed, Mar 15, 2017 at 10:43 AM, Thomas Garnier <thgarnie@google.com> wrote:
> Thanks for the feedback. I will look into inlining by default (looking
> at code size on different arch), the updated patch for x86 in the
> meantime:

I did a couple of checks and it doesn't seem worth it. I will send a v4
with the change below for additional feedback.

> ===========
>
> Implement specific usage of verify_pre_usermode_state for user-mode
> returns for x86.
> ---
> Based on next-20170308
> ---
>  arch/x86/Kconfig                        |  1 +
>  arch/x86/entry/common.c                 |  3 +++
>  arch/x86/entry/entry_64.S               |  8 ++++++++
>  arch/x86/include/asm/pgtable_64_types.h | 11 +++++++++++
>  arch/x86/include/asm/processor.h        | 11 -----------
>  5 files changed, 23 insertions(+), 11 deletions(-)
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 005df7c825f5..6d48e18e6f09 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -63,6 +63,7 @@ config X86
>   select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI
>   select ARCH_MIGHT_HAVE_PC_PARPORT
>   select ARCH_MIGHT_HAVE_PC_SERIO
> + select ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
>   select ARCH_SUPPORTS_ATOMIC_RMW
>   select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
>   select ARCH_SUPPORTS_NUMA_BALANCING if X86_64
> diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
> index 370c42c7f046..525edbb77f03 100644
> --- a/arch/x86/entry/common.c
> +++ b/arch/x86/entry/common.c
> @@ -22,6 +22,7 @@
>  #include <linux/context_tracking.h>
>  #include <linux/user-return-notifier.h>
>  #include <linux/uprobes.h>
> +#include <linux/syscalls.h>
>
>  #include <asm/desc.h>
>  #include <asm/traps.h>
> @@ -180,6 +181,8 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
>   struct thread_info *ti = current_thread_info();
>   u32 cached_flags;
>
> + verify_pre_usermode_state();
> +
>   if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
>   local_irq_disable();
>
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index d2b2a2948ffe..c079b010205c 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -218,6 +218,14 @@ entry_SYSCALL_64_fastpath:
>   testl $_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
>   jnz 1f
>
> + /*
> + * If address limit is not based on user-mode, jump to slow path for
> + * additional security checks.
> + */
> + movq $TASK_SIZE_MAX, %rcx
> + cmp %rcx, TASK_addr_limit(%r11)
> + jnz 1f
> +
>   LOCKDEP_SYS_EXIT
>   TRACE_IRQS_ON /* user mode is traced as IRQs on */
>   movq RIP(%rsp), %rcx
> diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
> index 3a264200c62f..0fbbb79d058c 100644
> --- a/arch/x86/include/asm/pgtable_64_types.h
> +++ b/arch/x86/include/asm/pgtable_64_types.h
> @@ -76,4 +76,15 @@ typedef struct { pteval_t pte; } pte_t;
>
>  #define EARLY_DYNAMIC_PAGE_TABLES 64
>
> +/*
> + * User space process size. 47bits minus one guard page.  The guard
> + * page is necessary on Intel CPUs: if a SYSCALL instruction is at
> + * the highest possible canonical userspace address, then that
> + * syscall will enter the kernel with a non-canonical return
> + * address, and SYSRET will explode dangerously.  We avoid this
> + * particular problem by preventing anything from being mapped
> + * at the maximum canonical address.
> + */
> +#define TASK_SIZE_MAX ((_AC(1, UL) << 47) - PAGE_SIZE)
> +
>  #endif /* _ASM_X86_PGTABLE_64_DEFS_H */
> diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
> index f385eca5407a..9bc99d37133e 100644
> --- a/arch/x86/include/asm/processor.h
> +++ b/arch/x86/include/asm/processor.h
> @@ -829,17 +829,6 @@ static inline void spin_lock_prefetch(const void *x)
>  #define KSTK_ESP(task) (task_pt_regs(task)->sp)
>
>  #else
> -/*
> - * User space process size. 47bits minus one guard page.  The guard
> - * page is necessary on Intel CPUs: if a SYSCALL instruction is at
> - * the highest possible canonical userspace address, then that
> - * syscall will enter the kernel with a non-canonical return
> - * address, and SYSRET will explode dangerously.  We avoid this
> - * particular problem by preventing anything from being mapped
> - * at the maximum canonical address.
> - */
> -#define TASK_SIZE_MAX ((1UL << 47) - PAGE_SIZE)
> -
>  /* This decides where the kernel will search for a free chunk of vm
>   * space during mmap's.
>   */
> --
> 2.12.0.367.g23dc2f6d3c-goog
>
>
> On Tue, Mar 14, 2017 at 10:53 AM, H. Peter Anvin <hpa@zytor.com> wrote:
>> On 03/14/17 09:51, Thomas Garnier wrote:
>>>>
>>>> I wanted to comment on that thing: why on earth isn't
>>>> verify_pre_usermode_state() an inline?  Making it an out-of-line
>>>> function adds pointless extra overhead to the C code when we are talking
>>>> about a few instructions.
>>>
>>> Because outside of the arch-specific implementation it is called by each
>>> syscall handler. It will increase the code size a lot.
>>>
>>
>> Don't assume that.  On a lot of architectures a function call can be
>> more expensive than a simple compare and branch, because the compiler
>> has to assume a whole bunch of registers are lost at that point.
>>
>> Either way, don't penalize the common architectures for it.  Not okay.
>>
>>         -hpa
>>
>
>
>
> --
> Thomas



-- 
Thomas

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH v3 2/4] x86/syscalls: Specific usage of verify_pre_usermode_state
  2017-03-22 19:15                         ` [kernel-hardening] " Thomas Garnier
@ 2017-03-22 20:21                           ` H. Peter Anvin
  -1 siblings, 0 replies; 46+ messages in thread
From: H. Peter Anvin @ 2017-03-22 20:21 UTC (permalink / raw)
  To: Thomas Garnier
  Cc: Andy Lutomirski, Ingo Molnar, Martin Schwidefsky, Heiko Carstens,
	David Howells, Arnd Bergmann, Al Viro, Dave Hansen,
	René Nyffenegger, Andrew Morton, Kees Cook,
	Paul E . McKenney, Andy Lutomirski, Ard Biesheuvel,
	Nicolas Pitre, Petr Mladek, Sebastian Andrzej Siewior,
	Sergey Senozhatsky, Helge Deller

On 03/22/17 12:15, Thomas Garnier wrote:
> On Wed, Mar 15, 2017 at 10:43 AM, Thomas Garnier <thgarnie@google.com> wrote:
>> Thanks for the feedback. I will look into inlining by default (looking
>> at code size on different arch), the updated patch for x86 in the
>> meantime:
> 
> I did a couple of checks and it doesn't seem worth it. I will send a v4
> with the change below for additional feedback.

Can you specify what that means?

On x86, where there is only one caller of this, it really seems like it
ought to reduce the overhead to almost zero (since it most likely is
hidden in the pipeline.)

I would like to suggest defining it inline if
CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE is set; I really don't
care about an architecture which doesn't have it.
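
Roughly this shape, say (a sketch of the suggestion only, not a tested
patch; whether the failure path WARN()s or BUG()s stays a separate
CONFIG_BUG_ON_DATA_CORRUPTION decision):

	#ifdef CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
	/* Only arch entry code calls this: inline the compare and branch. */
	static inline void verify_pre_usermode_state(void)
	{
		if (unlikely(!segment_eq(get_fs(), USER_DS))) {
			set_fs(USER_DS);
			WARN(1, "incorrect address limit on user-mode return");
		}
	}
	#else
	/* Called from every syscall wrapper: keep it out of line for size. */
	asmlinkage void verify_pre_usermode_state(void);
	#endif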

Note that the upcoming 5-level paging (LA57) support will make
TASK_SIZE_MAX dependent on the CPU.  The best way to handle that is
probably to tag this immediate with an assembly alternative rather than
loading it from a memory variable (most likely taking a cache hit.)

	-hpa

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH v3 2/4] x86/syscalls: Specific usage of verify_pre_usermode_state
  2017-03-22 20:21                           ` [kernel-hardening] " H. Peter Anvin
@ 2017-03-22 20:41                             ` Thomas Garnier
  -1 siblings, 0 replies; 46+ messages in thread
From: Thomas Garnier @ 2017-03-22 20:41 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Andy Lutomirski, Ingo Molnar, Martin Schwidefsky, Heiko Carstens,
	David Howells, Arnd Bergmann, Al Viro, Dave Hansen,
	René Nyffenegger, Andrew Morton, Kees Cook,
	Paul E . McKenney, Andy Lutomirski, Ard Biesheuvel,
	Nicolas Pitre, Petr Mladek, Sebastian Andrzej Siewior,
	Sergey Senozhatsky, Helge Deller, Rik van Riel, John Stultz,
	Thomas Gleixner

On Wed, Mar 22, 2017 at 1:21 PM, H. Peter Anvin <hpa@zytor.com> wrote:
> On 03/22/17 12:15, Thomas Garnier wrote:
>> On Wed, Mar 15, 2017 at 10:43 AM, Thomas Garnier <thgarnie@google.com> wrote:
>>> Thanks for the feedback. I will look into inlining by default (looking
>>> at code size on different arch), the updated patch for x86 in the
>>> meantime:
>>
>> I did couple checks and it doesn't seem worth it. I will send a v4
>> with the change below for additional feedback.
>
> Can you specify what that means?

If I set it inline by default, the compiler chose not to inline it on
x86. If I forced inlining, the size impact was actually bigger (without
the architecture-specific code).

>
> On x86, where there is only one caller of this, it really seems like it
> ought to reduce the overhead to almost zero (since it most likely is
> hidden in the pipeline.)
>
> I would like to suggest defining it inline if
> CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE is set; I really don't
> care about an architecture which doesn't have it.

But if there is only one caller, isn't the compiler supposed to
inline the function based on options?

The assembly will call it too, so I would need an inline and a
non-inline based on the caller.

>
> Note that the upcoming 5-level paging (LA57) support will make
> TASK_SIZE_MAX dependent on the CPU.  The best way to handle that is
> probably to tag this immediate with an assembly alternative rather than
> loading it from a memory variable (most likely taking a cache hit.)
>
>         -hpa
>



-- 
Thomas

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH v3 2/4] x86/syscalls: Specific usage of verify_pre_usermode_state
  2017-03-22 20:41                             ` [kernel-hardening] " Thomas Garnier
@ 2017-03-22 20:49                               ` H. Peter Anvin
  -1 siblings, 0 replies; 46+ messages in thread
From: H. Peter Anvin @ 2017-03-22 20:49 UTC (permalink / raw)
  To: Thomas Garnier
  Cc: Andy Lutomirski, Ingo Molnar, Martin Schwidefsky, Heiko Carstens,
	David Howells, Arnd Bergmann, Al Viro, Dave Hansen,
	René Nyffenegger, Andrew Morton, Kees Cook,
	Paul E . McKenney, Andy Lutomirski, Ard Biesheuvel,
	Nicolas Pitre, Petr Mladek, Sebastian Andrzej Siewior,
	Sergey Senozhatsky, Helge Deller

On 03/22/17 13:41, Thomas Garnier wrote:
>>> with the change below for additional feedback.
>>
>> Can you specify what that means?
> 
> If I set it inline by default, the compiler chose not to inline it on
> x86. If I forced inlining, the size impact was actually bigger (without
> the architecture-specific code).
> 

That's utterly bizarre.  Something strange is going on there.  I suspect
the right thing to do is to out-of-line the error case only, but even
that seems strange.  It should be something like four instructions inline.
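
Out-of-lining just the error case would be something like this (sketch;
the cold helper name is made up):

	extern void pre_usermode_state_failed(void);	/* out of line */

	static __always_inline void verify_pre_usermode_state(void)
	{
		/* Fast path is a load, a compare and a rarely-taken branch. */
		if (unlikely(!segment_eq(get_fs(), USER_DS)))
			pre_usermode_state_failed();
	}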

>>
>> On x86, where there is only one caller of this, it really seems like it
>> ought to reduce the overhead to almost zero (since it most likely is
>> hidden in the pipeline.)
>>
>> I would like to suggest defining it inline if
>> CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE is set; I really don't
>> care about an architecture which doesn't have it.
> 
> But if there is only one caller, isn't the compiler supposed to
> inline the function based on options?

If it is marked static in the same file, yes, but you have it in a
different file from what I can tell.

> The assembly will call it too, so I would need an inline and a
> non-inline based on the caller.

Where?  I don't see that anywhere, at least for x86.

	-hpa

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH v3 2/4] x86/syscalls: Specific usage of verify_pre_usermode_state
  2017-03-22 20:49                               ` [kernel-hardening] " H. Peter Anvin
@ 2017-03-22 21:11                                 ` Thomas Garnier
  -1 siblings, 0 replies; 46+ messages in thread
From: Thomas Garnier @ 2017-03-22 21:11 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Andy Lutomirski, Ingo Molnar, Martin Schwidefsky, Heiko Carstens,
	David Howells, Arnd Bergmann, Al Viro, Dave Hansen,
	René Nyffenegger, Andrew Morton, Kees Cook,
	Paul E . McKenney, Andy Lutomirski, Ard Biesheuvel,
	Nicolas Pitre, Petr Mladek, Sebastian Andrzej Siewior,
	Sergey Senozhatsky, Helge Deller, Rik van Riel, John Stultz,
	Thomas Gleixner

On Wed, Mar 22, 2017 at 1:49 PM, H. Peter Anvin <hpa@zytor.com> wrote:
> On 03/22/17 13:41, Thomas Garnier wrote:
>>>> with the change below for additional feedback.
>>>
>>> Can you specify what that means?
>>
>> If I set it inline by default, the compiler chose not to inline it on
>> x86. If I forced inlining, the size impact was actually bigger (without
>> the architecture-specific code).
>>
>
> That's utterly bizarre.  Something strange is going on there.  I suspect
> the right thing to do is to out-of-line the error case only, but even
> that seems strange.  It should be something like four instructions inline.
>

The compiler often seemed to inline other functions called by the
syscall handlers. I assume the growth was due to changes in code
optimization, since the resulting function is much larger in the end.

>>>
>>> On x86, where there is only one caller of this, it really seems like it
>>> ought to reduce the overhead to almost zero (since it most likely is
>>> hidden in the pipeline.)
>>>
>>> I would like to suggest defining it inline if
>>> CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE is set; I really don't
>>> care about an architecture which doesn't have it.
>>
>> But if there is only one caller, isn't the compiler supposed to
>> inline the function based on options?
>
> If it is marked static in the same file, yes, but you have it in a
> different file from what I can tell.

If we do global optimization, it should. Having it as a static inline
makes it easier on all types of builds.

>
>> The assembly will call it too, so I would need an inline and a
>> non-inline based on the caller.
>
> Where?  I don't see that anywhere, at least for x86.

After the latest changes on x86, yes. On arm/arm64, we call it with
the CHECK_DATA_CORRUPTION config.
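
For reference, the generic version can lean on the existing
CHECK_DATA_CORRUPTION() helper from <linux/bug.h>; a sketch of that
shape (not the exact patch text):

	void verify_pre_usermode_state(void)
	{
		if (CHECK_DATA_CORRUPTION(!segment_eq(get_fs(), USER_DS),
					  "incorrect address limit on user-mode return"))
			set_fs(USER_DS);	/* restore a sane limit */
	}

CHECK_DATA_CORRUPTION() BUG()s when CONFIG_BUG_ON_DATA_CORRUPTION is set
and WARN()s otherwise, returning whether the condition fired.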

>
>         -hpa
>



-- 
Thomas

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH v3 2/4] x86/syscalls: Specific usage of verify_pre_usermode_state
  2017-03-22 21:11                                 ` [kernel-hardening] " Thomas Garnier
  (?)
  (?)
@ 2017-03-23 19:14                                 ` H. Peter Anvin
  -1 siblings, 0 replies; 46+ messages in thread
From: H. Peter Anvin @ 2017-03-23 19:14 UTC (permalink / raw)
  To: Thomas Garnier
  Cc: Andy Lutomirski, Ingo Molnar, Martin Schwidefsky, Heiko Carstens,
	David Howells, Arnd Bergmann, Al Viro, Dave Hansen,
	René Nyffenegger, Andrew Morton, Kees Cook,
	Paul E . McKenney, Andy Lutomirski, Ard Biesheuvel,
	Nicolas Pitre, Petr Mladek, Sebastian Andrzej Siewior,
	Sergey Senozhatsky, Helge Deller, Rik van Riel, John Stultz,
	Thomas Gleixner, Oleg Nesterov, Stephen Smalley,
	Pavel Tikhomirov, Frederic Weisbecker, Stanislav Kinsburskiy,
	Ingo Molnar, Paolo Bonzini, Dmitry Safonov, Borislav Petkov,
	Josh Poimboeuf, Brian Gerst, Jan Beulich, Christian Borntraeger,
	Fenghua Yu, He Chen, Russell King, Vladimir Murzin, Will Deacon,
	Catalin Marinas, Mark Rutland, James Morse, David A . Long,
	Pratyush Anand, Laura Abbott, Andre Przywara, Chris Metcalf,
	linux-s390, LKML, Linux API, the arch/x86 maintainers,
	linux-arm-kernel, Kernel Hardening

On March 22, 2017 2:11:12 PM PDT, Thomas Garnier <thgarnie@google.com> wrote:
>On Wed, Mar 22, 2017 at 1:49 PM, H. Peter Anvin <hpa@zytor.com> wrote:
>> On 03/22/17 13:41, Thomas Garnier wrote:
>>>>> with the change below for additional feedback.
>>>>
>>>> Can you specify what that means?
>>>
>>> If I set it inline by default, the compiler chose not to inline it on
>>> x86. If I forced inlining, the size impact was actually bigger (without
>>> the architecture-specific code).
>>>
>>
>> That's utterly bizarre.  Something strange is going on there.  I suspect
>> the right thing to do is to out-of-line the error case only, but even
>> that seems strange.  It should be something like four instructions inline.
>>
>
>The compiler often seemed to inline other functions called by the
>syscall handlers. I assume the growth was due to changes in code
>optimization, since the resulting function is much larger in the end.
>
>>>>
>>>> On x86, where there is only one caller of this, it really seems like it
>>>> ought to reduce the overhead to almost zero (since it most likely is
>>>> hidden in the pipeline.)
>>>>
>>>> I would like to suggest defining it inline if
>>>> CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE is set; I really don't
>>>> care about an architecture which doesn't have it.
>>>
>>> But if there is only one caller, isn't the compiler supposed to
>>> inline the function based on options?
>>
>> If it is marked static in the same file, yes, but you have it in a
>> different file from what I can tell.
>
>If we do global optimization, it should. Having it as a static inline
>makes it easier on all types of builds.
>
>>
>>> The assembly will call it too, so I would need an inline and a
>>> non-inline based on the caller.
>>
>> Where?  I don't see that anywhere, at least for x86.
>
>After the latest changes on x86, yes. On arm/arm64, we call it with
>the CHECK_DATA_CORRUPTION config.
>
>>
>>         -hpa
>>

If we do global optimization, yes, but global optimization (generally called link-time optimization, LTO, on Linux) is very much the exception and not the rule for the Linux kernel at this time.
-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.

^ permalink raw reply	[flat|nested] 46+ messages in thread

end of thread, other threads:[~2017-03-23 19:39 UTC | newest]

Thread overview: 46+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-03-11  0:04 [PATCH v3 1/4] syscalls: Restore address limit after a syscall Thomas Garnier
2017-03-11  0:04 ` [kernel-hardening] " Thomas Garnier
2017-03-11  0:04 ` [PATCH v3 2/4] x86/syscalls: Specific usage of verify_pre_usermode_state Thomas Garnier
2017-03-11  0:04   ` [kernel-hardening] " Thomas Garnier
2017-03-11  9:42   ` Ingo Molnar
2017-03-11  9:42     ` [kernel-hardening] " Ingo Molnar
2017-03-13 15:53     ` Thomas Garnier
2017-03-13 15:53       ` [kernel-hardening] " Thomas Garnier
2017-03-13 21:48     ` H. Peter Anvin
2017-03-13 21:48       ` H. Peter Anvin
2017-03-14  0:04     ` H. Peter Anvin
2017-03-14  0:04       ` [kernel-hardening] " H. Peter Anvin
2017-03-14  9:40       ` H. Peter Anvin
2017-03-14  9:40         ` [kernel-hardening] " H. Peter Anvin
2017-03-14 15:17         ` Thomas Garnier
2017-03-14 15:17           ` [kernel-hardening] " Thomas Garnier
2017-03-14 15:39           ` Andy Lutomirski
2017-03-14 15:39             ` [kernel-hardening] " Andy Lutomirski
2017-03-14 16:29             ` Thomas Garnier
2017-03-14 16:29               ` [kernel-hardening] " Thomas Garnier
2017-03-14 16:44               ` H. Peter Anvin
2017-03-14 16:44                 ` [kernel-hardening] " H. Peter Anvin
2017-03-14 16:51                 ` Thomas Garnier
2017-03-14 16:51                   ` [kernel-hardening] " Thomas Garnier
2017-03-14 17:53                   ` H. Peter Anvin
2017-03-14 17:53                     ` [kernel-hardening] " H. Peter Anvin
2017-03-15 17:43                     ` Thomas Garnier
2017-03-15 17:43                       ` [kernel-hardening] " Thomas Garnier
2017-03-22 19:15                       ` Thomas Garnier
2017-03-22 19:15                         ` [kernel-hardening] " Thomas Garnier
2017-03-22 20:21                         ` H. Peter Anvin
2017-03-22 20:21                           ` [kernel-hardening] " H. Peter Anvin
2017-03-22 20:41                           ` Thomas Garnier
2017-03-22 20:41                             ` [kernel-hardening] " Thomas Garnier
2017-03-22 20:49                             ` H. Peter Anvin
2017-03-22 20:49                               ` [kernel-hardening] " H. Peter Anvin
2017-03-22 21:11                               ` Thomas Garnier
2017-03-22 21:11                                 ` [kernel-hardening] " Thomas Garnier
     [not found]                                 ` <CAJcbSZEouZ2v+q_i-3Xiba2FNT18ipKwF09838vvfSCwEi7e4Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-03-23 19:14                                   ` H. Peter Anvin
2017-03-23 19:14                                 ` H. Peter Anvin
2017-03-14 16:30             ` H. Peter Anvin
2017-03-14 16:30               ` [kernel-hardening] " H. Peter Anvin
2017-03-11  0:05 ` [PATCH v3 3/4] arm/syscalls: " Thomas Garnier
2017-03-11  0:05   ` [kernel-hardening] " Thomas Garnier
2017-03-11  0:05 ` [PATCH v3 4/4] arm64/syscalls: " Thomas Garnier
2017-03-11  0:05   ` [kernel-hardening] " Thomas Garnier
