Linux-Integrity Archive on lore.kernel.org
 help / Atom feed
* [PATCH 01/12] x86_64: memset_user()
       [not found] <20181221181423.20455-1-igor.stoppa@huawei.com>
@ 2018-12-21 18:14 ` Igor Stoppa
  2018-12-21 18:25   ` Matthew Wilcox
  2018-12-21 18:14 ` [PATCH 02/12] __wr_after_init: linker section and label Igor Stoppa
                   ` (10 subsequent siblings)
  11 siblings, 1 reply; 24+ messages in thread
From: Igor Stoppa @ 2018-12-21 18:14 UTC (permalink / raw)
  To: Andy Lutomirski, Matthew Wilcox, Peter Zijlstra, Dave Hansen,
	Mimi Zohar, Thiago Jung Bauermann
  Cc: igor.stoppa, Nadav Amit, Kees Cook, Ahmed Soliman,
	linux-integrity, kernel-hardening, linux-mm, linux-kernel

Create x86_64 specific version of memset for user space, based on
clear_user().
This will be used for implementing wr_memset() in the __wr_after_init
scenario, where write-rare variables have an alternate mapping for
writing.

Signed-off-by: Igor Stoppa <igor.stoppa@huawei.com>

CC: Andy Lutomirski <luto@amacapital.net>
CC: Nadav Amit <nadav.amit@gmail.com>
CC: Matthew Wilcox <willy@infradead.org>
CC: Peter Zijlstra <peterz@infradead.org>
CC: Kees Cook <keescook@chromium.org>
CC: Dave Hansen <dave.hansen@linux.intel.com>
CC: Mimi Zohar <zohar@linux.vnet.ibm.com>
CC: Thiago Jung Bauermann <bauerman@linux.ibm.com>
CC: Ahmed Soliman <ahmedsoliman@mena.vt.edu>
CC: linux-integrity@vger.kernel.org
CC: kernel-hardening@lists.openwall.com
CC: linux-mm@kvack.org
CC: linux-kernel@vger.kernel.org
---
 arch/x86/include/asm/uaccess_64.h |  6 ++++
 arch/x86/lib/usercopy_64.c        | 54 +++++++++++++++++++++++++++++++
 2 files changed, 60 insertions(+)

diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index a9d637bc301d..f194bfce4866 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -213,4 +213,10 @@ copy_user_handle_tail(char *to, char *from, unsigned len);
 unsigned long
 mcsafe_handle_tail(char *to, char *from, unsigned len);
 
+unsigned long __must_check
+memset_user(void __user *mem, int c, unsigned long len);
+
+unsigned long __must_check
+__memset_user(void __user *mem, int c, unsigned long len);
+
 #endif /* _ASM_X86_UACCESS_64_H */
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 1bd837cdc4b1..84f8f8a20b30 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -9,6 +9,60 @@
 #include <linux/uaccess.h>
 #include <linux/highmem.h>
 
+/*
+ * Memset Userspace
+ */
+
+unsigned long __memset_user(void __user *addr, int c, unsigned long size)
+{
+	long __d0;
+	unsigned long  pattern = 0;
+	int i;
+
+	for (i = 0; i < 8; i++)
+		pattern = (pattern << 8) | (0xFF & c);
+	might_fault();
+	/* no memory constraint: gcc doesn't know about this memory */
+	stac();
+	asm volatile(
+		"       movq %[val], %%rdx\n"
+		"	testq  %[size8],%[size8]\n"
+		"	jz     4f\n"
+		"0:	mov %%rdx,(%[dst])\n"
+		"	addq   $8,%[dst]\n"
+		"	decl %%ecx ; jnz   0b\n"
+		"4:	movq  %[size1],%%rcx\n"
+		"	testl %%ecx,%%ecx\n"
+		"	jz     2f\n"
+		"1:	movb   %%dl,(%[dst])\n"
+		"	incq   %[dst]\n"
+		"	decl %%ecx ; jnz  1b\n"
+		"2:\n"
+		".section .fixup,\"ax\"\n"
+		"3:	lea 0(%[size1],%[size8],8),%[size8]\n"
+		"	jmp 2b\n"
+		".previous\n"
+		_ASM_EXTABLE_UA(0b, 3b)
+		_ASM_EXTABLE_UA(1b, 2b)
+		: [size8] "=&c"(size), [dst] "=&D" (__d0)
+		: [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr),
+		  [val] "ri"(pattern)
+		: "rdx");
+
+	clac();
+	return size;
+}
+EXPORT_SYMBOL(__memset_user);
+
+unsigned long memset_user(void __user *to, int c, unsigned long n)
+{
+	if (access_ok(VERIFY_WRITE, to, n))
+		return __memset_user(to, c, n);
+	return n;
+}
+EXPORT_SYMBOL(memset_user);
+
+
 /*
  * Zero Userspace
  */
-- 
2.19.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 02/12] __wr_after_init: linker section and label
       [not found] <20181221181423.20455-1-igor.stoppa@huawei.com>
  2018-12-21 18:14 ` [PATCH 01/12] x86_64: memset_user() Igor Stoppa
@ 2018-12-21 18:14 ` Igor Stoppa
  2018-12-21 18:14 ` [PATCH 03/12] __wr_after_init: generic functionality Igor Stoppa
                   ` (9 subsequent siblings)
  11 siblings, 0 replies; 24+ messages in thread
From: Igor Stoppa @ 2018-12-21 18:14 UTC (permalink / raw)
  To: Andy Lutomirski, Matthew Wilcox, Peter Zijlstra, Dave Hansen,
	Mimi Zohar, Thiago Jung Bauermann
  Cc: igor.stoppa, Nadav Amit, Kees Cook, Ahmed Soliman,
	linux-integrity, kernel-hardening, linux-mm, linux-kernel

Introduce a section and a label for statically allocated write rare
data. The label is named "__wr_after_init".
As the name implies, after the init phase is completed, this section
will be modifiable only by invoking write rare functions.
The section must take up a set of full pages.

To activate both section and label, the arch must set CONFIG_ARCH_HAS_PRMEM

Signed-off-by: Igor Stoppa <igor.stoppa@huawei.com>

CC: Andy Lutomirski <luto@amacapital.net>
CC: Nadav Amit <nadav.amit@gmail.com>
CC: Matthew Wilcox <willy@infradead.org>
CC: Peter Zijlstra <peterz@infradead.org>
CC: Kees Cook <keescook@chromium.org>
CC: Dave Hansen <dave.hansen@linux.intel.com>
CC: Mimi Zohar <zohar@linux.vnet.ibm.com>
CC: Thiago Jung Bauermann <bauerman@linux.ibm.com>
CC: Ahmed Soliman <ahmedsoliman@mena.vt.edu>
CC: linux-integrity@vger.kernel.org
CC: kernel-hardening@lists.openwall.com
CC: linux-mm@kvack.org
CC: linux-kernel@vger.kernel.org
---
 arch/Kconfig                      | 15 +++++++++++++++
 include/asm-generic/vmlinux.lds.h | 25 +++++++++++++++++++++++++
 include/linux/cache.h             | 21 +++++++++++++++++++++
 init/main.c                       |  2 ++
 4 files changed, 63 insertions(+)

diff --git a/arch/Kconfig b/arch/Kconfig
index e1e540ffa979..8668ffec8098 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -802,6 +802,21 @@ config VMAP_STACK
 	  the stack to map directly to the KASAN shadow map using a formula
 	  that is incorrect if the stack is in vmalloc space.
 
+config ARCH_HAS_PRMEM
+	def_bool n
+	help
+	  architecture specific symbol stating that the architecture provides
+	  a back-end function for the write rare operation.
+
+config PRMEM
+	bool "Write protect critical data that doesn't need high write speed."
+	depends on ARCH_HAS_PRMEM
+	default y
+	help
+	  If the architecture supports it, statically allocated data which
+	  has been selected for hardening becomes (mostly) read-only.
+	  The selection happens by labelling the data "__wr_after_init".
+
 config ARCH_OPTIONAL_KERNEL_RWX
 	def_bool n
 
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 3d7a6a9c2370..ddb1fd608490 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -311,6 +311,30 @@
 	KEEP(*(__jump_table))						\
 	__stop___jump_table = .;
 
+/*
+ * Allow architectures to handle wr_after_init data on their
+ * own by defining an empty WR_AFTER_INIT_DATA.
+ * However, it's important that pages containing WR_RARE data do not
+ * hold anything else, to avoid both accidentally unprotecting something
+ * that is supposed to stay read-only all the time and also to protect
+ * something else that is supposed to be writeable all the time.
+ */
+#ifndef WR_AFTER_INIT_DATA
+#ifdef CONFIG_PRMEM
+#define WR_AFTER_INIT_DATA(align)					\
+	. = ALIGN(PAGE_SIZE);						\
+	__start_wr_after_init = .;					\
+	. = ALIGN(align);						\
+	*(.data..wr_after_init)						\
+	. = ALIGN(PAGE_SIZE);						\
+	__end_wr_after_init = .;					\
+	. = ALIGN(align);
+#else
+#define WR_AFTER_INIT_DATA(align)					\
+	. = ALIGN(align);
+#endif
+#endif
+
 /*
  * Allow architectures to handle ro_after_init data on their
  * own by defining an empty RO_AFTER_INIT_DATA.
@@ -332,6 +356,7 @@
 		__start_rodata = .;					\
 		*(.rodata) *(.rodata.*)					\
 		RO_AFTER_INIT_DATA	/* Read only after init */	\
+		WR_AFTER_INIT_DATA(align) /* wr after init */	\
 		KEEP(*(__vermagic))	/* Kernel version magic */	\
 		. = ALIGN(8);						\
 		__start___tracepoints_ptrs = .;				\
diff --git a/include/linux/cache.h b/include/linux/cache.h
index 750621e41d1c..09bd0b9284b6 100644
--- a/include/linux/cache.h
+++ b/include/linux/cache.h
@@ -31,6 +31,27 @@
 #define __ro_after_init __attribute__((__section__(".data..ro_after_init")))
 #endif
 
+/*
+ * __wr_after_init is used to mark objects that cannot be modified
+ * directly after init (i.e. after mark_rodata_ro() has been called).
+ * These objects become effectively read-only, from the perspective of
+ * performing a direct write, like a variable assignment.
+ * However, they can be altered through a dedicated function.
+ * It is intended for those objects which are occasionally modified after
+ * init, however they are modified so seldomly, that the extra cost from
+ * the indirect modification is either negligible or worth paying, for the
+ * sake of the protection gained.
+ */
+#ifndef __wr_after_init
+#ifdef CONFIG_PRMEM
+#define __wr_after_init \
+		__attribute__((__section__(".data..wr_after_init")))
+#else
+#define __wr_after_init
+#endif
+#endif
+
+
 #ifndef ____cacheline_aligned
 #define ____cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES)))
 #endif
diff --git a/init/main.c b/init/main.c
index a461150adfb1..a36f2e54f937 100644
--- a/init/main.c
+++ b/init/main.c
@@ -498,6 +498,7 @@ void __init __weak thread_stack_cache_init(void)
 void __init __weak mem_encrypt_init(void) { }
 
 void __init __weak poking_init(void) { }
+void __init __weak wr_poking_init(void) { }
 
 bool initcall_debug;
 core_param(initcall_debug, initcall_debug, bool, 0644);
@@ -734,6 +735,7 @@ asmlinkage __visible void __init start_kernel(void)
 	delayacct_init();
 
 	poking_init();
+	wr_poking_init();
 	check_bugs();
 
 	acpi_subsystem_init();
-- 
2.19.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 03/12] __wr_after_init: generic functionality
       [not found] <20181221181423.20455-1-igor.stoppa@huawei.com>
  2018-12-21 18:14 ` [PATCH 01/12] x86_64: memset_user() Igor Stoppa
  2018-12-21 18:14 ` [PATCH 02/12] __wr_after_init: linker section and label Igor Stoppa
@ 2018-12-21 18:14 ` Igor Stoppa
  2018-12-21 18:41   ` Matthew Wilcox
  2018-12-21 18:14 ` [PATCH 04/12] __wr_after_init: debug writes Igor Stoppa
                   ` (8 subsequent siblings)
  11 siblings, 1 reply; 24+ messages in thread
From: Igor Stoppa @ 2018-12-21 18:14 UTC (permalink / raw)
  To: Andy Lutomirski, Matthew Wilcox, Peter Zijlstra, Dave Hansen,
	Mimi Zohar, Thiago Jung Bauermann
  Cc: igor.stoppa, Nadav Amit, Kees Cook, Ahmed Soliman,
	linux-integrity, kernel-hardening, linux-mm, linux-kernel

The patch provides:
- the generic part of the write rare functionality for static data,
  based on code from Matthew Wilcox
- the dummy functionality, in case an arch doesn't support write rare or
  the functionality is disabled

The basic functions are:
- wr_memset(): write rare counterpart of memset()
- wr_memcpy(): write rare counterpart of memcpy()
- wr_assign(): write rare counterpart of the assignment ('=') operator
- wr_rcu_assign_pointer(): write rare counterpart of rcu_assign_pointer()

Signed-off-by: Igor Stoppa <igor.stoppa@huawei.com>

CC: Andy Lutomirski <luto@amacapital.net>
CC: Nadav Amit <nadav.amit@gmail.com>
CC: Matthew Wilcox <willy@infradead.org>
CC: Peter Zijlstra <peterz@infradead.org>
CC: Kees Cook <keescook@chromium.org>
CC: Dave Hansen <dave.hansen@linux.intel.com>
CC: Mimi Zohar <zohar@linux.vnet.ibm.com>
CC: Thiago Jung Bauermann <bauerman@linux.ibm.com>
CC: Ahmed Soliman <ahmedsoliman@mena.vt.edu>
CC: linux-integrity@vger.kernel.org
CC: kernel-hardening@lists.openwall.com
CC: linux-mm@kvack.org
CC: linux-kernel@vger.kernel.org
---
 include/linux/prmem.h | 106 ++++++++++++++++++++++++++++++++++++++++++
 mm/Makefile           |   1 +
 mm/prmem.c            |  97 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 204 insertions(+)
 create mode 100644 include/linux/prmem.h
 create mode 100644 mm/prmem.c

diff --git a/include/linux/prmem.h b/include/linux/prmem.h
new file mode 100644
index 000000000000..12c1d0d1cb78
--- /dev/null
+++ b/include/linux/prmem.h
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * prmem.h: Header for memory protection library
+ *
+ * (C) Copyright 2018 Huawei Technologies Co. Ltd.
+ * Author: Igor Stoppa <igor.stoppa@huawei.com>
+ *
+ * Support for:
+ * - statically allocated write rare data
+ */
+
+#ifndef _LINUX_PRMEM_H
+#define _LINUX_PRMEM_H
+
+#include <linux/set_memory.h>
+#include <linux/mutex.h>
+#include <linux/compiler.h>
+
+
+/**
+ * memtst() - test len bytes starting at p to match the c value
+ * @p: beginning of the memory to test
+ * @c: byte to compare against
+ * @len: amount of bytes to test
+ *
+ * Returns 0 on success, non-zero otherwise.
+ */
+static inline int memtst(void *p, int c, __kernel_size_t len)
+{
+	__kernel_size_t i;
+
+	for (i = 0; i < len; i++) {
+		u8 d =  *(i + (u8 *)p) - (u8)c;
+
+		if (unlikely(d))
+			return d;
+	}
+	return 0;
+}
+
+
+#ifndef CONFIG_PRMEM
+
+static inline void *wr_memset(void *p, int c, __kernel_size_t len)
+{
+	return memset(p, c, len);
+}
+
+static inline void *wr_memcpy(void *p, const void *q, __kernel_size_t size)
+{
+	return memcpy(p, q, size);
+}
+
+#define wr_assign(var, val)	((var) = (val))
+#define wr_rcu_assign_pointer(p, v)	rcu_assign_pointer(p, v)
+
+#else
+
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+
+#include <asm/prmem.h>
+
+void *wr_memset(void *p, int c, __kernel_size_t len);
+void *wr_memcpy(void *p, const void *q, __kernel_size_t size);
+
+/**
+ * wr_assign() - sets a write-rare variable to a specified value
+ * @var: the variable to set
+ * @val: the new value
+ *
+ * Returns: the variable
+ *
+ * Note: it might be possible to optimize this, to use wr_memset in some
+ * cases (maybe with NULL?).
+ */
+
+#define wr_assign(var, val) ({			\
+	typeof(var) tmp = (typeof(var))val;	\
+						\
+	wr_memcpy(&var, &tmp, sizeof(var));	\
+	var;					\
+})
+
+/**
+ * wr_rcu_assign_pointer() - initialize a pointer in rcu mode
+ * @p: the rcu pointer - it MUST be aligned to a machine word
+ * @v: the new value
+ *
+ * Returns the value assigned to the rcu pointer.
+ *
+ * It is provided as macro, to match rcu_assign_pointer()
+ * The rcu_assign_pointer() is implemented as equivalent of:
+ *
+ * smp_mb();
+ * WRITE_ONCE();
+ */
+#define wr_rcu_assign_pointer(p, v) ({	\
+	smp_mb();			\
+	wr_assign(p, v);		\
+	p;				\
+})
+#endif
+#endif
diff --git a/mm/Makefile b/mm/Makefile
index d210cc9d6f80..ef3867c16ce0 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -58,6 +58,7 @@ obj-$(CONFIG_SPARSEMEM)	+= sparse.o
 obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o
 obj-$(CONFIG_SLOB) += slob.o
 obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
+obj-$(CONFIG_PRMEM) += prmem.o
 obj-$(CONFIG_KSM) += ksm.o
 obj-$(CONFIG_PAGE_POISONING) += page_poison.o
 obj-$(CONFIG_SLAB) += slab.o
diff --git a/mm/prmem.c b/mm/prmem.c
new file mode 100644
index 000000000000..e1c1be3a1171
--- /dev/null
+++ b/mm/prmem.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * prmem.c: Memory Protection Library
+ *
+ * (C) Copyright 2017-2018 Huawei Technologies Co. Ltd.
+ * Author: Igor Stoppa <igor.stoppa@huawei.com>
+ */
+
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/compiler.h>
+#include <linux/slab.h>
+#include <linux/mmu_context.h>
+#include <linux/rcupdate.h>
+#include <linux/prmem.h>
+
+__ro_after_init bool wr_ready;
+
+/*
+ * The following two variables are statically allocated by the linker
+ * script at the the boundaries of the memory region (rounded up to
+ * multiples of PAGE_SIZE) reserved for __wr_after_init.
+ */
+extern long __start_wr_after_init;
+extern long __end_wr_after_init;
+static unsigned long start = (unsigned long)&__start_wr_after_init;
+static unsigned long end = (unsigned long)&__end_wr_after_init;
+
+static inline bool is_wr_after_init(void *p, __kernel_size_t size)
+{
+	unsigned long low = (unsigned long)p;
+	unsigned long high = low + size;
+
+	return likely(start <= low && high <= end);
+}
+
+/**
+ * wr_memcpy() - copyes size bytes from q to p
+ * @p: beginning of the memory to write to
+ * @q: beginning of the memory to read from
+ * @size: amount of bytes to copy
+ *
+ * Returns pointer to the destination
+ *
+ * The architecture code must provide:
+ *   void __wr_enable(wr_state_t *state)
+ *   void *__wr_addr(void *addr)
+ *   void *__wr_memcpy(void *p, const void *q, __kernel_size_t size)
+ *   void __wr_disable(wr_state_t *state)
+ */
+void *wr_memcpy(void *p, const void *q, __kernel_size_t size)
+{
+	wr_state_t wr_state;
+	void *wr_poking_addr = __wr_addr(p);
+
+	if (WARN_ONCE(!wr_ready, "No writable mapping available") ||
+	    WARN_ONCE(!is_wr_after_init(p, size), "Invalid WR range."))
+		return p;
+
+	local_irq_disable();
+	__wr_enable(&wr_state);
+	__wr_memcpy(wr_poking_addr, q, size);
+	__wr_disable(&wr_state);
+	local_irq_enable();
+	return p;
+}
+
+/**
+ * wr_memset() - sets len bytes of the destination p to the c value
+ * @p: beginning of the memory to write to
+ * @c: byte to replicate
+ * @len: amount of bytes to copy
+ *
+ * Returns pointer to the destination
+ *
+ * The architecture code must provide:
+ *   void __wr_enable(wr_state_t *state)
+ *   void *__wr_addr(void *addr)
+ *   void *__wr_memset(void *p, int c, __kernel_size_t len)
+ *   void __wr_disable(wr_state_t *state)
+ */
+void *wr_memset(void *p, int c, __kernel_size_t len)
+{
+	wr_state_t wr_state;
+	void *wr_poking_addr = __wr_addr(p);
+
+	if (WARN_ONCE(!wr_ready, "No writable mapping available") ||
+	    WARN_ONCE(!is_wr_after_init(p, len), "Invalid WR range."))
+		return p;
+
+	local_irq_disable();
+	__wr_enable(&wr_state);
+	__wr_memset(wr_poking_addr, c, len);
+	__wr_disable(&wr_state);
+	local_irq_enable();
+	return p;
+}
-- 
2.19.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 04/12] __wr_after_init: debug writes
       [not found] <20181221181423.20455-1-igor.stoppa@huawei.com>
                   ` (2 preceding siblings ...)
  2018-12-21 18:14 ` [PATCH 03/12] __wr_after_init: generic functionality Igor Stoppa
@ 2018-12-21 18:14 ` Igor Stoppa
  2018-12-21 18:14 ` [PATCH 05/12] __wr_after_init: x86_64: __wr_op Igor Stoppa
                   ` (7 subsequent siblings)
  11 siblings, 0 replies; 24+ messages in thread
From: Igor Stoppa @ 2018-12-21 18:14 UTC (permalink / raw)
  To: Andy Lutomirski, Matthew Wilcox, Peter Zijlstra, Dave Hansen,
	Mimi Zohar, Thiago Jung Bauermann
  Cc: igor.stoppa, Nadav Amit, Kees Cook, Ahmed Soliman,
	linux-integrity, kernel-hardening, linux-mm, linux-kernel

After each write operation, confirm that it was successful, otherwise
generate a warning.

Signed-off-by: Igor Stoppa <igor.stoppa@huawei.com>

CC: Andy Lutomirski <luto@amacapital.net>
CC: Nadav Amit <nadav.amit@gmail.com>
CC: Matthew Wilcox <willy@infradead.org>
CC: Peter Zijlstra <peterz@infradead.org>
CC: Kees Cook <keescook@chromium.org>
CC: Dave Hansen <dave.hansen@linux.intel.com>
CC: Mimi Zohar <zohar@linux.vnet.ibm.com>
CC: Thiago Jung Bauermann <bauerman@linux.ibm.com>
CC: Ahmed Soliman <ahmedsoliman@mena.vt.edu>
CC: linux-integrity@vger.kernel.org
CC: kernel-hardening@lists.openwall.com
CC: linux-mm@kvack.org
CC: linux-kernel@vger.kernel.org
---
 mm/Kconfig.debug | 8 ++++++++
 mm/prmem.c       | 6 ++++++
 2 files changed, 14 insertions(+)

diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index 9a7b8b049d04..b10305cfac3c 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -94,3 +94,11 @@ config DEBUG_RODATA_TEST
     depends on STRICT_KERNEL_RWX
     ---help---
       This option enables a testcase for the setting rodata read-only.
+
+config DEBUG_PRMEM
+    bool "Verify each write rare operation."
+    depends on PRMEM
+    default n
+    help
+      After any write rare operation, compares the data written with the
+      value provided by the caller.
diff --git a/mm/prmem.c b/mm/prmem.c
index e1c1be3a1171..51f6776e2515 100644
--- a/mm/prmem.c
+++ b/mm/prmem.c
@@ -61,6 +61,9 @@ void *wr_memcpy(void *p, const void *q, __kernel_size_t size)
 	__wr_enable(&wr_state);
 	__wr_memcpy(wr_poking_addr, q, size);
 	__wr_disable(&wr_state);
+#ifdef CONFIG_DEBUG_PRMEM
+	VM_WARN_ONCE(memcmp(p, q, size), "Failed %s()", __func__);
+#endif
 	local_irq_enable();
 	return p;
 }
@@ -92,6 +95,9 @@ void *wr_memset(void *p, int c, __kernel_size_t len)
 	__wr_enable(&wr_state);
 	__wr_memset(wr_poking_addr, c, len);
 	__wr_disable(&wr_state);
+#ifdef CONFIG_DEBUG_PRMEM
+	VM_WARN_ONCE(memtst(p, c, len), "Failed %s()", __func__);
+#endif
 	local_irq_enable();
 	return p;
 }
-- 
2.19.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 05/12] __wr_after_init: x86_64: __wr_op
       [not found] <20181221181423.20455-1-igor.stoppa@huawei.com>
                   ` (3 preceding siblings ...)
  2018-12-21 18:14 ` [PATCH 04/12] __wr_after_init: debug writes Igor Stoppa
@ 2018-12-21 18:14 ` Igor Stoppa
  2018-12-21 18:14 ` [PATCH 06/12] __wr_after_init: Documentation: self-protection Igor Stoppa
                   ` (6 subsequent siblings)
  11 siblings, 0 replies; 24+ messages in thread
From: Igor Stoppa @ 2018-12-21 18:14 UTC (permalink / raw)
  To: Andy Lutomirski, Matthew Wilcox, Peter Zijlstra, Dave Hansen,
	Mimi Zohar, Thiago Jung Bauermann
  Cc: igor.stoppa, Nadav Amit, Kees Cook, Ahmed Soliman,
	linux-integrity, kernel-hardening, linux-mm, linux-kernel

Architecture-specific implementation of the core write rare
operation.

The implementation is based on code from Andy Lutomirski and Nadav Amit
for patching the text on x86 [here goes reference to commits, once merged]

The modification of write protected data is done through an alternate
mapping of the same pages, as writable.
This mapping is persistent, but active only for a core that is
performing a write rare operation. And only for the duration of said
operation.
Local interrupts are disabled, while the alternate mapping is active.

In theory, it could introduce a non-predictable delay, in a preemptible
system, however the amount of data to be altered is likely to be far
smaller than a page.

Signed-off-by: Igor Stoppa <igor.stoppa@huawei.com>

CC: Andy Lutomirski <luto@amacapital.net>
CC: Nadav Amit <nadav.amit@gmail.com>
CC: Matthew Wilcox <willy@infradead.org>
CC: Peter Zijlstra <peterz@infradead.org>
CC: Kees Cook <keescook@chromium.org>
CC: Dave Hansen <dave.hansen@linux.intel.com>
CC: Mimi Zohar <zohar@linux.vnet.ibm.com>
CC: Thiago Jung Bauermann <bauerman@linux.ibm.com>
CC: Ahmed Soliman <ahmedsoliman@mena.vt.edu>
CC: linux-integrity@vger.kernel.org
CC: kernel-hardening@lists.openwall.com
CC: linux-mm@kvack.org
CC: linux-kernel@vger.kernel.org
---
 arch/x86/Kconfig             |  1 +
 arch/x86/include/asm/prmem.h | 72 ++++++++++++++++++++++++++++++++++++
 arch/x86/mm/Makefile         |  2 +
 arch/x86/mm/prmem.c          | 69 ++++++++++++++++++++++++++++++++++
 4 files changed, 144 insertions(+)
 create mode 100644 arch/x86/include/asm/prmem.h
 create mode 100644 arch/x86/mm/prmem.c

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8689e794a43c..e5e4fc4fa5c2 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -32,6 +32,7 @@ config X86_64
 	select SWIOTLB
 	select X86_DEV_DMA_OPS
 	select ARCH_HAS_SYSCALL_WRAPPER
+	select ARCH_HAS_PRMEM
 
 #
 # Arch settings
diff --git a/arch/x86/include/asm/prmem.h b/arch/x86/include/asm/prmem.h
new file mode 100644
index 000000000000..e1f09f881351
--- /dev/null
+++ b/arch/x86/include/asm/prmem.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * prmem.h: Header for memory protection library
+ *
+ * (C) Copyright 2018 Huawei Technologies Co. Ltd.
+ * Author: Igor Stoppa <igor.stoppa@huawei.com>
+ *
+ * Support for:
+ * - statically allocated write rare data
+ */
+
+#ifndef _ASM_X86_PRMEM_H
+#define _ASM_X86_PRMEM_H
+
+#include <linux/set_memory.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/compiler.h>
+#include <linux/irqflags.h>
+#include <linux/mmu_context.h>
+
+typedef temporary_mm_state_t wr_state_t;
+
+extern __ro_after_init struct mm_struct *wr_poking_mm;
+extern __ro_after_init unsigned long wr_poking_base;
+
+static inline void *__wr_addr(void *addr)
+{
+	return (void *)(wr_poking_base + (unsigned long)addr);
+}
+
+static inline void __wr_enable(wr_state_t *state)
+{
+	*state = use_temporary_mm(wr_poking_mm);
+}
+
+static inline void __wr_disable(wr_state_t *state)
+{
+	unuse_temporary_mm(*state);
+}
+
+
+/**
+ * __wr_memset() - sets len bytes of the destination p to the c value
+ * @p: beginning of the memory to write to
+ * @c: byte to replicate
+ * @len: amount of bytes to copy
+ *
+ * Returns pointer to the destination
+ */
+static inline void *__wr_memset(void *p, int c, __kernel_size_t len)
+{
+	return (void *)memset_user((void __user *)p, (u8)c, len);
+}
+
+/**
+ * __wr_memcpy() - copyes size bytes from q to p
+ * @p: beginning of the memory to write to
+ * @q: beginning of the memory to read from
+ * @size: amount of bytes to copy
+ *
+ * Returns pointer to the destination
+ */
+static inline void *__wr_memcpy(void *p, const void *q, __kernel_size_t size)
+{
+	return (void *)copy_to_user((void __user *)p, q, size);
+}
+
+#endif
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 4b101dd6e52f..66652de1e2c7 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -53,3 +53,5 @@ obj-$(CONFIG_PAGE_TABLE_ISOLATION)		+= pti.o
 obj-$(CONFIG_AMD_MEM_ENCRYPT)	+= mem_encrypt.o
 obj-$(CONFIG_AMD_MEM_ENCRYPT)	+= mem_encrypt_identity.o
 obj-$(CONFIG_AMD_MEM_ENCRYPT)	+= mem_encrypt_boot.o
+
+obj-$(CONFIG_PRMEM)		+= prmem.o
diff --git a/arch/x86/mm/prmem.c b/arch/x86/mm/prmem.c
new file mode 100644
index 000000000000..f4b36baa2f19
--- /dev/null
+++ b/arch/x86/mm/prmem.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * prmem.c: Memory Protection Library
+ *
+ * (C) Copyright 2017-2018 Huawei Technologies Co. Ltd.
+ * Author: Igor Stoppa <igor.stoppa@huawei.com>
+ */
+
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/compiler.h>
+#include <linux/slab.h>
+#include <linux/rcupdate.h>
+#include <linux/prmem.h>
+
+extern __ro_after_init bool wr_ready;
+__ro_after_init struct mm_struct *wr_poking_mm;
+__ro_after_init unsigned long wr_poking_base;
+
+/*
+ * The following two variables are statically allocated by the linker
+ * script at the the boundaries of the memory region (rounded up to
+ * multiples of PAGE_SIZE) reserved for __wr_after_init.
+ */
+extern long __start_wr_after_init;
+extern long __end_wr_after_init;
+
+struct mm_struct *copy_init_mm(void);
+void __init wr_poking_init(void)
+{
+	unsigned long start = (unsigned long)&__start_wr_after_init;
+	unsigned long end = (unsigned long)&__end_wr_after_init;
+	unsigned long i;
+
+	wr_poking_mm = copy_init_mm();
+	if (WARN_ONCE(!wr_poking_mm, "No alternate mapping available."))
+		return;
+
+	/*
+	 * Place 64TB of kernel address space within 128TB of user address
+	 * space, at a random page aligned offset.
+	 */
+	wr_poking_base = (((unsigned long)kaslr_get_random_long("WR Poke")) &
+			  PAGE_MASK) % (64 * _BITUL(40));
+
+	/* Create alternate mapping for the entire wr_after_init range. */
+	for (i = start; i < end; i += PAGE_SIZE) {
+		struct page *page;
+		spinlock_t *ptl;
+		pte_t pte;
+		pte_t *ptep;
+		unsigned long wr_poking_addr;
+
+		page = virt_to_page(i);
+		if (WARN_ONCE(!page, "WR memory without physical page"))
+			return;
+		wr_poking_addr = i + wr_poking_base;
+
+		/* The lock is not needed, but avoids open-coding. */
+		ptep = get_locked_pte(wr_poking_mm, wr_poking_addr, &ptl);
+		if (WARN_ONCE(!ptep, "No pte for writable mapping"))
+			return;
+
+		pte = mk_pte(page, PAGE_KERNEL);
+		set_pte_at(wr_poking_mm, wr_poking_addr, ptep, pte);
+		spin_unlock(ptl);
+	}
+	wr_ready = true;
+}
-- 
2.19.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 06/12] __wr_after_init: Documentation: self-protection
       [not found] <20181221181423.20455-1-igor.stoppa@huawei.com>
                   ` (4 preceding siblings ...)
  2018-12-21 18:14 ` [PATCH 05/12] __wr_after_init: x86_64: __wr_op Igor Stoppa
@ 2018-12-21 18:14 ` Igor Stoppa
  2018-12-21 18:14 ` [PATCH 07/12] __wr_after_init: lkdtm test Igor Stoppa
                   ` (5 subsequent siblings)
  11 siblings, 0 replies; 24+ messages in thread
From: Igor Stoppa @ 2018-12-21 18:14 UTC (permalink / raw)
  To: Andy Lutomirski, Matthew Wilcox, Peter Zijlstra, Dave Hansen,
	Mimi Zohar, Thiago Jung Bauermann
  Cc: igor.stoppa, Nadav Amit, Kees Cook, Ahmed Soliman,
	linux-integrity, kernel-hardening, linux-mm, linux-kernel

Update the self-protection documentation, to mention also the use of the
__wr_after_init attribute.

Signed-off-by: Igor Stoppa <igor.stoppa@huawei.com>

CC: Andy Lutomirski <luto@amacapital.net>
CC: Nadav Amit <nadav.amit@gmail.com>
CC: Matthew Wilcox <willy@infradead.org>
CC: Peter Zijlstra <peterz@infradead.org>
CC: Kees Cook <keescook@chromium.org>
CC: Dave Hansen <dave.hansen@linux.intel.com>
CC: Mimi Zohar <zohar@linux.vnet.ibm.com>
CC: Thiago Jung Bauermann <bauerman@linux.ibm.com>
CC: Ahmed Soliman <ahmedsoliman@mena.vt.edu>
CC: linux-integrity@vger.kernel.org
CC: kernel-hardening@lists.openwall.com
CC: linux-mm@kvack.org
CC: linux-kernel@vger.kernel.org
---
 Documentation/security/self-protection.rst | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/Documentation/security/self-protection.rst b/Documentation/security/self-protection.rst
index f584fb74b4ff..df2614bc25b9 100644
--- a/Documentation/security/self-protection.rst
+++ b/Documentation/security/self-protection.rst
@@ -84,12 +84,14 @@ For variables that are initialized once at ``__init`` time, these can
 be marked with the (new and under development) ``__ro_after_init``
 attribute.
 
-What remains are variables that are updated rarely (e.g. GDT). These
-will need another infrastructure (similar to the temporary exceptions
-made to kernel code mentioned above) that allow them to spend the rest
-of their lifetime read-only. (For example, when being updated, only the
-CPU thread performing the update would be given uninterruptible write
-access to the memory.)
+Others, which are statically allocated, but still need to be updated
+rarely, can be marked with the ``__wr_after_init`` attribute.
+
+The update mechanism must avoid exposing the data to rogue alterations
+during the update. For example, only the CPU thread performing the update
+would be given uninterruptible write access to the memory.
+
+Currently there is no protection available for data allocated dynamically.
 
 Segregation of kernel memory from userspace memory
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- 
2.19.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 07/12] __wr_after_init: lkdtm test
       [not found] <20181221181423.20455-1-igor.stoppa@huawei.com>
                   ` (5 preceding siblings ...)
  2018-12-21 18:14 ` [PATCH 06/12] __wr_after_init: Documentation: self-protection Igor Stoppa
@ 2018-12-21 18:14 ` Igor Stoppa
  2018-12-21 18:14 ` [PATCH 08/12] rodata_test: refactor tests Igor Stoppa
                   ` (4 subsequent siblings)
  11 siblings, 0 replies; 24+ messages in thread
From: Igor Stoppa @ 2018-12-21 18:14 UTC (permalink / raw)
  To: Andy Lutomirski, Matthew Wilcox, Peter Zijlstra, Dave Hansen,
	Mimi Zohar, Thiago Jung Bauermann
  Cc: igor.stoppa, Nadav Amit, Kees Cook, Ahmed Soliman,
	linux-integrity, kernel-hardening, linux-mm, linux-kernel

Verify that trying to modify a variable with the __wr_after_init
attribute will cause a crash.

Signed-off-by: Igor Stoppa <igor.stoppa@huawei.com>

CC: Andy Lutomirski <luto@amacapital.net>
CC: Nadav Amit <nadav.amit@gmail.com>
CC: Matthew Wilcox <willy@infradead.org>
CC: Peter Zijlstra <peterz@infradead.org>
CC: Kees Cook <keescook@chromium.org>
CC: Dave Hansen <dave.hansen@linux.intel.com>
CC: Mimi Zohar <zohar@linux.vnet.ibm.com>
CC: Thiago Jung Bauermann <bauerman@linux.ibm.com>
CC: Ahmed Soliman <ahmedsoliman@mena.vt.edu>
CC: linux-integrity@vger.kernel.org
CC: kernel-hardening@lists.openwall.com
CC: linux-mm@kvack.org
CC: linux-kernel@vger.kernel.org
---
 drivers/misc/lkdtm/core.c  |  3 +++
 drivers/misc/lkdtm/lkdtm.h |  3 +++
 drivers/misc/lkdtm/perms.c | 29 +++++++++++++++++++++++++++++
 3 files changed, 35 insertions(+)

diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c
index 2837dc77478e..73c34b17c433 100644
--- a/drivers/misc/lkdtm/core.c
+++ b/drivers/misc/lkdtm/core.c
@@ -155,6 +155,9 @@ static const struct crashtype crashtypes[] = {
 	CRASHTYPE(ACCESS_USERSPACE),
 	CRASHTYPE(WRITE_RO),
 	CRASHTYPE(WRITE_RO_AFTER_INIT),
+#ifdef CONFIG_PRMEM
+	CRASHTYPE(WRITE_WR_AFTER_INIT),
+#endif
 	CRASHTYPE(WRITE_KERN),
 	CRASHTYPE(REFCOUNT_INC_OVERFLOW),
 	CRASHTYPE(REFCOUNT_ADD_OVERFLOW),
diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h
index 3c6fd327e166..abba2f52ffa6 100644
--- a/drivers/misc/lkdtm/lkdtm.h
+++ b/drivers/misc/lkdtm/lkdtm.h
@@ -38,6 +38,9 @@ void lkdtm_READ_BUDDY_AFTER_FREE(void);
 void __init lkdtm_perms_init(void);
 void lkdtm_WRITE_RO(void);
 void lkdtm_WRITE_RO_AFTER_INIT(void);
+#ifdef CONFIG_PRMEM
+void lkdtm_WRITE_WR_AFTER_INIT(void);
+#endif
 void lkdtm_WRITE_KERN(void);
 void lkdtm_EXEC_DATA(void);
 void lkdtm_EXEC_STACK(void);
diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c
index 53b85c9d16b8..f681730aa652 100644
--- a/drivers/misc/lkdtm/perms.c
+++ b/drivers/misc/lkdtm/perms.c
@@ -9,6 +9,7 @@
 #include <linux/vmalloc.h>
 #include <linux/mman.h>
 #include <linux/uaccess.h>
+#include <linux/prmem.h>
 #include <asm/cacheflush.h>
 
 /* Whether or not to fill the target memory area with do_nothing(). */
@@ -27,6 +28,10 @@ static const unsigned long rodata = 0xAA55AA55;
 /* This is marked __ro_after_init, so it should ultimately be .rodata. */
 static unsigned long ro_after_init __ro_after_init = 0x55AA5500;
 
+/* This is marked __wr_after_init, so it should be in .rodata. */
+static
+unsigned long wr_after_init __wr_after_init = 0x55AA5500;
+
 /*
  * This just returns to the caller. It is designed to be copied into
  * non-executable memory regions.
@@ -104,6 +109,28 @@ void lkdtm_WRITE_RO_AFTER_INIT(void)
 	*ptr ^= 0xabcd1234;
 }
 
+#ifdef CONFIG_PRMEM
+
+void lkdtm_WRITE_WR_AFTER_INIT(void)
+{
+	unsigned long *ptr = &wr_after_init;
+
+	/*
+	 * Verify we were written to during init. Since an Oops
+	 * is considered a "success", a failure is to just skip the
+	 * real test.
+	 */
+	if ((*ptr & 0xAA) != 0xAA) {
+		pr_info("%p was NOT written during init!?\n", ptr);
+		return;
+	}
+
+	pr_info("attempting bad wr_after_init write at %p\n", ptr);
+	*ptr ^= 0xabcd1234;
+}
+
+#endif
+
 void lkdtm_WRITE_KERN(void)
 {
 	size_t size;
@@ -200,4 +227,6 @@ void __init lkdtm_perms_init(void)
 	/* Make sure we can write to __ro_after_init values during __init */
 	ro_after_init |= 0xAA;
 
+	/* Make sure we can write to __wr_after_init during __init */
+	wr_after_init |= 0xAA;
 }
-- 
2.19.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 08/12] rodata_test: refactor tests
       [not found] <20181221181423.20455-1-igor.stoppa@huawei.com>
                   ` (6 preceding siblings ...)
  2018-12-21 18:14 ` [PATCH 07/12] __wr_after_init: lkdtm test Igor Stoppa
@ 2018-12-21 18:14 ` Igor Stoppa
  2018-12-21 18:14 ` [PATCH 09/12] rodata_test: add verification for __wr_after_init Igor Stoppa
                   ` (3 subsequent siblings)
  11 siblings, 0 replies; 24+ messages in thread
From: Igor Stoppa @ 2018-12-21 18:14 UTC (permalink / raw)
  To: Andy Lutomirski, Matthew Wilcox, Peter Zijlstra, Dave Hansen,
	Mimi Zohar, Thiago Jung Bauermann
  Cc: igor.stoppa, Nadav Amit, Kees Cook, Ahmed Soliman,
	linux-integrity, kernel-hardening, linux-mm, linux-kernel

Refactor the test cases, in preparation for using them also for testing
__wr_after_init memory, when available.

Signed-off-by: Igor Stoppa <igor.stoppa@huawei.com>

CC: Andy Lutomirski <luto@amacapital.net>
CC: Nadav Amit <nadav.amit@gmail.com>
CC: Matthew Wilcox <willy@infradead.org>
CC: Peter Zijlstra <peterz@infradead.org>
CC: Kees Cook <keescook@chromium.org>
CC: Dave Hansen <dave.hansen@linux.intel.com>
CC: Mimi Zohar <zohar@linux.vnet.ibm.com>
CC: Thiago Jung Bauermann <bauerman@linux.ibm.com>
CC: Ahmed Soliman <ahmedsoliman@mena.vt.edu>
CC: linux-integrity@vger.kernel.org
CC: kernel-hardening@lists.openwall.com
CC: linux-mm@kvack.org
CC: linux-kernel@vger.kernel.org
---
 mm/rodata_test.c | 48 ++++++++++++++++++++++++++++--------------------
 1 file changed, 28 insertions(+), 20 deletions(-)

diff --git a/mm/rodata_test.c b/mm/rodata_test.c
index d908c8769b48..e1349520b436 100644
--- a/mm/rodata_test.c
+++ b/mm/rodata_test.c
@@ -14,44 +14,52 @@
 #include <linux/uaccess.h>
 #include <asm/sections.h>
 
-static const int rodata_test_data = 0xC3;
+#define INIT_TEST_VAL 0xC3
 
-void rodata_test(void)
+static const int rodata_test_data = INIT_TEST_VAL;
+
+static bool test_data(char *data_type, const int *data,
+		      unsigned long start, unsigned long end)
 {
-	unsigned long start, end;
 	int zero = 0;
 
 	/* test 1: read the value */
 	/* If this test fails, some previous testrun has clobbered the state */
-	if (!rodata_test_data) {
-		pr_err("test 1 fails (start data)\n");
-		return;
+	if (*data != INIT_TEST_VAL) {
+		pr_err("%s: test 1 fails (init data value)\n", data_type);
+		return false;
 	}
 
 	/* test 2: write to the variable; this should fault */
-	if (!probe_kernel_write((void *)&rodata_test_data,
-				(void *)&zero, sizeof(zero))) {
-		pr_err("test data was not read only\n");
-		return;
+	if (!probe_kernel_write((void *)data, (void *)&zero, sizeof(zero))) {
+		pr_err("%s: test data was not read only\n", data_type);
+		return false;
 	}
 
 	/* test 3: check the value hasn't changed */
-	if (rodata_test_data == zero) {
-		pr_err("test data was changed\n");
-		return;
+	if (*data != INIT_TEST_VAL) {
+		pr_err("%s: test data was changed\n", data_type);
+		return false;
 	}
 
 	/* test 4: check if the rodata section is PAGE_SIZE aligned */
-	start = (unsigned long)__start_rodata;
-	end = (unsigned long)__end_rodata;
 	if (start & (PAGE_SIZE - 1)) {
-		pr_err("start of .rodata is not page size aligned\n");
-		return;
+		pr_err("%s: start of data is not page size aligned\n",
+		       data_type);
+		return false;
 	}
 	if (end & (PAGE_SIZE - 1)) {
-		pr_err("end of .rodata is not page size aligned\n");
-		return;
+		pr_err("%s: end of data is not page size aligned\n",
+		       data_type);
+		return false;
 	}
+	pr_info("%s tests were successful", data_type);
+	return true;
+}
 
-	pr_info("all tests were successful\n");
+void rodata_test(void)
+{
+	test_data("rodata", &rodata_test_data,
+		  (unsigned long)&__start_rodata,
+		  (unsigned long)&__end_rodata);
 }
-- 
2.19.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 09/12] rodata_test: add verification for __wr_after_init
       [not found] <20181221181423.20455-1-igor.stoppa@huawei.com>
                   ` (7 preceding siblings ...)
  2018-12-21 18:14 ` [PATCH 08/12] rodata_test: refactor tests Igor Stoppa
@ 2018-12-21 18:14 ` Igor Stoppa
  2018-12-21 18:14 ` [PATCH 10/12] __wr_after_init: test write rare functionality Igor Stoppa
                   ` (2 subsequent siblings)
  11 siblings, 0 replies; 24+ messages in thread
From: Igor Stoppa @ 2018-12-21 18:14 UTC (permalink / raw)
  To: Andy Lutomirski, Matthew Wilcox, Peter Zijlstra, Dave Hansen,
	Mimi Zohar, Thiago Jung Bauermann
  Cc: igor.stoppa, Nadav Amit, Kees Cook, Ahmed Soliman,
	linux-integrity, kernel-hardening, linux-mm, linux-kernel

The write protection of the __wr_after_init data can be verified with the
same methodology used for const data.

Signed-off-by: Igor Stoppa <igor.stoppa@huawei.com>

CC: Andy Lutomirski <luto@amacapital.net>
CC: Nadav Amit <nadav.amit@gmail.com>
CC: Matthew Wilcox <willy@infradead.org>
CC: Peter Zijlstra <peterz@infradead.org>
CC: Kees Cook <keescook@chromium.org>
CC: Dave Hansen <dave.hansen@linux.intel.com>
CC: Mimi Zohar <zohar@linux.vnet.ibm.com>
CC: Thiago Jung Bauermann <bauerman@linux.ibm.com>
CC: Ahmed Soliman <ahmedsoliman@mena.vt.edu>
CC: linux-integrity@vger.kernel.org
CC: kernel-hardening@lists.openwall.com
CC: linux-mm@kvack.org
CC: linux-kernel@vger.kernel.org
---
 mm/rodata_test.c | 27 ++++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/mm/rodata_test.c b/mm/rodata_test.c
index e1349520b436..a669cf9f5a61 100644
--- a/mm/rodata_test.c
+++ b/mm/rodata_test.c
@@ -16,8 +16,23 @@
 
 #define INIT_TEST_VAL 0xC3
 
+/*
+ * Note: __ro_after_init data is, for every practical effect, equivalent to
+ * const data, since they are even write protected at the same time; there
+ * is no need for separate testing.
+ * __wr_after_init data, otoh, is altered also after the write protection
+ * takes place and it cannot be exploitable for altering more permanent
+ * data.
+ */
+
 static const int rodata_test_data = INIT_TEST_VAL;
 
+#ifdef CONFIG_PRMEM
+static int wr_after_init_test_data __wr_after_init = INIT_TEST_VAL;
+extern long __start_wr_after_init;
+extern long __end_wr_after_init;
+#endif
+
 static bool test_data(char *data_type, const int *data,
 		      unsigned long start, unsigned long end)
 {
@@ -59,7 +74,13 @@ static bool test_data(char *data_type, const int *data,
 
 void rodata_test(void)
 {
-	test_data("rodata", &rodata_test_data,
-		  (unsigned long)&__start_rodata,
-		  (unsigned long)&__end_rodata);
+	if (!test_data("rodata", &rodata_test_data,
+		       (unsigned long)&__start_rodata,
+		       (unsigned long)&__end_rodata))
+		return;
+#ifdef CONFIG_PRMEM
+	    test_data("wr after init data", &wr_after_init_test_data,
+		      (unsigned long)&__start_wr_after_init,
+		      (unsigned long)&__end_wr_after_init);
+#endif
 }
-- 
2.19.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 10/12] __wr_after_init: test write rare functionality
       [not found] <20181221181423.20455-1-igor.stoppa@huawei.com>
                   ` (8 preceding siblings ...)
  2018-12-21 18:14 ` [PATCH 09/12] rodata_test: add verification for __wr_after_init Igor Stoppa
@ 2018-12-21 18:14 ` Igor Stoppa
  2018-12-21 18:14 ` [PATCH 11/12] IMA: turn ima_policy_flags into __wr_after_init Igor Stoppa
  2018-12-21 18:14 ` [PATCH 12/12] x86_64: __clear_user as case of __memset_user Igor Stoppa
  11 siblings, 0 replies; 24+ messages in thread
From: Igor Stoppa @ 2018-12-21 18:14 UTC (permalink / raw)
  To: Andy Lutomirski, Matthew Wilcox, Peter Zijlstra, Dave Hansen,
	Mimi Zohar, Thiago Jung Bauermann
  Cc: igor.stoppa, Nadav Amit, Kees Cook, Ahmed Soliman,
	linux-integrity, kernel-hardening, linux-mm, linux-kernel

Set of test cases meant to confirm that the write rare functionality
works as expected.
It can be optionally compiled as module.

Signed-off-by: Igor Stoppa <igor.stoppa@huawei.com>

CC: Andy Lutomirski <luto@amacapital.net>
CC: Nadav Amit <nadav.amit@gmail.com>
CC: Matthew Wilcox <willy@infradead.org>
CC: Peter Zijlstra <peterz@infradead.org>
CC: Kees Cook <keescook@chromium.org>
CC: Dave Hansen <dave.hansen@linux.intel.com>
CC: Mimi Zohar <zohar@linux.vnet.ibm.com>
CC: Thiago Jung Bauermann <bauerman@linux.ibm.com>
CC: Ahmed Soliman <ahmedsoliman@mena.vt.edu>
CC: linux-integrity@vger.kernel.org
CC: kernel-hardening@lists.openwall.com
CC: linux-mm@kvack.org
CC: linux-kernel@vger.kernel.org
---
 mm/Kconfig.debug     |   8 +++
 mm/Makefile          |   1 +
 mm/test_write_rare.c | 135 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 144 insertions(+)
 create mode 100644 mm/test_write_rare.c

diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index b10305cfac3c..ae018e56c4e4 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -102,3 +102,11 @@ config DEBUG_PRMEM
     help
       After any write rare operation, compares the data written with the
       value provided by the caller.
+
+config DEBUG_PRMEM_TEST
+    tristate "Run self test for statically allocated protected memory"
+    depends on PRMEM
+    default n
+    help
+      Tries to verify that the protection for statically allocated memory
+      works correctly and that the memory is effectively protected.
diff --git a/mm/Makefile b/mm/Makefile
index ef3867c16ce0..8de1d468f4e7 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -59,6 +59,7 @@ obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o
 obj-$(CONFIG_SLOB) += slob.o
 obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
 obj-$(CONFIG_PRMEM) += prmem.o
+obj-$(CONFIG_DEBUG_PRMEM_TEST) += test_write_rare.o
 obj-$(CONFIG_KSM) += ksm.o
 obj-$(CONFIG_PAGE_POISONING) += page_poison.o
 obj-$(CONFIG_SLAB) += slab.o
diff --git a/mm/test_write_rare.c b/mm/test_write_rare.c
new file mode 100644
index 000000000000..30574bc34a20
--- /dev/null
+++ b/mm/test_write_rare.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * test_write_rare.c
+ *
+ * (C) Copyright 2018 Huawei Technologies Co. Ltd.
+ * Author: Igor Stoppa <igor.stoppa@huawei.com>
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/bug.h>
+#include <linux/prmem.h>
+
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+extern long __start_wr_after_init;
+extern long __end_wr_after_init;
+
+static __wr_after_init int scalar = '0';
+static __wr_after_init u8 array[PAGE_SIZE * 3] __aligned(PAGE_SIZE);
+
+/* The section must occupy a non-zero number of whole pages */
+static bool test_alignment(void)
+{
+	unsigned long pstart = (unsigned long)&__start_wr_after_init;
+	unsigned long pend = (unsigned long)&__end_wr_after_init;
+
+	if (WARN((pstart & ~PAGE_MASK) || (pend & ~PAGE_MASK) ||
+		 (pstart >= pend), "Boundaries test failed."))
+		return false;
+	pr_info("Boundaries test passed.");
+	return true;
+}
+
+static bool test_pattern(void)
+{
+	return (memtst(array, '0', PAGE_SIZE / 2) ||
+		memtst(array + PAGE_SIZE / 2, '1', PAGE_SIZE * 3 / 4) ||
+		memtst(array + PAGE_SIZE * 5 / 4, '0', PAGE_SIZE / 2) ||
+		memtst(array + PAGE_SIZE * 7 / 4, '1', PAGE_SIZE * 3 / 4) ||
+		memtst(array + PAGE_SIZE * 5 / 2, '0', PAGE_SIZE / 2));
+}
+
+static bool test_wr_memset(void)
+{
+	int new_val = '1';
+
+	wr_memset(&scalar, new_val, sizeof(scalar));
+	if (WARN(memtst(&scalar, new_val, sizeof(scalar)),
+		 "Scalar write rare memset test failed."))
+		return false;
+
+	pr_info("Scalar write rare memset test passed.");
+
+	wr_memset(array, '0', PAGE_SIZE * 3);
+	if (WARN(memtst(array, '0', PAGE_SIZE * 3),
+		 "Array write rare memset test failed."))
+		return false;
+
+	wr_memset(array + PAGE_SIZE / 2, '1', PAGE_SIZE * 2);
+	if (WARN(memtst(array + PAGE_SIZE / 2, '1', PAGE_SIZE * 2),
+		 "Array write rare memset test failed."))
+		return false;
+
+	wr_memset(array + PAGE_SIZE * 5 / 4, '0', PAGE_SIZE / 2);
+	if (WARN(memtst(array + PAGE_SIZE * 5 / 4, '0', PAGE_SIZE / 2),
+		 "Array write rare memset test failed."))
+		return false;
+
+	if (WARN(test_pattern(), "Array write rare memset test failed."))
+		return false;
+
+	pr_info("Array write rare memset test passed.");
+	return true;
+}
+
+static u8 array_1[PAGE_SIZE * 2];
+static u8 array_2[PAGE_SIZE * 2];
+
+static bool test_wr_memcpy(void)
+{
+	int new_val = 0x12345678;
+
+	wr_assign(scalar, new_val);
+	if (WARN(memcmp(&scalar, &new_val, sizeof(scalar)),
+		 "Scalar write rare memcpy test failed."))
+		return false;
+	pr_info("Scalar write rare memcpy test passed.");
+
+	wr_memset(array, '0', PAGE_SIZE * 3);
+	memset(array_1, '1', PAGE_SIZE * 2);
+	memset(array_2, '0', PAGE_SIZE * 2);
+	wr_memcpy(array + PAGE_SIZE / 2, array_1, PAGE_SIZE * 2);
+	wr_memcpy(array + PAGE_SIZE * 5 / 4, array_2, PAGE_SIZE / 2);
+
+	if (WARN(test_pattern(), "Array write rare memcpy test failed."))
+		return false;
+
+	pr_info("Array write rare memcpy test passed.");
+	return true;
+}
+
+static __wr_after_init int *dst;
+static int reference = 0x54;
+
+static bool test_wr_rcu_assign_pointer(void)
+{
+	wr_rcu_assign_pointer(dst, &reference);
+	return dst == &reference;
+}
+
+static int __init test_static_wr_init_module(void)
+{
+	pr_info("static write_rare test");
+	if (WARN(!(test_alignment() &&
+		   test_wr_memset() &&
+		   test_wr_memcpy() &&
+		   test_wr_rcu_assign_pointer()),
+		 "static rare-write test failed"))
+		return -EFAULT;
+	pr_info("static write_rare test passed");
+	return 0;
+}
+
+module_init(test_static_wr_init_module);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Igor Stoppa <igor.stoppa@huawei.com>");
+MODULE_DESCRIPTION("Test module for static write rare.");
-- 
2.19.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 11/12] IMA: turn ima_policy_flags into __wr_after_init
       [not found] <20181221181423.20455-1-igor.stoppa@huawei.com>
                   ` (9 preceding siblings ...)
  2018-12-21 18:14 ` [PATCH 10/12] __wr_after_init: test write rare functionality Igor Stoppa
@ 2018-12-21 18:14 ` Igor Stoppa
  2018-12-21 18:14 ` [PATCH 12/12] x86_64: __clear_user as case of __memset_user Igor Stoppa
  11 siblings, 0 replies; 24+ messages in thread
From: Igor Stoppa @ 2018-12-21 18:14 UTC (permalink / raw)
  To: Andy Lutomirski, Matthew Wilcox, Peter Zijlstra, Dave Hansen,
	Mimi Zohar, Thiago Jung Bauermann
  Cc: igor.stoppa, Nadav Amit, Kees Cook, Ahmed Soliman,
	linux-integrity, kernel-hardening, linux-mm, linux-kernel

The policy flags could be targeted by an attacker aiming at disabling IMA,
so that there would be no trace of a file system modification in the
measurement list.

Since the flags can be altered at runtime, it is not possible to make
them become fully read-only, for example with __ro_after_init.

__wr_after_init can still provide some protection, at least against
simple memory overwrite attacks

Signed-off-by: Igor Stoppa <igor.stoppa@huawei.com>

CC: Andy Lutomirski <luto@amacapital.net>
CC: Nadav Amit <nadav.amit@gmail.com>
CC: Matthew Wilcox <willy@infradead.org>
CC: Peter Zijlstra <peterz@infradead.org>
CC: Kees Cook <keescook@chromium.org>
CC: Dave Hansen <dave.hansen@linux.intel.com>
CC: Mimi Zohar <zohar@linux.vnet.ibm.com>
CC: Thiago Jung Bauermann <bauerman@linux.ibm.com>
CC: Ahmed Soliman <ahmedsoliman@mena.vt.edu>
CC: linux-integrity@vger.kernel.org
CC: kernel-hardening@lists.openwall.com
CC: linux-mm@kvack.org
CC: linux-kernel@vger.kernel.org
---
 security/integrity/ima/ima.h        | 3 ++-
 security/integrity/ima/ima_policy.c | 9 +++++----
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index cc12f3449a72..297c25f5122e 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -24,6 +24,7 @@
 #include <linux/hash.h>
 #include <linux/tpm.h>
 #include <linux/audit.h>
+#include <linux/prmem.h>
 #include <crypto/hash_info.h>
 
 #include "../integrity.h"
@@ -50,7 +51,7 @@ enum tpm_pcrs { TPM_PCR0 = 0, TPM_PCR8 = 8 };
 #define IMA_TEMPLATE_IMA_FMT "d|n"
 
 /* current content of the policy */
-extern int ima_policy_flag;
+extern int ima_policy_flag __wr_after_init;
 
 /* set during initialization */
 extern int ima_hash_algo;
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index 7489cb7de6dc..2004de818d92 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -47,7 +47,7 @@
 #define INVALID_PCR(a) (((a) < 0) || \
 	(a) >= (FIELD_SIZEOF(struct integrity_iint_cache, measured_pcrs) * 8))
 
-int ima_policy_flag;
+int ima_policy_flag __wr_after_init;
 static int temp_ima_appraise;
 static int build_ima_appraise __ro_after_init;
 
@@ -452,12 +452,13 @@ void ima_update_policy_flag(void)
 
 	list_for_each_entry(entry, ima_rules, list) {
 		if (entry->action & IMA_DO_MASK)
-			ima_policy_flag |= entry->action;
+			wr_assign(ima_policy_flag,
+				  ima_policy_flag | entry->action);
 	}
 
 	ima_appraise |= (build_ima_appraise | temp_ima_appraise);
 	if (!ima_appraise)
-		ima_policy_flag &= ~IMA_APPRAISE;
+		wr_assign(ima_policy_flag, ima_policy_flag & ~IMA_APPRAISE);
 }
 
 static int ima_appraise_flag(enum ima_hooks func)
@@ -574,7 +575,7 @@ void ima_update_policy(void)
 	list_splice_tail_init_rcu(&ima_temp_rules, policy, synchronize_rcu);
 
 	if (ima_rules != policy) {
-		ima_policy_flag = 0;
+		wr_assign(ima_policy_flag, 0);
 		ima_rules = policy;
 	}
 	ima_update_policy_flag();
-- 
2.19.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 12/12] x86_64: __clear_user as case of __memset_user
       [not found] <20181221181423.20455-1-igor.stoppa@huawei.com>
                   ` (10 preceding siblings ...)
  2018-12-21 18:14 ` [PATCH 11/12] IMA: turn ima_policy_flags into __wr_after_init Igor Stoppa
@ 2018-12-21 18:14 ` Igor Stoppa
  11 siblings, 0 replies; 24+ messages in thread
From: Igor Stoppa @ 2018-12-21 18:14 UTC (permalink / raw)
  To: Andy Lutomirski, Matthew Wilcox, Peter Zijlstra, Dave Hansen,
	Mimi Zohar, Thiago Jung Bauermann
  Cc: igor.stoppa, Nadav Amit, Kees Cook, Ahmed Soliman,
	linux-integrity, kernel-hardening, linux-mm, linux-kernel

To avoid code duplication, re-use __memset_user(), when clearing
user-space memory.

The overhead should be minimal (2 extra register assignments) and
outside of the writing loop.

Signed-off-by: Igor Stoppa <igor.stoppa@huawei.com>

CC: Andy Lutomirski <luto@amacapital.net>
CC: Nadav Amit <nadav.amit@gmail.com>
CC: Matthew Wilcox <willy@infradead.org>
CC: Peter Zijlstra <peterz@infradead.org>
CC: Kees Cook <keescook@chromium.org>
CC: Dave Hansen <dave.hansen@linux.intel.com>
CC: Mimi Zohar <zohar@linux.vnet.ibm.com>
CC: Thiago Jung Bauermann <bauerman@linux.ibm.com>
CC: Ahmed Soliman <ahmedsoliman@mena.vt.edu>
CC: linux-integrity@vger.kernel.org
CC: kernel-hardening@lists.openwall.com
CC: linux-mm@kvack.org
CC: linux-kernel@vger.kernel.org
---
 arch/x86/lib/usercopy_64.c | 29 +----------------------------
 1 file changed, 1 insertion(+), 28 deletions(-)

diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 84f8f8a20b30..ab6aabb62055 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -69,34 +69,7 @@ EXPORT_SYMBOL(memset_user);
 
 unsigned long __clear_user(void __user *addr, unsigned long size)
 {
-	long __d0;
-	might_fault();
-	/* no memory constraint because it doesn't change any memory gcc knows
-	   about */
-	stac();
-	asm volatile(
-		"	testq  %[size8],%[size8]\n"
-		"	jz     4f\n"
-		"0:	movq $0,(%[dst])\n"
-		"	addq   $8,%[dst]\n"
-		"	decl %%ecx ; jnz   0b\n"
-		"4:	movq  %[size1],%%rcx\n"
-		"	testl %%ecx,%%ecx\n"
-		"	jz     2f\n"
-		"1:	movb   $0,(%[dst])\n"
-		"	incq   %[dst]\n"
-		"	decl %%ecx ; jnz  1b\n"
-		"2:\n"
-		".section .fixup,\"ax\"\n"
-		"3:	lea 0(%[size1],%[size8],8),%[size8]\n"
-		"	jmp 2b\n"
-		".previous\n"
-		_ASM_EXTABLE_UA(0b, 3b)
-		_ASM_EXTABLE_UA(1b, 2b)
-		: [size8] "=&c"(size), [dst] "=&D" (__d0)
-		: [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr));
-	clac();
-	return size;
+	return __memset_user(addr, 0, size);
 }
 EXPORT_SYMBOL(__clear_user);
 
-- 
2.19.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 01/12] x86_64: memset_user()
  2018-12-21 18:14 ` [PATCH 01/12] x86_64: memset_user() Igor Stoppa
@ 2018-12-21 18:25   ` Matthew Wilcox
  2018-12-21 18:46     ` Igor Stoppa
  2018-12-21 20:05     ` Cyrill Gorcunov
  0 siblings, 2 replies; 24+ messages in thread
From: Matthew Wilcox @ 2018-12-21 18:25 UTC (permalink / raw)
  To: Igor Stoppa
  Cc: Andy Lutomirski, Peter Zijlstra, Dave Hansen, Mimi Zohar,
	Thiago Jung Bauermann, igor.stoppa, Nadav Amit, Kees Cook,
	Ahmed Soliman, linux-integrity, kernel-hardening, linux-mm,
	linux-kernel

On Fri, Dec 21, 2018 at 08:14:12PM +0200, Igor Stoppa wrote:
> +unsigned long __memset_user(void __user *addr, int c, unsigned long size)
> +{
> +	long __d0;
> +	unsigned long  pattern = 0;
> +	int i;
> +
> +	for (i = 0; i < 8; i++)
> +		pattern = (pattern << 8) | (0xFF & c);

That's inefficient.

	pattern = (unsigned char)c;
	pattern |= pattern << 8;
	pattern |= pattern << 16;
	pattern |= pattern << 32;


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 03/12] __wr_after_init: generic functionality
  2018-12-21 18:14 ` [PATCH 03/12] __wr_after_init: generic functionality Igor Stoppa
@ 2018-12-21 18:41   ` Matthew Wilcox
  2018-12-21 19:07     ` Igor Stoppa
  0 siblings, 1 reply; 24+ messages in thread
From: Matthew Wilcox @ 2018-12-21 18:41 UTC (permalink / raw)
  To: Igor Stoppa
  Cc: Andy Lutomirski, Peter Zijlstra, Dave Hansen, Mimi Zohar,
	Thiago Jung Bauermann, igor.stoppa, Nadav Amit, Kees Cook,
	Ahmed Soliman, linux-integrity, kernel-hardening, linux-mm,
	linux-kernel

On Fri, Dec 21, 2018 at 08:14:14PM +0200, Igor Stoppa wrote:
> +static inline int memtst(void *p, int c, __kernel_size_t len)

I don't understand why you're verifying that writes actually happen
in production code.  Sure, write lib/test_wrmem.c or something, but
verifying every single rare write seems like a mistake to me.

> +#ifndef CONFIG_PRMEM

So is this PRMEM or wr_mem?  It's not obvious that CONFIG_PRMEM controls
wrmem.

> +#define wr_assign(var, val)	((var) = (val))

The hamming distance between 'var' and 'val' is too small.  The convention
in the line immediately below (p and v) is much more readable.

> +#define wr_rcu_assign_pointer(p, v)	rcu_assign_pointer(p, v)
> +#define wr_assign(var, val) ({			\
> +	typeof(var) tmp = (typeof(var))val;	\
> +						\
> +	wr_memcpy(&var, &tmp, sizeof(var));	\
> +	var;					\
> +})

Doesn't wr_memcpy return 'var' anyway?

> +/**
> + * wr_memcpy() - copyes size bytes from q to p

typo

> + * @p: beginning of the memory to write to
> + * @q: beginning of the memory to read from
> + * @size: amount of bytes to copy
> + *
> + * Returns pointer to the destination

> + * The architecture code must provide:
> + *   void __wr_enable(wr_state_t *state)
> + *   void *__wr_addr(void *addr)
> + *   void *__wr_memcpy(void *p, const void *q, __kernel_size_t size)
> + *   void __wr_disable(wr_state_t *state)

This section shouldn't be in the user documentation of wr_memcpy().

> + */
> +void *wr_memcpy(void *p, const void *q, __kernel_size_t size)
> +{
> +	wr_state_t wr_state;
> +	void *wr_poking_addr = __wr_addr(p);
> +
> +	if (WARN_ONCE(!wr_ready, "No writable mapping available") ||

Surely not.  If somebody's called wr_memcpy() before wr_ready is set,
that means we can just call memcpy().


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 01/12] x86_64: memset_user()
  2018-12-21 18:25   ` Matthew Wilcox
@ 2018-12-21 18:46     ` Igor Stoppa
  2018-12-21 20:05     ` Cyrill Gorcunov
  1 sibling, 0 replies; 24+ messages in thread
From: Igor Stoppa @ 2018-12-21 18:46 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Andy Lutomirski, Peter Zijlstra, Dave Hansen, Mimi Zohar,
	Thiago Jung Bauermann, igor.stoppa, Nadav Amit, Kees Cook,
	Ahmed Soliman, linux-integrity, kernel-hardening, linux-mm,
	linux-kernel



On 21/12/2018 20:25, Matthew Wilcox wrote:
> On Fri, Dec 21, 2018 at 08:14:12PM +0200, Igor Stoppa wrote:
>> +unsigned long __memset_user(void __user *addr, int c, unsigned long size)
>> +{
>> +	long __d0;
>> +	unsigned long  pattern = 0;
>> +	int i;
>> +
>> +	for (i = 0; i < 8; i++)
>> +		pattern = (pattern << 8) | (0xFF & c);
> 
> That's inefficient.
> 
> 	pattern = (unsigned char)c;
> 	pattern |= pattern << 8;
> 	pattern |= pattern << 16;
> 	pattern |= pattern << 32;

ok, thank you

--
igor

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 03/12] __wr_after_init: generic functionality
  2018-12-21 18:41   ` Matthew Wilcox
@ 2018-12-21 19:07     ` Igor Stoppa
  2018-12-21 19:43       ` Matthew Wilcox
  0 siblings, 1 reply; 24+ messages in thread
From: Igor Stoppa @ 2018-12-21 19:07 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Andy Lutomirski, Peter Zijlstra, Dave Hansen, Mimi Zohar,
	Thiago Jung Bauermann, igor.stoppa, Nadav Amit, Kees Cook,
	Ahmed Soliman, linux-integrity, kernel-hardening, linux-mm,
	linux-kernel



On 21/12/2018 20:41, Matthew Wilcox wrote:
> On Fri, Dec 21, 2018 at 08:14:14PM +0200, Igor Stoppa wrote:
>> +static inline int memtst(void *p, int c, __kernel_size_t len)
> 
> I don't understand why you're verifying that writes actually happen
> in production code.  Sure, write lib/test_wrmem.c or something, but
> verifying every single rare write seems like a mistake to me.

This is actually something I wrote more as a stop-gap.
I have the feeling there should be already something similar available.
And probably I could not find it. Unless it's so trivial that it doesn't 
deserve to become a function?

But if there is really no existing alternative, I can put it in a 
separate file.

> 
>> +#ifndef CONFIG_PRMEM
> 
> So is this PRMEM or wr_mem?  It's not obvious that CONFIG_PRMEM controls
> wrmem.

In my mind (maybe still clinging to the old implementation), PRMEM is 
the master toggle, for protected memory.

Then there are various types and the first one being now implemented is 
write rare after init (because ro after init already exists).

However, the same levels of protection should then follow for 
dynamically allocated memory (ye old pmalloc).

PRMEM would then become the moniker for the whole shebang.

>> +#define wr_assign(var, val)	((var) = (val))
> 
> The hamming distance between 'var' and 'val' is too small.  The convention
> in the line immediately below (p and v) is much more readable.

ok, I'll fix it

>> +#define wr_rcu_assign_pointer(p, v)	rcu_assign_pointer(p, v)
>> +#define wr_assign(var, val) ({			\
>> +	typeof(var) tmp = (typeof(var))val;	\
>> +						\
>> +	wr_memcpy(&var, &tmp, sizeof(var));	\
>> +	var;					\
>> +})
> 
> Doesn't wr_memcpy return 'var' anyway?

It should return the destination, which is &var.

But I wanted to return the actual value of the assignment, val

Like if I do  (a = 7)  it evaluates to 7,

similarly wr_assign(a, 7) would also evaluate to 7

The reason why i returned var instead of val is that it would allow to 
detect any error.

>> +/**
>> + * wr_memcpy() - copyes size bytes from q to p
> 
> typo

:-( thanks

>> + * @p: beginning of the memory to write to
>> + * @q: beginning of the memory to read from
>> + * @size: amount of bytes to copy
>> + *
>> + * Returns pointer to the destination
> 
>> + * The architecture code must provide:
>> + *   void __wr_enable(wr_state_t *state)
>> + *   void *__wr_addr(void *addr)
>> + *   void *__wr_memcpy(void *p, const void *q, __kernel_size_t size)
>> + *   void __wr_disable(wr_state_t *state)
> 
> This section shouldn't be in the user documentation of wr_memcpy().

ok

>> + */
>> +void *wr_memcpy(void *p, const void *q, __kernel_size_t size)
>> +{
>> +	wr_state_t wr_state;
>> +	void *wr_poking_addr = __wr_addr(p);
>> +
>> +	if (WARN_ONCE(!wr_ready, "No writable mapping available") ||
> 
> Surely not.  If somebody's called wr_memcpy() before wr_ready is set,
> that means we can just call memcpy().


What I was trying to catch is the case where, after a failed init, the 
writable mapping doesn't exist. In that case wr_ready is also not set.

The problem is that I just don't know what to do in a case where there 
has been such a major error which prevents he creation of hte alternate 
mapping.

I understand that we still want to continue, to provide as much debug 
info as possible, but I am at a loss about finding the saner course of 
actions.

--
igor

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 03/12] __wr_after_init: generic functionality
  2018-12-21 19:07     ` Igor Stoppa
@ 2018-12-21 19:43       ` Matthew Wilcox
  2018-12-21 21:54         ` Igor Stoppa
  0 siblings, 1 reply; 24+ messages in thread
From: Matthew Wilcox @ 2018-12-21 19:43 UTC (permalink / raw)
  To: Igor Stoppa
  Cc: Andy Lutomirski, Peter Zijlstra, Dave Hansen, Mimi Zohar,
	Thiago Jung Bauermann, igor.stoppa, Nadav Amit, Kees Cook,
	Ahmed Soliman, linux-integrity, kernel-hardening, linux-mm,
	linux-kernel

On Fri, Dec 21, 2018 at 09:07:54PM +0200, Igor Stoppa wrote:
> On 21/12/2018 20:41, Matthew Wilcox wrote:
> > On Fri, Dec 21, 2018 at 08:14:14PM +0200, Igor Stoppa wrote:
> > > +static inline int memtst(void *p, int c, __kernel_size_t len)
> > 
> > I don't understand why you're verifying that writes actually happen
> > in production code.  Sure, write lib/test_wrmem.c or something, but
> > verifying every single rare write seems like a mistake to me.
> 
> This is actually something I wrote more as a stop-gap.
> I have the feeling there should be already something similar available.
> And probably I could not find it. Unless it's so trivial that it doesn't
> deserve to become a function?
> 
> But if there is really no existing alternative, I can put it in a separate
> file.

I'm not questioning the implementation, I'm questioning why it's ever
called.  If I type 'p = q', I don't then verify that p actually is equal
to q.  I just assume that the compiler did its job.

> > > +#ifndef CONFIG_PRMEM
> > 
> > So is this PRMEM or wr_mem?  It's not obvious that CONFIG_PRMEM controls
> > wrmem.
> 
> In my mind (maybe still clinging to the old implementation), PRMEM is the
> master toggle, for protected memory.
> 
> Then there are various types and the first one being now implemented is
> write rare after init (because ro after init already exists).
> 
> However, the same levels of protection should then follow for dynamically
> allocated memory (ye old pmalloc).
> 
> PRMEM would then become the moniker for the whole shebang.

To my mind, what we have in this patchset is support for statically
allocated protected (or write-rare) memory.  Later, we'll add dynamically
allocated protected memory.  So it's all protected memory, and we'll
use the same accessors for both ... right?

> > > +#define wr_rcu_assign_pointer(p, v)	rcu_assign_pointer(p, v)
> > > +#define wr_assign(var, val) ({			\
> > > +	typeof(var) tmp = (typeof(var))val;	\
> > > +						\
> > > +	wr_memcpy(&var, &tmp, sizeof(var));	\
> > > +	var;					\
> > > +})
> > 
> > Doesn't wr_memcpy return 'var' anyway?
> 
> It should return the destination, which is &var.
> 
> But I wanted to return the actual value of the assignment, val
> 
> Like if I do  (a = 7)  it evaluates to 7,
> 
> similarly wr_assign(a, 7) would also evaluate to 7
> 
> The reason why i returned var instead of val is that it would allow to
> detect any error.

Ah, good point; I missed the var vs &var distinction.

> > > +void *wr_memcpy(void *p, const void *q, __kernel_size_t size)
> > > +{
> > > +	wr_state_t wr_state;
> > > +	void *wr_poking_addr = __wr_addr(p);
> > > +
> > > +	if (WARN_ONCE(!wr_ready, "No writable mapping available") ||
> > 
> > Surely not.  If somebody's called wr_memcpy() before wr_ready is set,
> > that means we can just call memcpy().
> 
> What I was trying to catch is the case where, after a failed init, the
> writable mapping doesn't exist. In that case wr_ready is also not set.
> 
> The problem is that I just don't know what to do in a case where there has
> been such a major error which prevents he creation of hte alternate mapping.
> 
> I understand that we still want to continue, to provide as much debug info
> as possible, but I am at a loss about finding the saner course of actions.

I don't think there's anything to be done in that case.  Indeed,
I think the only thing to do is panic and stop the whole machine if
initialisation fails.  We'd be in a situation where nothing can update
protected memory, and the machine just won't work.

I suppose we could "fail insecure" and never protect the memory, but I
think that's asking for trouble.

Anyway, my concern was for a driver which can be built either as a
module or built-in.  Its init code will be called before write-protection
happens when it's built in, and after write-protection happens when it's
a module.  It should be able to use wr_assign() in either circumstance.
One might also have a utility function which is called from both init
and non-init code and want to use wr_assign() whether initialisation
has completed or not.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 01/12] x86_64: memset_user()
  2018-12-21 18:25   ` Matthew Wilcox
  2018-12-21 18:46     ` Igor Stoppa
@ 2018-12-21 20:05     ` Cyrill Gorcunov
  2018-12-21 20:29       ` Matthew Wilcox
  1 sibling, 1 reply; 24+ messages in thread
From: Cyrill Gorcunov @ 2018-12-21 20:05 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Igor Stoppa, Andy Lutomirski, Peter Zijlstra, Dave Hansen,
	Mimi Zohar, Thiago Jung Bauermann, igor.stoppa, Nadav Amit,
	Kees Cook, Ahmed Soliman, linux-integrity, kernel-hardening,
	linux-mm, linux-kernel

On Fri, Dec 21, 2018 at 10:25:16AM -0800, Matthew Wilcox wrote:
> On Fri, Dec 21, 2018 at 08:14:12PM +0200, Igor Stoppa wrote:
> > +unsigned long __memset_user(void __user *addr, int c, unsigned long size)
> > +{
> > +	long __d0;
> > +	unsigned long  pattern = 0;
> > +	int i;
> > +
> > +	for (i = 0; i < 8; i++)
> > +		pattern = (pattern << 8) | (0xFF & c);
> 
> That's inefficient.
> 
> 	pattern = (unsigned char)c;
> 	pattern |= pattern << 8;
> 	pattern |= pattern << 16;
> 	pattern |= pattern << 32;

Won't

	pattern = 0x0101010101010101 * c;

do the same but faster?

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 01/12] x86_64: memset_user()
  2018-12-21 20:05     ` Cyrill Gorcunov
@ 2018-12-21 20:29       ` Matthew Wilcox
  2018-12-21 20:46         ` Cyrill Gorcunov
  0 siblings, 1 reply; 24+ messages in thread
From: Matthew Wilcox @ 2018-12-21 20:29 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: Igor Stoppa, Andy Lutomirski, Peter Zijlstra, Dave Hansen,
	Mimi Zohar, Thiago Jung Bauermann, igor.stoppa, Nadav Amit,
	Kees Cook, Ahmed Soliman, linux-integrity, kernel-hardening,
	linux-mm, linux-kernel

On Fri, Dec 21, 2018 at 11:05:46PM +0300, Cyrill Gorcunov wrote:
> On Fri, Dec 21, 2018 at 10:25:16AM -0800, Matthew Wilcox wrote:
> > On Fri, Dec 21, 2018 at 08:14:12PM +0200, Igor Stoppa wrote:
> > > +unsigned long __memset_user(void __user *addr, int c, unsigned long size)
> > > +{
> > > +	long __d0;
> > > +	unsigned long  pattern = 0;
> > > +	int i;
> > > +
> > > +	for (i = 0; i < 8; i++)
> > > +		pattern = (pattern << 8) | (0xFF & c);
> > 
> > That's inefficient.
> > 
> > 	pattern = (unsigned char)c;
> > 	pattern |= pattern << 8;
> > 	pattern |= pattern << 16;
> > 	pattern |= pattern << 32;
> 
> Won't
> 
> 	pattern = 0x0101010101010101 * c;
> 
> do the same but faster?

Depends on your CPU.  Some yes, some no.

(Also you need to cast 'c' to unsigned char to avoid someone passing in
0x1234 and getting 0x4646464646464634 instead of 0x3434343434343434)

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 01/12] x86_64: memset_user()
  2018-12-21 20:29       ` Matthew Wilcox
@ 2018-12-21 20:46         ` Cyrill Gorcunov
  2018-12-21 21:07           ` Matthew Wilcox
  0 siblings, 1 reply; 24+ messages in thread
From: Cyrill Gorcunov @ 2018-12-21 20:46 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Igor Stoppa, Andy Lutomirski, Peter Zijlstra, Dave Hansen,
	Mimi Zohar, Thiago Jung Bauermann, igor.stoppa, Nadav Amit,
	Kees Cook, Ahmed Soliman, linux-integrity, kernel-hardening,
	linux-mm, linux-kernel

On Fri, Dec 21, 2018 at 12:29:46PM -0800, Matthew Wilcox wrote:
> > > 
> > > That's inefficient.
> > > 
> > > 	pattern = (unsigned char)c;
> > > 	pattern |= pattern << 8;
> > > 	pattern |= pattern << 16;
> > > 	pattern |= pattern << 32;
> > 
> > Won't
> > 
> > 	pattern = 0x0101010101010101 * c;
> > 
> > do the same but faster?
> 
> Depends on your CPU.  Some yes, some no.
> 
> (Also you need to cast 'c' to unsigned char to avoid someone passing in
> 0x1234 and getting 0x4646464646464634 instead of 0x3434343434343434)

Cast to unsigned char is needed in any case. And as far as I remember
we've been using this multiplication trick for a really long time
in x86 land. I'm out of sources right now but it should be somewhere
in assembly libs.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 01/12] x86_64: memset_user()
  2018-12-21 20:46         ` Cyrill Gorcunov
@ 2018-12-21 21:07           ` Matthew Wilcox
  2018-12-21 21:17             ` Cyrill Gorcunov
  0 siblings, 1 reply; 24+ messages in thread
From: Matthew Wilcox @ 2018-12-21 21:07 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: Igor Stoppa, Andy Lutomirski, Peter Zijlstra, Dave Hansen,
	Mimi Zohar, Thiago Jung Bauermann, igor.stoppa, Nadav Amit,
	Kees Cook, Ahmed Soliman, linux-integrity, kernel-hardening,
	linux-mm, linux-kernel

On Fri, Dec 21, 2018 at 11:46:16PM +0300, Cyrill Gorcunov wrote:
> Cast to unsigned char is needed in any case. And as far as I remember
> we've been using this multiplication trick for a really long time
> in x86 land. I'm out of sources right now but it should be somewhere
> in assembly libs.

x86 isn't the only CPU.  Some CPUs have slow multiplies but fast shifts.
Also loading 0x0101010101010101 into a register may be inefficient on
some CPUs.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 01/12] x86_64: memset_user()
  2018-12-21 21:07           ` Matthew Wilcox
@ 2018-12-21 21:17             ` Cyrill Gorcunov
  0 siblings, 0 replies; 24+ messages in thread
From: Cyrill Gorcunov @ 2018-12-21 21:17 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Igor Stoppa, Andy Lutomirski, Peter Zijlstra, Dave Hansen,
	Mimi Zohar, Thiago Jung Bauermann, igor.stoppa, Nadav Amit,
	Kees Cook, Ahmed Soliman, linux-integrity, kernel-hardening,
	linux-mm, linux-kernel

On Fri, Dec 21, 2018 at 01:07:21PM -0800, Matthew Wilcox wrote:
> On Fri, Dec 21, 2018 at 11:46:16PM +0300, Cyrill Gorcunov wrote:
> > Cast to unsigned char is needed in any case. And as far as I remember
> > we've been using this multiplication trick for a really long time
> > in x86 land. I'm out of sources right now but it should be somewhere
> > in assembly libs.
> 
> x86 isn't the only CPU.  Some CPUs have slow multiplies but fast shifts.

This is x86-64 patch, not some generic code.

> Also loading 0x0101010101010101 into a register may be inefficient on
> some CPUs.

It is pretty efficient on x86-64. Moreover the self dependents as
a |= a << b is a source for data hazards inside cpu engine. Anyway
i'm not going to insist, just wanted to remind about such trick.
Up to you what to choose.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 03/12] __wr_after_init: generic functionality
  2018-12-21 19:43       ` Matthew Wilcox
@ 2018-12-21 21:54         ` Igor Stoppa
  0 siblings, 0 replies; 24+ messages in thread
From: Igor Stoppa @ 2018-12-21 21:54 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Andy Lutomirski, Peter Zijlstra, Dave Hansen, Mimi Zohar,
	Thiago Jung Bauermann, igor.stoppa, Nadav Amit, Kees Cook,
	Ahmed Soliman, linux-integrity, kernel-hardening, linux-mm,
	linux-kernel



On 21/12/2018 21:43, Matthew Wilcox wrote:
> On Fri, Dec 21, 2018 at 09:07:54PM +0200, Igor Stoppa wrote:
>> On 21/12/2018 20:41, Matthew Wilcox wrote:
>>> On Fri, Dec 21, 2018 at 08:14:14PM +0200, Igor Stoppa wrote:
>>>> +static inline int memtst(void *p, int c, __kernel_size_t len)
>>>
>>> I don't understand why you're verifying that writes actually happen
>>> in production code.  Sure, write lib/test_wrmem.c or something, but
>>> verifying every single rare write seems like a mistake to me.
>>
>> This is actually something I wrote more as a stop-gap.
>> I have the feeling there should be already something similar available.
>> And probably I could not find it. Unless it's so trivial that it doesn't
>> deserve to become a function?
>>
>> But if there is really no existing alternative, I can put it in a separate
>> file.
> 
> I'm not questioning the implementation, I'm questioning why it's ever
> called.  If I type 'p = q', I don't then verify that p actually is equal
> to q.  I just assume that the compiler did its job. 

Paranoia, probably.

My thinking is that, once the data is protected, it could still be 
attacked through the metadata. A pte, for example.
Preventing the setting of a flag, that for example enables a 
functionality, might be a nice way to thwart all this protection.

If I verify that the write was successful, through the read-only 
address, then I know that the action really completed successfully.

There are many more types of attack that one can come up with, but 
attacking the metadata is probably the most likely next level.

So what I'm trying to do is more akin to:

p = &d;
*p = q;
d == q;

But in our case there is an indefinite amount of time between the 
creation of
the alternate mapping and its use.

Another way could be to check that the mapping is correct before writing 
to it. Maybe safer? I went for confirming that the end result is correct.

Of course it adds overhead, but if the whole thing is already slow and 
happening not too often, how much does it matter?

An alternative approach would be that the code invoking the wr operation 
performs an explicit test.

Would it look better if I implemented this as a wr_assign_verify() 
inline function?

>>>> +#ifndef CONFIG_PRMEM
>>>
>>> So is this PRMEM or wr_mem?  It's not obvious that CONFIG_PRMEM controls
>>> wrmem.
>>
>> In my mind (maybe still clinging to the old implementation), PRMEM is the
>> master toggle, for protected memory.
>>
>> Then there are various types and the first one being now implemented is
>> write rare after init (because ro after init already exists).
>>
>> However, the same levels of protection should then follow for dynamically
>> allocated memory (ye old pmalloc).
>>
>> PRMEM would then become the moniker for the whole shebang.
> 
> To my mind, what we have in this patchset is support for statically
> allocated protected (or write-rare) memory.  Later, we'll add dynamically
> allocated protected memory.  So it's all protected memory, and we'll
> use the same accessors for both ... right?

The static one is only write rare because read only after init already 
exists.

The dynamic one must introduce the same write rare, yes, but it should 
also introduce read_only (I do not count the destruction of an entire 
pool as a write rare operation). Ex: SELinux policyDB.

write rare, regardless if dynamic or static, is a sub-case of protected 
memory, hence the differentiation between protected and write rare.

I'm not claiming to be particularly skilled at choosing names, so if 
something better sounding is available, it can be used.
This is the best I could come up with.

[...]

> I don't think there's anything to be done in that case.  Indeed,
> I think the only thing to do is panic and stop the whole machine if
> initialisation fails.  We'd be in a situation where nothing can update
> protected memory, and the machine just won't work.
> 
> I suppose we could "fail insecure" and never protect the memory, but I
> think that's asking for trouble.

ok, so init will BUG() if it fails, instead of the current WARN_ONCE() 
and return.

> Anyway, my concern was for a driver which can be built either as a
> module or built-in.  Its init code will be called before write-protection
> happens when it's built in, and after write-protection happens when it's
> a module.  It should be able to use wr_assign() in either circumstance.
> One might also have a utility function which is called from both init
> and non-init code and want to use wr_assign() whether initialisation
> has completed or not.

If the writable mapping is created early enough, the only penalty for 
using the write-rare function on a writable variable is that it would be 
slower. Probably there wouldn't be so much data to deal with.

If the driver is dealing with some HW, most likely that would make any 
write rare extra delay look negligible.

--
igor

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 10/12] __wr_after_init: test write rare functionality
  2018-12-19 21:33 [RFC v2 PATCH 0/12] hardening: statically allocated protected memory Igor Stoppa
@ 2018-12-19 21:33 ` Igor Stoppa
  0 siblings, 0 replies; 24+ messages in thread
From: Igor Stoppa @ 2018-12-19 21:33 UTC (permalink / raw)
  To: Andy Lutomirski, Matthew Wilcox, Peter Zijlstra, Dave Hansen, Mimi Zohar
  Cc: igor.stoppa, Nadav Amit, Kees Cook, linux-integrity,
	kernel-hardening, linux-mm, linux-kernel

Set of test cases meant to confirm that the write rare functionality
works as expected.
It can be optionally compiled as module.

Signed-off-by: Igor Stoppa <igor.stoppa@huawei.com>

CC: Andy Lutomirski <luto@amacapital.net>
CC: Nadav Amit <nadav.amit@gmail.com>
CC: Matthew Wilcox <willy@infradead.org>
CC: Peter Zijlstra <peterz@infradead.org>
CC: Kees Cook <keescook@chromium.org>
CC: Dave Hansen <dave.hansen@linux.intel.com>
CC: Mimi Zohar <zohar@linux.vnet.ibm.com>
CC: linux-integrity@vger.kernel.org
CC: kernel-hardening@lists.openwall.com
CC: linux-mm@kvack.org
CC: linux-kernel@vger.kernel.org
---
 mm/Kconfig.debug     |   8 +++
 mm/Makefile          |   1 +
 mm/test_write_rare.c | 135 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 144 insertions(+)
 create mode 100644 mm/test_write_rare.c

diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index b10305cfac3c..ae018e56c4e4 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -102,3 +102,11 @@ config DEBUG_PRMEM
     help
       After any write rare operation, compares the data written with the
       value provided by the caller.
+
+config DEBUG_PRMEM_TEST
+    tristate "Run self test for statically allocated protected memory"
+    depends on PRMEM
+    default n
+    help
+      Tries to verify that the protection for statically allocated memory
+      works correctly and that the memory is effectively protected.
diff --git a/mm/Makefile b/mm/Makefile
index d210cc9d6f80..62d719c0ee1e 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -58,6 +58,7 @@ obj-$(CONFIG_SPARSEMEM)	+= sparse.o
 obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o
 obj-$(CONFIG_SLOB) += slob.o
 obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
+obj-$(CONFIG_DEBUG_PRMEM_TEST) += test_write_rare.o
 obj-$(CONFIG_KSM) += ksm.o
 obj-$(CONFIG_PAGE_POISONING) += page_poison.o
 obj-$(CONFIG_SLAB) += slab.o
diff --git a/mm/test_write_rare.c b/mm/test_write_rare.c
new file mode 100644
index 000000000000..30574bc34a20
--- /dev/null
+++ b/mm/test_write_rare.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * test_write_rare.c
+ *
+ * (C) Copyright 2018 Huawei Technologies Co. Ltd.
+ * Author: Igor Stoppa <igor.stoppa@huawei.com>
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/bug.h>
+#include <linux/prmem.h>
+
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+extern long __start_wr_after_init;
+extern long __end_wr_after_init;
+
+static __wr_after_init int scalar = '0';
+static __wr_after_init u8 array[PAGE_SIZE * 3] __aligned(PAGE_SIZE);
+
+/* The section must occupy a non-zero number of whole pages */
+static bool test_alignment(void)
+{
+	unsigned long pstart = (unsigned long)&__start_wr_after_init;
+	unsigned long pend = (unsigned long)&__end_wr_after_init;
+
+	if (WARN((pstart & ~PAGE_MASK) || (pend & ~PAGE_MASK) ||
+		 (pstart >= pend), "Boundaries test failed."))
+		return false;
+	pr_info("Boundaries test passed.");
+	return true;
+}
+
+static bool test_pattern(void)
+{
+	return (memtst(array, '0', PAGE_SIZE / 2) ||
+		memtst(array + PAGE_SIZE / 2, '1', PAGE_SIZE * 3 / 4) ||
+		memtst(array + PAGE_SIZE * 5 / 4, '0', PAGE_SIZE / 2) ||
+		memtst(array + PAGE_SIZE * 7 / 4, '1', PAGE_SIZE * 3 / 4) ||
+		memtst(array + PAGE_SIZE * 5 / 2, '0', PAGE_SIZE / 2));
+}
+
+static bool test_wr_memset(void)
+{
+	int new_val = '1';
+
+	wr_memset(&scalar, new_val, sizeof(scalar));
+	if (WARN(memtst(&scalar, new_val, sizeof(scalar)),
+		 "Scalar write rare memset test failed."))
+		return false;
+
+	pr_info("Scalar write rare memset test passed.");
+
+	wr_memset(array, '0', PAGE_SIZE * 3);
+	if (WARN(memtst(array, '0', PAGE_SIZE * 3),
+		 "Array write rare memset test failed."))
+		return false;
+
+	wr_memset(array + PAGE_SIZE / 2, '1', PAGE_SIZE * 2);
+	if (WARN(memtst(array + PAGE_SIZE / 2, '1', PAGE_SIZE * 2),
+		 "Array write rare memset test failed."))
+		return false;
+
+	wr_memset(array + PAGE_SIZE * 5 / 4, '0', PAGE_SIZE / 2);
+	if (WARN(memtst(array + PAGE_SIZE * 5 / 4, '0', PAGE_SIZE / 2),
+		 "Array write rare memset test failed."))
+		return false;
+
+	if (WARN(test_pattern(), "Array write rare memset test failed."))
+		return false;
+
+	pr_info("Array write rare memset test passed.");
+	return true;
+}
+
+static u8 array_1[PAGE_SIZE * 2];
+static u8 array_2[PAGE_SIZE * 2];
+
+static bool test_wr_memcpy(void)
+{
+	int new_val = 0x12345678;
+
+	wr_assign(scalar, new_val);
+	if (WARN(memcmp(&scalar, &new_val, sizeof(scalar)),
+		 "Scalar write rare memcpy test failed."))
+		return false;
+	pr_info("Scalar write rare memcpy test passed.");
+
+	wr_memset(array, '0', PAGE_SIZE * 3);
+	memset(array_1, '1', PAGE_SIZE * 2);
+	memset(array_2, '0', PAGE_SIZE * 2);
+	wr_memcpy(array + PAGE_SIZE / 2, array_1, PAGE_SIZE * 2);
+	wr_memcpy(array + PAGE_SIZE * 5 / 4, array_2, PAGE_SIZE / 2);
+
+	if (WARN(test_pattern(), "Array write rare memcpy test failed."))
+		return false;
+
+	pr_info("Array write rare memcpy test passed.");
+	return true;
+}
+
+static __wr_after_init int *dst;
+static int reference = 0x54;
+
+static bool test_wr_rcu_assign_pointer(void)
+{
+	wr_rcu_assign_pointer(dst, &reference);
+	return dst == &reference;
+}
+
+static int __init test_static_wr_init_module(void)
+{
+	pr_info("static write_rare test");
+	if (WARN(!(test_alignment() &&
+		   test_wr_memset() &&
+		   test_wr_memcpy() &&
+		   test_wr_rcu_assign_pointer()),
+		 "static rare-write test failed"))
+		return -EFAULT;
+	pr_info("static write_rare test passed");
+	return 0;
+}
+
+module_init(test_static_wr_init_module);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Igor Stoppa <igor.stoppa@huawei.com>");
+MODULE_DESCRIPTION("Test module for static write rare.");
-- 
2.19.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, back to index

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20181221181423.20455-1-igor.stoppa@huawei.com>
2018-12-21 18:14 ` [PATCH 01/12] x86_64: memset_user() Igor Stoppa
2018-12-21 18:25   ` Matthew Wilcox
2018-12-21 18:46     ` Igor Stoppa
2018-12-21 20:05     ` Cyrill Gorcunov
2018-12-21 20:29       ` Matthew Wilcox
2018-12-21 20:46         ` Cyrill Gorcunov
2018-12-21 21:07           ` Matthew Wilcox
2018-12-21 21:17             ` Cyrill Gorcunov
2018-12-21 18:14 ` [PATCH 02/12] __wr_after_init: linker section and label Igor Stoppa
2018-12-21 18:14 ` [PATCH 03/12] __wr_after_init: generic functionality Igor Stoppa
2018-12-21 18:41   ` Matthew Wilcox
2018-12-21 19:07     ` Igor Stoppa
2018-12-21 19:43       ` Matthew Wilcox
2018-12-21 21:54         ` Igor Stoppa
2018-12-21 18:14 ` [PATCH 04/12] __wr_after_init: debug writes Igor Stoppa
2018-12-21 18:14 ` [PATCH 05/12] __wr_after_init: x86_64: __wr_op Igor Stoppa
2018-12-21 18:14 ` [PATCH 06/12] __wr_after_init: Documentation: self-protection Igor Stoppa
2018-12-21 18:14 ` [PATCH 07/12] __wr_after_init: lkdtm test Igor Stoppa
2018-12-21 18:14 ` [PATCH 08/12] rodata_test: refactor tests Igor Stoppa
2018-12-21 18:14 ` [PATCH 09/12] rodata_test: add verification for __wr_after_init Igor Stoppa
2018-12-21 18:14 ` [PATCH 10/12] __wr_after_init: test write rare functionality Igor Stoppa
2018-12-21 18:14 ` [PATCH 11/12] IMA: turn ima_policy_flags into __wr_after_init Igor Stoppa
2018-12-21 18:14 ` [PATCH 12/12] x86_64: __clear_user as case of __memset_user Igor Stoppa
2018-12-19 21:33 [RFC v2 PATCH 0/12] hardening: statically allocated protected memory Igor Stoppa
2018-12-19 21:33 ` [PATCH 10/12] __wr_after_init: test write rare functionality Igor Stoppa

Linux-Integrity Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-integrity/0 linux-integrity/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-integrity linux-integrity/ https://lore.kernel.org/linux-integrity \
		linux-integrity@vger.kernel.org linux-integrity@archiver.kernel.org
	public-inbox-index linux-integrity


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-integrity


AGPL code for this site: git clone https://public-inbox.org/ public-inbox