linux-mm.kvack.org archive mirror
* [PATCH 02/17] prmem: write rare for static allocation
       [not found] <20181023213504.28905-1-igor.stoppa@huawei.com>
@ 2018-10-23 21:34 ` Igor Stoppa
  2018-10-25  0:24   ` Dave Hansen
  2018-10-26  9:41   ` Peter Zijlstra
  2018-10-23 21:34 ` [PATCH 03/17] prmem: vmalloc support for dynamic allocation Igor Stoppa
                   ` (6 subsequent siblings)
  7 siblings, 2 replies; 26+ messages in thread
From: Igor Stoppa @ 2018-10-23 21:34 UTC (permalink / raw)
  To: Mimi Zohar, Kees Cook, Matthew Wilcox, Dave Chinner,
	James Morris, Michal Hocko, kernel-hardening, linux-integrity,
	linux-security-module
  Cc: igor.stoppa, Dave Hansen, Jonathan Corbet, Laura Abbott,
	Vlastimil Babka, Kirill A. Shutemov, Andrew Morton,
	Pavel Tatashin, linux-mm, linux-kernel

Implementation of write rare for statically allocated data, located in a
dedicated memory section through the use of the __wr_after_init label.

The basic functions are wr_memcpy() and wr_memset(): the write rare
counterparts of memcpy() and memset() respectively.

To minimize the chances of attack, this implementation does not unprotect
existing memory pages.
Instead, it remaps them, one by one, at random free locations, as writable.
Each page is mapped as writable strictly for the time needed to perform
the changes in said page.
While a page is remapped, interrupts are disabled on the core performing
the write rare operation, to avoid the operation being frozen mid-flight
by an attack that uses interrupts to stretch the duration of the
alternate mapping.
On the other hand, to avoid introducing unpredictable delays, interrupts
are re-enabled between page remappings, when write operations are either
completed or not yet started, and there is no alternate, writable
mapping to exploit.
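
As an illustration, a minimal usage sketch (my_limit is a hypothetical
variable; the __wr_after_init marker is assumed to be provided by an
earlier patch in the series):

	static int my_limit __wr_after_init = 100;

	void update_my_limit(int new_limit)
	{
		/* Remaps the page, writes the new value, unmaps again. */
		wr_memcpy(&my_limit, &new_limit, sizeof(my_limit));
	}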

Signed-off-by: Igor Stoppa <igor.stoppa@huawei.com>
CC: Michal Hocko <mhocko@kernel.org>
CC: Vlastimil Babka <vbabka@suse.cz>
CC: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Pavel Tatashin <pasha.tatashin@oracle.com>
CC: linux-mm@kvack.org
CC: linux-kernel@vger.kernel.org
---
 MAINTAINERS           |   7 ++
 include/linux/prmem.h | 213 ++++++++++++++++++++++++++++++++++++++++++
 mm/Makefile           |   1 +
 mm/prmem.c            |  10 ++
 4 files changed, 231 insertions(+)
 create mode 100644 include/linux/prmem.h
 create mode 100644 mm/prmem.c

diff --git a/MAINTAINERS b/MAINTAINERS
index b2f710eee67a..e566c5d09faf 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9454,6 +9454,13 @@ F:	kernel/sched/membarrier.c
 F:	include/uapi/linux/membarrier.h
 F:	arch/powerpc/include/asm/membarrier.h
 
+MEMORY HARDENING
+M:	Igor Stoppa <igor.stoppa@gmail.com>
+L:	kernel-hardening@lists.openwall.com
+S:	Maintained
+F:	include/linux/prmem.h
+F:	mm/prmem.c
+
 MEMORY MANAGEMENT
 L:	linux-mm@kvack.org
 W:	http://www.linux-mm.org
diff --git a/include/linux/prmem.h b/include/linux/prmem.h
new file mode 100644
index 000000000000..3ba41d76a582
--- /dev/null
+++ b/include/linux/prmem.h
@@ -0,0 +1,213 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * prmem.h: Header for memory protection library
+ *
+ * (C) Copyright 2018 Huawei Technologies Co. Ltd.
+ * Author: Igor Stoppa <igor.stoppa@huawei.com>
+ *
+ * Support for:
+ * - statically allocated write rare data
+ */
+
+#ifndef _LINUX_PRMEM_H
+#define _LINUX_PRMEM_H
+
+#include <linux/set_memory.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/compiler.h>
+#include <linux/irqflags.h>
+#include <linux/set_memory.h>
+
+/* ============================ Write Rare ============================ */
+
+extern const char WR_ERR_RANGE_MSG[];
+extern const char WR_ERR_PAGE_MSG[];
+
+/*
+ * The following two variables are statically allocated by the linker
+ * script at the boundaries of the memory region (rounded up to
+ * multiples of PAGE_SIZE) reserved for __wr_after_init.
+ */
+extern long __start_wr_after_init;
+extern long __end_wr_after_init;
+
+static __always_inline bool __is_wr_after_init(const void *ptr, size_t size)
+{
+	size_t start = (size_t)&__start_wr_after_init;
+	size_t end = (size_t)&__end_wr_after_init;
+	size_t low = (size_t)ptr;
+	size_t high = (size_t)ptr + size;
+
+	return likely(start <= low && low < high && high <= end);
+}
+
+/**
+ * wr_memset() - sets n bytes of the destination to the c value
+ * @dst: beginning of the memory to write to
+ * @c: byte to replicate
+ * @size: number of bytes to set
+ *
+ * Returns true on success, false otherwise.
+ */
+static __always_inline
+bool wr_memset(const void *dst, const int c, size_t n_bytes)
+{
+	size_t size;
+	unsigned long flags;
+	uintptr_t d = (uintptr_t)dst;
+
+	if (WARN(!__is_wr_after_init(dst, n_bytes), WR_ERR_RANGE_MSG))
+		return false;
+	while (n_bytes) {
+		struct page *page;
+		uintptr_t base;
+		uintptr_t offset;
+		uintptr_t offset_complement;
+
+		local_irq_save(flags);
+		page = virt_to_page(d);
+		offset = d & ~PAGE_MASK;
+		offset_complement = PAGE_SIZE - offset;
+		size = min(n_bytes, offset_complement);
+		base = (uintptr_t)vmap(&page, 1, VM_MAP, PAGE_KERNEL);
+		if (WARN(!base, WR_ERR_PAGE_MSG)) {
+			local_irq_restore(flags);
+			return false;
+		}
+		memset((void *)(base + offset), c, size);
+		vunmap((void *)base);
+		d += size;
+		n_bytes -= size;
+		local_irq_restore(flags);
+	}
+	return true;
+}
+
+/**
+ * wr_memcpy() - copies n bytes from source to destination
+ * @dst: beginning of the memory to write to
+ * @src: beginning of the memory to read from
+ * @n_bytes: number of bytes to copy
+ *
+ * Returns true on success, false otherwise.
+ */
+static __always_inline
+bool wr_memcpy(const void *dst, const void *src, size_t n_bytes)
+{
+	size_t size;
+	unsigned long flags;
+	uintptr_t d = (uintptr_t)dst;
+	uintptr_t s = (uintptr_t)src;
+
+	if (WARN(!__is_wr_after_init(dst, n_bytes), WR_ERR_RANGE_MSG))
+		return false;
+	while (n_bytes) {
+		struct page *page;
+		uintptr_t base;
+		uintptr_t offset;
+		uintptr_t offset_complement;
+
+		local_irq_save(flags);
+		page = virt_to_page(d);
+		offset = d & ~PAGE_MASK;
+		offset_complement = PAGE_SIZE - offset;
+		size = (size_t)min(n_bytes, offset_complement);
+		base = (uintptr_t)vmap(&page, 1, VM_MAP, PAGE_KERNEL);
+		if (WARN(!base, WR_ERR_PAGE_MSG)) {
+			local_irq_restore(flags);
+			return false;
+		}
+		__write_once_size((void *)(base + offset), (void *)s, size);
+		vunmap((void *)base);
+		d += size;
+		s += size;
+		n_bytes -= size;
+		local_irq_restore(flags);
+	}
+	return true;
+}
+
+/*
+ * rcu_assign_pointer() is a macro: thanks to the preprocessor, it can
+ * take the address of its destination parameter "p" and pass it on to
+ * WRITE_ONCE(), which is called in one of the branches of
+ * rcu_assign_pointer() and, being a macro as well, can likewise rely
+ * on the preprocessor for taking the address of its parameter.
+ * For the sake of staying compatible with that API,
+ * wr_rcu_assign_pointer() is also a macro that accepts a pointer as
+ * parameter, instead of the address of said pointer.
+ * However, it is simply a wrapper around __wr_rcu_ptr(), which
+ * receives the address of the pointer.
+ */
+static __always_inline
+uintptr_t __wr_rcu_ptr(const void *dst_p_p, const void *src_p)
+{
+	unsigned long flags;
+	struct page *page;
+	void *base;
+	uintptr_t offset;
+	const size_t size = sizeof(void *);
+
+	if (WARN(!__is_wr_after_init(dst_p_p, size), WR_ERR_RANGE_MSG))
+		return (uintptr_t)NULL;
+	local_irq_save(flags);
+	page = virt_to_page(dst_p_p);
+	offset = (uintptr_t)dst_p_p & ~PAGE_MASK;
+	base = vmap(&page, 1, VM_MAP, PAGE_KERNEL);
+	if (WARN(!base, WR_ERR_PAGE_MSG)) {
+		local_irq_restore(flags);
+		return (uintptr_t)NULL;
+	}
+	rcu_assign_pointer((*(void **)(offset + (uintptr_t)base)), src_p);
+	vunmap(base);
+	local_irq_restore(flags);
+	return (uintptr_t)src_p;
+}
+
+#define wr_rcu_assign_pointer(p, v)	__wr_rcu_ptr(&p, v)
+
+#define __wr_simple(dst_ptr, src_ptr)					\
+	wr_memcpy(dst_ptr, src_ptr, sizeof(*(src_ptr)))
+
+#define __wr_safe(dst_ptr, src_ptr,					\
+		  unique_dst_ptr, unique_src_ptr)			\
+({									\
+	typeof(dst_ptr) unique_dst_ptr = (dst_ptr);			\
+	typeof(src_ptr) unique_src_ptr = (src_ptr);			\
+									\
+	wr_memcpy(unique_dst_ptr, unique_src_ptr,			\
+		  sizeof(*(unique_src_ptr)));				\
+})
+
+#define __safe_ops(dst, src)	\
+	(__typecheck(dst, src) && __no_side_effects(dst, src))
+
+/**
+ * wr - copies an object over another of the same type and size
+ * @dst_ptr: address of the destination object
+ * @src_ptr: address of the source object
+ */
+#define wr(dst_ptr, src_ptr)						\
+	__builtin_choose_expr(__safe_ops(dst_ptr, src_ptr),		\
+			      __wr_simple(dst_ptr, src_ptr),		\
+			      __wr_safe(dst_ptr, src_ptr,		\
+						__UNIQUE_ID(__dst_ptr),	\
+						__UNIQUE_ID(__src_ptr)))
+
+/**
+ * wr_ptr() - alters a pointer in write rare memory
+ * @dst: target for write
+ * @val: new value
+ *
+ * Returns true on success, false otherwise.
+ */
+static __always_inline
+bool wr_ptr(const void *dst, const void *val)
+{
+	return wr_memcpy(dst, &val, sizeof(val));
+}
+#endif
diff --git a/mm/Makefile b/mm/Makefile
index 26ef77a3883b..215c6a6d7304 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -64,6 +64,7 @@ obj-$(CONFIG_SPARSEMEM)	+= sparse.o
 obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o
 obj-$(CONFIG_SLOB) += slob.o
 obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
+obj-$(CONFIG_PRMEM) += prmem.o
 obj-$(CONFIG_KSM) += ksm.o
 obj-$(CONFIG_PAGE_POISONING) += page_poison.o
 obj-$(CONFIG_SLAB) += slab.o
diff --git a/mm/prmem.c b/mm/prmem.c
new file mode 100644
index 000000000000..de9258f5f29a
--- /dev/null
+++ b/mm/prmem.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * prmem.c: Memory Protection Library
+ *
+ * (C) Copyright 2017-2018 Huawei Technologies Co. Ltd.
+ * Author: Igor Stoppa <igor.stoppa@huawei.com>
+ */
+
+const char WR_ERR_RANGE_MSG[] = "Write rare on invalid memory range.";
+const char WR_ERR_PAGE_MSG[] = "Failed to remap write rare page.";
-- 
2.17.1

* [PATCH 03/17] prmem: vmalloc support for dynamic allocation
       [not found] <20181023213504.28905-1-igor.stoppa@huawei.com>
  2018-10-23 21:34 ` [PATCH 02/17] prmem: write rare for static allocation Igor Stoppa
@ 2018-10-23 21:34 ` Igor Stoppa
  2018-10-25  0:26   ` Dave Hansen
  2018-10-23 21:34 ` [PATCH 04/17] prmem: " Igor Stoppa
                   ` (5 subsequent siblings)
  7 siblings, 1 reply; 26+ messages in thread
From: Igor Stoppa @ 2018-10-23 21:34 UTC (permalink / raw)
  To: Mimi Zohar, Kees Cook, Matthew Wilcox, Dave Chinner,
	James Morris, Michal Hocko, kernel-hardening, linux-integrity,
	linux-security-module
  Cc: igor.stoppa, Dave Hansen, Jonathan Corbet, Laura Abbott,
	Andrew Morton, Chintan Pandya, Joe Perches, Luis R. Rodriguez,
	Thomas Gleixner, Kate Stewart, Greg Kroah-Hartman,
	Philippe Ombredanne, linux-mm, linux-kernel

Prepare vmalloc for:
- tagging areas used for dynamic allocation of protected memory
- supporting various tags, related to the properties that an area might have
- looking up the pool containing a given area
- chaining the areas in each pool
- looking up the area containing a given memory address

NOTE:
Since the purge_list field is used only when disposing of the allocation,
there are two pointers up for grabs until the time comes to free the
allocation.
To avoid increasing the size of the vmap_area structure, instead of
using a standard doubly linked list for tracking the chain of vmap_areas,
only one pointer is spent for this purpose, forming a singly linked list,
while the other provides a direct link to the parent pool.
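
As an illustration, a sketch of how the overlaid fields are meant to be
used by the allocator introduced later in the series (pmalloc_pool,
pool->area and protect_area() come from the following patches):

	/* Chain a freshly tagged area into its pool: */
	new_area->pool = pool;		/* direct link to the parent pool */
	new_area->next = pool->area;	/* singly linked chain of areas */
	pool->area = new_area;

	/* Walk the chain, e.g. to protect every area in the pool: */
	for (area = pool->area; area; area = area->next)
		protect_area(area);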

Signed-off-by: Igor Stoppa <igor.stoppa@huawei.com>
CC: Michal Hocko <mhocko@kernel.org>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Chintan Pandya <cpandya@codeaurora.org>
CC: Joe Perches <joe@perches.com>
CC: "Luis R. Rodriguez" <mcgrof@kernel.org>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Kate Stewart <kstewart@linuxfoundation.org>
CC: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
CC: Philippe Ombredanne <pombredanne@nexb.com>
CC: linux-mm@kvack.org
CC: linux-kernel@vger.kernel.org
---
 include/linux/vmalloc.h | 12 +++++++++++-
 mm/vmalloc.c            |  2 +-
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 398e9c95cd61..4d14a3b8089e 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -21,6 +21,9 @@ struct notifier_block;		/* in notifier.h */
 #define VM_UNINITIALIZED	0x00000020	/* vm_struct is not fully initialized */
 #define VM_NO_GUARD		0x00000040      /* don't add guard page */
 #define VM_KASAN		0x00000080      /* has allocated kasan shadow memory */
+#define VM_PMALLOC		0x00000100	/* pmalloc area - see docs */
+#define VM_PMALLOC_WR		0x00000200	/* pmalloc write rare area */
+#define VM_PMALLOC_PROTECTED	0x00000400	/* pmalloc protected area */
 /* bits [20..32] reserved for arch specific ioremap internals */
 
 /*
@@ -48,7 +51,13 @@ struct vmap_area {
 	unsigned long flags;
 	struct rb_node rb_node;         /* address sorted rbtree */
 	struct list_head list;          /* address sorted list */
-	struct llist_node purge_list;    /* "lazy purge" list */
+	union {
+		struct llist_node purge_list;    /* "lazy purge" list */
+		struct {
+			struct vmap_area *next;
+			struct pmalloc_pool *pool;
+		};
+	};
 	struct vm_struct *vm;
 	struct rcu_head rcu_head;
 };
@@ -134,6 +143,7 @@ extern struct vm_struct *__get_vm_area_caller(unsigned long size,
 					const void *caller);
 extern struct vm_struct *remove_vm_area(const void *addr);
 extern struct vm_struct *find_vm_area(const void *addr);
+extern struct vmap_area *find_vmap_area(unsigned long addr);
 
 extern int map_vm_area(struct vm_struct *area, pgprot_t prot,
 			struct page **pages);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index a728fc492557..15850005fea5 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -742,7 +742,7 @@ static void free_unmap_vmap_area(struct vmap_area *va)
 	free_vmap_area_noflush(va);
 }
 
-static struct vmap_area *find_vmap_area(unsigned long addr)
+struct vmap_area *find_vmap_area(unsigned long addr)
 {
 	struct vmap_area *va;
 
-- 
2.17.1

* [PATCH 04/17] prmem: dynamic allocation
       [not found] <20181023213504.28905-1-igor.stoppa@huawei.com>
  2018-10-23 21:34 ` [PATCH 02/17] prmem: write rare for static allocation Igor Stoppa
  2018-10-23 21:34 ` [PATCH 03/17] prmem: vmalloc support for dynamic allocation Igor Stoppa
@ 2018-10-23 21:34 ` Igor Stoppa
  2018-10-23 21:34 ` [PATCH 05/17] prmem: shorthands for write rare on common types Igor Stoppa
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 26+ messages in thread
From: Igor Stoppa @ 2018-10-23 21:34 UTC (permalink / raw)
  To: Mimi Zohar, Kees Cook, Matthew Wilcox, Dave Chinner,
	James Morris, Michal Hocko, kernel-hardening, linux-integrity,
	linux-security-module
  Cc: igor.stoppa, Dave Hansen, Jonathan Corbet, Laura Abbott,
	Vlastimil Babka, Kirill A. Shutemov, Andrew Morton,
	Pavel Tatashin, linux-mm, linux-kernel

Extension of protected memory to dynamic allocations.

Allocations are performed from "pools".
A pool is a list of virtual memory areas, in various states of
protection.

Supported cases
===============

Read Only Pool
--------------
Memory is allocated from the pool in a writable state.
Once written, the content of the pool is write protected and can no
longer be altered. It is only possible to destroy the pool.

Auto Read Only Pool
-------------------
Same as the plain read only, but every time a memory area is full and
phased out, it is automatically marked as read only.

Write Rare Pool
---------------
Memory is allocated from the pool in a writable state.
Once written, the content of the pool is write protected and can be
altered only by invoking the special write rare functions.

Auto Write Rare Pool
--------------------
Same as the plain write rare, but every time a memory area is full and
phased out, it is automatically marked as write rare.

Start Write Rare Pool
---------------------
The memory handed out is already in write rare mode and the only way to
alter it is to use write rare functions.

When a pool is destroyed, all the memory that was obtained from it is
automatically freed. This is the only way to release protected memory.
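
As an illustration, the intended life cycle of a read only pool
(struct my_cfg and its field are placeholders):

	struct pmalloc_pool *pool;
	struct my_cfg *cfg;

	pool = pmalloc_create_pool(PMALLOC_MODE_RO);
	cfg = pmalloc(pool, sizeof(*cfg));
	cfg->value = 42;		/* still writable at this point */
	pmalloc_protect_pool(pool);	/* *cfg is read only from here on */
	/* ... use cfg ... */
	pmalloc_destroy_pool(pool);	/* the only way to free the memory */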

Signed-off-by: Igor Stoppa <igor.stoppa@huawei.com>
CC: Michal Hocko <mhocko@kernel.org>
CC: Vlastimil Babka <vbabka@suse.cz>
CC: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Pavel Tatashin <pasha.tatashin@oracle.com>
CC: linux-mm@kvack.org
CC: linux-kernel@vger.kernel.org
---
 include/linux/prmem.h | 220 +++++++++++++++++++++++++++++++++--
 mm/Kconfig            |   6 +
 mm/prmem.c            | 263 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 482 insertions(+), 7 deletions(-)

diff --git a/include/linux/prmem.h b/include/linux/prmem.h
index 3ba41d76a582..26fd48410d97 100644
--- a/include/linux/prmem.h
+++ b/include/linux/prmem.h
@@ -7,6 +7,8 @@
  *
  * Support for:
  * - statically allocated write rare data
+ * - dynamically allocated read only data
+ * - dynamically allocated write rare data
  */
 
 #ifndef _LINUX_PRMEM_H
@@ -22,6 +24,11 @@
 #include <linux/irqflags.h>
 #include <linux/set_memory.h>
 
+#define VM_PMALLOC_MASK \
+		(VM_PMALLOC | VM_PMALLOC_WR | VM_PMALLOC_PROTECTED)
+#define VM_PMALLOC_WR_MASK		(VM_PMALLOC | VM_PMALLOC_WR)
+#define VM_PMALLOC_PROTECTED_MASK	(VM_PMALLOC | VM_PMALLOC_PROTECTED)
+
 /* ============================ Write Rare ============================ */
 
 extern const char WR_ERR_RANGE_MSG[];
@@ -45,11 +52,23 @@ static __always_inline bool __is_wr_after_init(const void *ptr, size_t size)
 	return likely(start <= low && low < high && high <= end);
 }
 
+static __always_inline bool __is_wr_pool(const void *ptr, size_t size)
+{
+	struct vmap_area *area;
+
+	if (!is_vmalloc_addr(ptr))
+		return false;
+	area = find_vmap_area((unsigned long)ptr);
+	return area && area->vm && (area->vm->size >= size) &&
+		((area->vm->flags & (VM_PMALLOC | VM_PMALLOC_WR)) ==
+		 (VM_PMALLOC | VM_PMALLOC_WR));
+}
+
 /**
  * wr_memset() - sets n bytes of the destination to the c value
  * @dst: beginning of the memory to write to
  * @c: byte to replicate
- * @size: number of bytes to set
+ * @n_bytes: number of bytes to set
  *
  * Returns true on success, false otherwise.
  */
@@ -59,8 +78,10 @@ bool wr_memset(const void *dst, const int c, size_t n_bytes)
 	size_t size;
 	unsigned long flags;
 	uintptr_t d = (uintptr_t)dst;
+	bool is_virt = __is_wr_after_init(dst, n_bytes);
 
-	if (WARN(!__is_wr_after_init(dst, n_bytes), WR_ERR_RANGE_MSG))
+	if (WARN(!(is_virt || likely(__is_wr_pool(dst, n_bytes))),
+		 WR_ERR_RANGE_MSG))
 		return false;
 	while (n_bytes) {
 		struct page *page;
@@ -69,7 +90,10 @@ bool wr_memset(const void *dst, const int c, size_t n_bytes)
 		uintptr_t offset_complement;
 
 		local_irq_save(flags);
-		page = virt_to_page(d);
+		if (is_virt)
+			page = virt_to_page(d);
+		else
+			page = vmalloc_to_page((void *)d);
 		offset = d & ~PAGE_MASK;
 		offset_complement = PAGE_SIZE - offset;
 		size = min(n_bytes, offset_complement);
@@ -102,8 +126,10 @@ bool wr_memcpy(const void *dst, const void *src, size_t n_bytes)
 	unsigned long flags;
 	uintptr_t d = (uintptr_t)dst;
 	uintptr_t s = (uintptr_t)src;
+	bool is_virt = __is_wr_after_init(dst, n_bytes);
 
-	if (WARN(!__is_wr_after_init(dst, n_bytes), WR_ERR_RANGE_MSG))
+	if (WARN(!(is_virt || likely(__is_wr_pool(dst, n_bytes))),
+		 WR_ERR_RANGE_MSG))
 		return false;
 	while (n_bytes) {
 		struct page *page;
@@ -112,7 +138,10 @@ bool wr_memcpy(const void *dst, const void *src, size_t n_bytes)
 		uintptr_t offset_complement;
 
 		local_irq_save(flags);
-		page = virt_to_page(d);
+		if (is_virt)
+			page = virt_to_page(d);
+		else
+			page = vmalloc_to_page((void *)d);
 		offset = d & ~PAGE_MASK;
 		offset_complement = PAGE_SIZE - offset;
 		size = (size_t)min(n_bytes, offset_complement);
@@ -151,11 +180,13 @@ uintptr_t __wr_rcu_ptr(const void *dst_p_p, const void *src_p)
 	void *base;
 	uintptr_t offset;
 	const size_t size = sizeof(void *);
+	bool is_virt = __is_wr_after_init(dst_p_p, size);
 
-	if (WARN(!__is_wr_after_init(dst_p_p, size), WR_ERR_RANGE_MSG))
+	if (WARN(!(is_virt || likely(__is_wr_pool(dst_p_p, size))),
+		 WR_ERR_RANGE_MSG))
 		return (uintptr_t)NULL;
 	local_irq_save(flags);
-	page = virt_to_page(dst_p_p);
+	page = is_virt ? virt_to_page(dst_p_p) : vmalloc_to_page(dst_p_p);
 	offset = (uintptr_t)dst_p_p & ~PAGE_MASK;
 	base = vmap(&page, 1, VM_MAP, PAGE_KERNEL);
 	if (WARN(!base, WR_ERR_PAGE_MSG)) {
@@ -210,4 +241,179 @@ bool wr_ptr(const void *dst, const void *val)
 {
 	return wr_memcpy(dst, &val, sizeof(val));
 }
+
+/* ============================ Allocator ============================ */
+
+#define PMALLOC_REFILL_DEFAULT (0)
+#define PMALLOC_DEFAULT_REFILL_SIZE PAGE_SIZE
+#define PMALLOC_ALIGN_ORDER_DEFAULT ilog2(ARCH_KMALLOC_MINALIGN)
+
+#define PMALLOC_RO		0x00
+#define PMALLOC_WR		0x01
+#define PMALLOC_AUTO		0x02
+#define PMALLOC_START		0x04
+#define PMALLOC_MASK		(PMALLOC_WR | PMALLOC_AUTO | PMALLOC_START)
+
+#define PMALLOC_MODE_RO		PMALLOC_RO
+#define PMALLOC_MODE_WR		PMALLOC_WR
+#define PMALLOC_MODE_AUTO_RO	(PMALLOC_RO | PMALLOC_AUTO)
+#define PMALLOC_MODE_AUTO_WR	(PMALLOC_WR | PMALLOC_AUTO)
+#define PMALLOC_MODE_START_WR	(PMALLOC_WR | PMALLOC_START)
+
+struct pmalloc_pool {
+	struct mutex mutex;
+	struct list_head pool_node;
+	struct vmap_area *area;
+	size_t align;
+	size_t refill;
+	size_t offset;
+	uint8_t mode;
+};
+
+/*
+ * The write rare functionality is implemented entirely as
+ * __always_inline, to avoid an internal function call that would be
+ * capable of modifying write protected memory.
+ * Fully inlining the functions allows the compiler to optimize away
+ * their interface, making it harder for an attacker to hijack them.
+ * This still leaves the door open to attacks that try to reuse part of
+ * the code, by jumping into the middle of a function; however, this
+ * can be mitigated by a compiler plugin that enforces Control Flow
+ * Integrity (CFI).
+ * Any addition/modification to the write rare path must follow the
+ * same approach.
+ */
+
+void pmalloc_init_custom_pool(struct pmalloc_pool *pool, size_t refill,
+			      short align_order, uint8_t mode);
+
+struct pmalloc_pool *pmalloc_create_custom_pool(size_t refill,
+						short align_order,
+						uint8_t mode);
+
+/**
+ * pmalloc_create_pool() - create a protectable memory pool
+ * @mode: can the data be altered after protection
+ *
+ * Shorthand for pmalloc_create_custom_pool() with default argument:
+ * * refill is set to PMALLOC_REFILL_DEFAULT
+ * * align_order is set to PMALLOC_ALIGN_ORDER_DEFAULT
+ *
+ * Returns:
+ * * pointer to the new pool	- success
+ * * NULL			- error
+ */
+static inline struct pmalloc_pool *pmalloc_create_pool(uint8_t mode)
+{
+	return pmalloc_create_custom_pool(PMALLOC_REFILL_DEFAULT,
+					  PMALLOC_ALIGN_ORDER_DEFAULT,
+					  mode);
+}
+
+void *pmalloc(struct pmalloc_pool *pool, size_t size);
+
+/**
+ * pzalloc() - zero-initialized version of pmalloc()
+ * @pool: handle to the pool to be used for memory allocation
+ * @size: amount of memory (in bytes) requested
+ *
+ * Executes pmalloc(), initializing the memory requested to 0, before
+ * returning its address.
+ *
+ * Return:
+ * * pointer to the memory requested	- success
+ * * NULL				- error
+ */
+static inline void *pzalloc(struct pmalloc_pool *pool, size_t size)
+{
+	void *ptr = pmalloc(pool, size);
+
+	if (unlikely(!ptr))
+		return ptr;
+	if ((pool->mode & PMALLOC_MODE_START_WR) == PMALLOC_MODE_START_WR)
+		wr_memset(ptr, 0, size);
+	else
+		memset(ptr, 0, size);
+	return ptr;
+}
+
+/**
+ * pmalloc_array() - array version of pmalloc()
+ * @pool: handle to the pool to be used for memory allocation
+ * @n: number of elements in the array
+ * @size: amount of memory (in bytes) requested for each element
+ *
+ * Executes pmalloc(), on an array.
+ *
+ * Return:
+ * * the pmalloc result	- success
+ * * NULL		- error
+ */
+
+static inline
+void *pmalloc_array(struct pmalloc_pool *pool, size_t n, size_t size)
+{
+	size_t total_size = n * size;
+
+	if (unlikely(!(n && (total_size / n == size))))
+		return NULL;
+	return pmalloc(pool, n * size);
+}
+
+/**
+ * pcalloc() - array version of pzalloc()
+ * @pool: handle to the pool to be used for memory allocation
+ * @n: number of elements in the array
+ * @size: amount of memory (in bytes) requested for each element
+ *
+ * Executes pzalloc(), on an array.
+ *
+ * Return:
+ * * the pmalloc result	- success
+ * * NULL		- error
+ */
+static inline
+void *pcalloc(struct pmalloc_pool *pool, size_t n, size_t size)
+{
+	size_t total_size = n * size;
+
+	if (unlikely(!(n && (total_size / n == size))))
+		return NULL;
+	return pzalloc(pool, n * size);
+}
+
+/**
+ * pstrdup() - duplicate a string, using pmalloc()
+ * @pool: handle to the pool to be used for memory allocation
+ * @s: string to duplicate
+ *
+ * Generates a copy of the given string, allocating sufficient memory
+ * from the given pmalloc pool.
+ *
+ * Return:
+ * * pointer to the replica	- success
+ * * NULL			- error
+ */
+static inline char *pstrdup(struct pmalloc_pool *pool, const char *s)
+{
+	size_t len;
+	char *buf;
+
+	len = strlen(s) + 1;
+	buf = pmalloc(pool, len);
+	if (unlikely(!buf))
+		return buf;
+	if ((pool->mode & PMALLOC_MODE_START_WR) == PMALLOC_MODE_START_WR)
+		wr_memcpy(buf, s, len);
+	else
+		strncpy(buf, s, len);
+	return buf;
+}
+
+
+void pmalloc_protect_pool(struct pmalloc_pool *pool);
+
+void pmalloc_make_pool_ro(struct pmalloc_pool *pool);
+
+void pmalloc_destroy_pool(struct pmalloc_pool *pool);
 #endif
diff --git a/mm/Kconfig b/mm/Kconfig
index de64ea658716..1885f5565cbc 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -764,4 +764,10 @@ config GUP_BENCHMARK
 config ARCH_HAS_PTE_SPECIAL
 	bool
 
+config PRMEM
+	bool
+	depends on MMU
+	depends on ARCH_HAS_SET_MEMORY
+	default y
+
 endmenu
diff --git a/mm/prmem.c b/mm/prmem.c
index de9258f5f29a..7dd13ea43304 100644
--- a/mm/prmem.c
+++ b/mm/prmem.c
@@ -6,5 +6,268 @@
  * Author: Igor Stoppa <igor.stoppa@huawei.com>
  */
 
+#include <linux/printk.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/slab.h>
+#include <linux/set_memory.h>
+#include <linux/bug.h>
+#include <linux/mutex.h>
+#include <linux/llist.h>
+#include <asm/cacheflush.h>
+#include <asm/page.h>
+
+#include <linux/prmem.h>
+
 const char WR_ERR_RANGE_MSG[] = "Write rare on invalid memory range.";
 const char WR_ERR_PAGE_MSG[] = "Failed to remap write rare page.";
+
+static LIST_HEAD(pools_list);
+static DEFINE_MUTEX(pools_mutex);
+
+#define MAX_ALIGN_ORDER (ilog2(sizeof(void *)))
+
+
+/* Various helper functions. Inlined, to reduce the attack surface. */
+
+static __always_inline void protect_area(struct vmap_area *area)
+{
+	set_memory_ro(area->va_start, area->vm->nr_pages);
+	area->vm->flags |= VM_PMALLOC_PROTECTED_MASK;
+}
+
+static __always_inline bool empty(struct pmalloc_pool *pool)
+{
+	return unlikely(!pool->area);
+}
+
+/* Allocation from a protected area is allowed only for a START_WR pool. */
+static __always_inline bool unwritable(struct pmalloc_pool *pool)
+{
+	return  (pool->area->vm->flags & VM_PMALLOC_PROTECTED) &&
+		!((pool->area->vm->flags & VM_PMALLOC_WR) &&
+		  (pool->mode & PMALLOC_START));
+}
+
+static __always_inline
+bool exhausted(struct pmalloc_pool *pool, size_t size)
+{
+	size_t space_before;
+	size_t space_after;
+
+	space_before = round_down(pool->offset, pool->align);
+	space_after = pool->offset - space_before;
+	return unlikely(space_after < size && space_before < size);
+}
+
+static __always_inline
+bool space_needed(struct pmalloc_pool *pool, size_t size)
+{
+	return empty(pool) || unwritable(pool) || exhausted(pool, size);
+}
+
+/**
+ * pmalloc_init_custom_pool() - initialize a protectable memory pool
+ * @pool: the pointer to the struct pmalloc_pool to initialize
+ * @refill: the minimum size to allocate when in need of more memory.
+ *          It will be rounded up to a multiple of PAGE_SIZE
+ *          The value of 0 gives the default amount of PAGE_SIZE.
+ * @align_order: log2 of the alignment to use when allocating memory
+ *               Negative values give ARCH_KMALLOC_MINALIGN
+ * @mode: is the data RO or RareWrite and should be provided already in
+ *        protected mode.
+ *        The value is one of:
+ *        PMALLOC_MODE_RO, PMALLOC_MODE_WR, PMALLOC_MODE_AUTO_RO
+ *        PMALLOC_MODE_AUTO_WR, PMALLOC_MODE_START_WR
+ *
+ * Initializes an empty memory pool, for allocation of protectable
+ * memory. Memory will be allocated upon request (through pmalloc).
+ */
+void pmalloc_init_custom_pool(struct pmalloc_pool *pool, size_t refill,
+			      short align_order, uint8_t mode)
+{
+	mutex_init(&pool->mutex);
+	pool->area = NULL;
+	if (align_order < 0)
+		pool->align = ARCH_KMALLOC_MINALIGN;
+	else
+		pool->align = 1UL << align_order;
+	pool->refill = refill ? PAGE_ALIGN(refill) :
+				PMALLOC_DEFAULT_REFILL_SIZE;
+	mode &= PMALLOC_MASK;
+	if (mode & PMALLOC_START)
+		mode |= PMALLOC_WR;
+	pool->mode = mode & PMALLOC_MASK;
+	pool->offset = 0;
+	mutex_lock(&pools_mutex);
+	list_add(&pool->pool_node, &pools_list);
+	mutex_unlock(&pools_mutex);
+}
+EXPORT_SYMBOL(pmalloc_init_custom_pool);
+
+/**
+ * pmalloc_create_custom_pool() - create a new protectable memory pool
+ * @refill: the minimum size to allocate when in need of more memory.
+ *          It will be rounded up to a multiple of PAGE_SIZE
+ *          The value of 0 gives the default amount of PAGE_SIZE.
+ * @align_order: log2 of the alignment to use when allocating memory
+ *               Negative values give ARCH_KMALLOC_MINALIGN
+ * @mode: can the data be altered after protection
+ *
+ * Creates a new (empty) memory pool for allocation of protectable
+ * memory. Memory will be allocated upon request (through pmalloc).
+ *
+ * Return:
+ * * pointer to the new pool	- success
+ * * NULL			- error
+ */
+struct pmalloc_pool *pmalloc_create_custom_pool(size_t refill,
+						short align_order,
+						uint8_t mode)
+{
+	struct pmalloc_pool *pool;
+
+	pool = kmalloc(sizeof(struct pmalloc_pool), GFP_KERNEL);
+	if (WARN(!pool, "Could not allocate pool meta data."))
+		return NULL;
+	pmalloc_init_custom_pool(pool, refill, align_order, mode);
+	return pool;
+}
+EXPORT_SYMBOL(pmalloc_create_custom_pool);
+
+static int grow(struct pmalloc_pool *pool, size_t min_size)
+{
+	void *addr;
+	struct vmap_area *new_area;
+	unsigned long size;
+	uint32_t tag_mask;
+
+	size = (min_size > pool->refill) ? min_size : pool->refill;
+	addr = vmalloc(size);
+	if (WARN(!addr, "Failed to allocate %zd bytes", PAGE_ALIGN(size)))
+		return -ENOMEM;
+
+	new_area = find_vmap_area((uintptr_t)addr);
+	tag_mask = VM_PMALLOC;
+	if (pool->mode & PMALLOC_WR)
+		tag_mask |= VM_PMALLOC_WR;
+	new_area->vm->flags |= (tag_mask & VM_PMALLOC_MASK);
+	new_area->pool = pool;
+	if (pool->mode & PMALLOC_START)
+		protect_area(new_area);
+	if (pool->mode & PMALLOC_AUTO && !empty(pool))
+		protect_area(pool->area);
+	/* The area size backed by actual pages, excluding the guard page. */
+	pool->offset = new_area->vm->nr_pages * PAGE_SIZE;
+	new_area->next = pool->area;
+	pool->area = new_area;
+	return 0;
+}
+
+/**
+ * pmalloc() - allocate protectable memory from a pool
+ * @pool: handle to the pool to be used for memory allocation
+ * @size: amount of memory (in bytes) requested
+ *
+ * Allocates memory from a pool.
+ * If needed, the pool will automatically allocate enough memory to
+ * either satisfy the request or meet the "refill" parameter received
+ * upon creation.
+ * New allocation can happen also if the current memory in the pool is
+ * already write protected.
+ * Allocation happens with a mutex locked, therefore it is assumed to have
+ * exclusive write access to both the pool structure and the list of
+ * vmap_areas, while inside the lock.
+ *
+ * Return:
+ * * pointer to the memory requested	- success
+ * * NULL				- error
+ */
+void *pmalloc(struct pmalloc_pool *pool, size_t size)
+{
+	void *retval = NULL;
+
+	mutex_lock(&pool->mutex);
+	if (unlikely(space_needed(pool, size)) &&
+	    unlikely(grow(pool, size) != 0))
+		goto error;
+	pool->offset = round_down(pool->offset - size, pool->align);
+	retval = (void *)(pool->area->va_start + pool->offset);
+error:
+	mutex_unlock(&pool->mutex);
+	return retval;
+}
+EXPORT_SYMBOL(pmalloc);
+
+/**
+ * pmalloc_protect_pool() - write-protects the memory in the pool
+ * @pool: the pool associated to the memory to write-protect
+ *
+ * Write-protects all the memory areas currently assigned to the pool
+ * that are still unprotected.
+ * This does not prevent further allocation of additional memory, that
+ * can be initialized and protected.
+ * The catch is that protecting a pool will make unavailable whatever
+ * free memory it might still contain.
+ * Successive allocations will grab more free pages.
+ */
+void pmalloc_protect_pool(struct pmalloc_pool *pool)
+{
+	struct vmap_area *area;
+
+	mutex_lock(&pool->mutex);
+	for (area = pool->area; area; area = area->next)
+		protect_area(area);
+	mutex_unlock(&pool->mutex);
+}
+EXPORT_SYMBOL(pmalloc_protect_pool);
+
+
+/**
+ * pmalloc_make_pool_ro() - forces a pool to become read-only
+ * @pool: the pool associated to the memory to make ro
+ *
+ * Drops the possibility to perform controlled writes from both the pool
+ * metadata and all the vm_area structures associated to the pool.
+ * In case the pool was configured to automatically protect memory when
+ * allocating it, the configuration is dropped.
+ */
+void pmalloc_make_pool_ro(struct pmalloc_pool *pool)
+{
+	struct vmap_area *area;
+
+	mutex_lock(&pool->mutex);
+	pool->mode &= ~(PMALLOC_WR | PMALLOC_START);
+	for (area = pool->area; area; area = area->next)
+		protect_area(area);
+	mutex_unlock(&pool->mutex);
+}
+EXPORT_SYMBOL(pmalloc_make_pool_ro);
+
+/**
+ * pmalloc_destroy_pool() - destroys a pool and all the associated memory
+ * @pool: the pool to destroy
+ *
+ * All the memory associated to the pool will be freed, including the
+ * metadata used for the pool.
+ */
+void pmalloc_destroy_pool(struct pmalloc_pool *pool)
+{
+	struct vmap_area *area;
+
+	mutex_lock(&pools_mutex);
+	list_del(&pool->pool_node);
+	mutex_unlock(&pools_mutex);
+	while (pool->area) {
+		area = pool->area;
+		pool->area = area->next;
+		set_memory_rw(area->va_start, area->vm->nr_pages);
+		area->vm->flags &= ~VM_PMALLOC_MASK;
+		vfree((void *)area->va_start);
+	}
+	kfree(pool);
+}
+EXPORT_SYMBOL(pmalloc_destroy_pool);
-- 
2.17.1

* [PATCH 05/17] prmem: shorthands for write rare on common types
       [not found] <20181023213504.28905-1-igor.stoppa@huawei.com>
                   ` (2 preceding siblings ...)
  2018-10-23 21:34 ` [PATCH 04/17] prmem: " Igor Stoppa
@ 2018-10-23 21:34 ` Igor Stoppa
  2018-10-25  0:28   ` Dave Hansen
  2018-10-23 21:34 ` [PATCH 06/17] prmem: test cases for memory protection Igor Stoppa
                   ` (3 subsequent siblings)
  7 siblings, 1 reply; 26+ messages in thread
From: Igor Stoppa @ 2018-10-23 21:34 UTC (permalink / raw)
  To: Mimi Zohar, Kees Cook, Matthew Wilcox, Dave Chinner,
	James Morris, Michal Hocko, kernel-hardening, linux-integrity,
	linux-security-module
  Cc: igor.stoppa, Dave Hansen, Jonathan Corbet, Laura Abbott,
	Vlastimil Babka, Kirill A. Shutemov, Andrew Morton,
	Pavel Tatashin, linux-mm, linux-kernel

Wrappers around the basic write rare functionality, covering several
common data types found in the kernel and allowing the new values to be
specified as immediates, such as constants and defines.

Note:
The list is not complete and could be expanded.
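
As an illustration, a minimal usage sketch (max_sessions is a
hypothetical variable, assumed to live in write rare memory through the
__wr_after_init marker from earlier in the series):

	static int max_sessions __wr_after_init = 64;

	/* Instead of routing the immediate through a temporary
	 * variable for wr_memcpy():
	 */
	wr_int(&max_sessions, 128);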

Signed-off-by: Igor Stoppa <igor.stoppa@huawei.com>
CC: Michal Hocko <mhocko@kernel.org>
CC: Vlastimil Babka <vbabka@suse.cz>
CC: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Pavel Tatashin <pasha.tatashin@oracle.com>
CC: linux-mm@kvack.org
CC: linux-kernel@vger.kernel.org
---
 MAINTAINERS                |   1 +
 include/linux/prmemextra.h | 133 +++++++++++++++++++++++++++++++++++++
 2 files changed, 134 insertions(+)
 create mode 100644 include/linux/prmemextra.h

diff --git a/MAINTAINERS b/MAINTAINERS
index e566c5d09faf..df7221eca160 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9459,6 +9459,7 @@ M:	Igor Stoppa <igor.stoppa@gmail.com>
 L:	kernel-hardening@lists.openwall.com
 S:	Maintained
 F:	include/linux/prmem.h
+F:	include/linux/prmemextra.h
 F:	mm/prmem.c
 
 MEMORY MANAGEMENT
diff --git a/include/linux/prmemextra.h b/include/linux/prmemextra.h
new file mode 100644
index 000000000000..36995717720e
--- /dev/null
+++ b/include/linux/prmemextra.h
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * prmemextra.h: Shorthands for write rare of basic data types
+ *
+ * (C) Copyright 2018 Huawei Technologies Co. Ltd.
+ * Author: Igor Stoppa <igor.stoppa@huawei.com>
+ *
+ */
+
+#ifndef _LINUX_PRMEMEXTRA_H
+#define _LINUX_PRMEMEXTRA_H
+
+#include <linux/prmem.h>
+
+/**
+ * wr_char - alters a char in write rare memory
+ * @dst: target for write
+ * @val: new value
+ *
+ * Returns true on success, false otherwise.
+ */
+static __always_inline
+bool wr_char(const char *dst, const char val)
+{
+	return wr_memcpy(dst, &val, sizeof(val));
+}
+
+/**
+ * wr_short - alters a short in write rare memory
+ * @dst: target for write
+ * @val: new value
+ *
+ * Returns true on success, false otherwise.
+ */
+static __always_inline
+bool wr_short(const short *dst, const short val)
+{
+	return wr_memcpy(dst, &val, sizeof(val));
+}
+
+/**
+ * wr_ushort - alters an unsigned short in write rare memory
+ * @dst: target for write
+ * @val: new value
+ *
+ * Returns true on success, false otherwise.
+ */
+static __always_inline
+bool wr_ushort(const unsigned short *dst, const unsigned short val)
+{
+	return wr_memcpy(dst, &val, sizeof(val));
+}
+
+/**
+ * wr_int - alters an int in write rare memory
+ * @dst: target for write
+ * @val: new value
+ *
+ * Returns true on success, false otherwise.
+ */
+static __always_inline
+bool wr_int(const int *dst, const int val)
+{
+	return wr_memcpy(dst, &val, sizeof(val));
+}
+
+/**
+ * wr_uint - alters an unsigned int in write rare memory
+ * @dst: target for write
+ * @val: new value
+ *
+ * Returns true on success, false otherwise.
+ */
+static __always_inline
+bool wr_uint(const unsigned int *dst, const unsigned int val)
+{
+	return wr_memcpy(dst, &val, sizeof(val));
+}
+
+/**
+ * wr_long - alters a long in write rare memory
+ * @dst: target for write
+ * @val: new value
+ *
+ * Returns true on success, false otherwise.
+ */
+static __always_inline
+bool wr_long(const long *dst, const long val)
+{
+	return wr_memcpy(dst, &val, sizeof(val));
+}
+
+/**
+ * wr_ulong - alters an unsigned long in write rare memory
+ * @dst: target for write
+ * @val: new value
+ *
+ * Returns true on success, false otherwise.
+ */
+static __always_inline
+bool wr_ulong(const unsigned long *dst, const unsigned long val)
+{
+	return wr_memcpy(dst, &val, sizeof(val));
+}
+
+/**
+ * wr_longlong - alters a long long in write rare memory
+ * @dst: target for write
+ * @val: new value
+ *
+ * Returns true on success, false otherwise.
+ */
+static __always_inline
+bool wr_longlong(const long long *dst, const long long val)
+{
+	return wr_memcpy(dst, &val, sizeof(val));
+}
+
+/**
+ * wr_ulonglong - alters an unsigned long long in write rare memory
+ * @dst: target for write
+ * @val: new value
+ *
+ * Returns true on success, false otherwise.
+ */
+static __always_inline
+bool wr_ulonglong(const unsigned long long *dst,
+			  const unsigned long long val)
+{
+	return wr_memcpy(dst, &val, sizeof(val));
+}
+
+#endif
-- 
2.17.1

* [PATCH 06/17] prmem: test cases for memory protection
       [not found] <20181023213504.28905-1-igor.stoppa@huawei.com>
                   ` (3 preceding siblings ...)
  2018-10-23 21:34 ` [PATCH 05/17] prmem: shorthands for write rare on common types Igor Stoppa
@ 2018-10-23 21:34 ` Igor Stoppa
  2018-10-24  3:27   ` Randy Dunlap
  2018-10-25 16:43   ` Dave Hansen
  2018-10-23 21:34 ` [PATCH 07/17] prmem: lkdtm tests " Igor Stoppa
                   ` (2 subsequent siblings)
  7 siblings, 2 replies; 26+ messages in thread
From: Igor Stoppa @ 2018-10-23 21:34 UTC (permalink / raw)
  To: Mimi Zohar, Kees Cook, Matthew Wilcox, Dave Chinner,
	James Morris, Michal Hocko, kernel-hardening, linux-integrity,
	linux-security-module
  Cc: igor.stoppa, Dave Hansen, Jonathan Corbet, Laura Abbott,
	Vlastimil Babka, Kirill A. Shutemov, Andrew Morton,
	Pavel Tatashin, linux-mm, linux-kernel

The test cases verify the various interfaces offered by both prmem.h
and prmemextra.h.

The tests avoid triggering crashes by not performing actions that would
be treated as illegal. That part is handled in the lkdtm patch.
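
The tests run at module init. As an illustration, they can be enabled
with a configuration along these lines (DEBUG_PRMEM_TEST is the option
added by this patch; being tristate, =m works as well):

	CONFIG_STRICT_KERNEL_RWX=y
	CONFIG_DEBUG_PRMEM_TEST=y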

Signed-off-by: Igor Stoppa <igor.stoppa@huawei.com>
CC: Michal Hocko <mhocko@kernel.org>
CC: Vlastimil Babka <vbabka@suse.cz>
CC: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Pavel Tatashin <pasha.tatashin@oracle.com>
CC: linux-mm@kvack.org
CC: linux-kernel@vger.kernel.org
---
 MAINTAINERS          |   2 +
 mm/Kconfig.debug     |   9 +
 mm/Makefile          |   1 +
 mm/test_pmalloc.c    | 633 +++++++++++++++++++++++++++++++++++++++++++
 mm/test_write_rare.c | 236 ++++++++++++++++
 5 files changed, 881 insertions(+)
 create mode 100644 mm/test_pmalloc.c
 create mode 100644 mm/test_write_rare.c

diff --git a/MAINTAINERS b/MAINTAINERS
index df7221eca160..ea979a5a9ec9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9461,6 +9461,8 @@ S:	Maintained
 F:	include/linux/prmem.h
 F:	include/linux/prmemextra.h
 F:	mm/prmem.c
+F:	mm/test_write_rare.c
+F:	mm/test_pmalloc.c
 
 MEMORY MANAGEMENT
 L:	linux-mm@kvack.org
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index 9a7b8b049d04..57de5b3c0bae 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -94,3 +94,12 @@ config DEBUG_RODATA_TEST
 	depends on STRICT_KERNEL_RWX
 	---help---
 	  This option enables a testcase for the setting rodata read-only.
+
+config DEBUG_PRMEM_TEST
+	tristate "Run self test for protected memory"
+	depends on STRICT_KERNEL_RWX
+	select PRMEM
+	default n
+	help
+	  Tries to verify that the memory protection works correctly and that
+	  the memory is effectively protected.
diff --git a/mm/Makefile b/mm/Makefile
index 215c6a6d7304..93b503d4659f 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -65,6 +65,7 @@ obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o
 obj-$(CONFIG_SLOB) += slob.o
 obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
 obj-$(CONFIG_PRMEM) += prmem.o
+obj-$(CONFIG_DEBUG_PRMEM_TEST) += test_write_rare.o test_pmalloc.o
 obj-$(CONFIG_KSM) += ksm.o
 obj-$(CONFIG_PAGE_POISONING) += page_poison.o
 obj-$(CONFIG_SLAB) += slab.o
diff --git a/mm/test_pmalloc.c b/mm/test_pmalloc.c
new file mode 100644
index 000000000000..f9ee8fb29eea
--- /dev/null
+++ b/mm/test_pmalloc.c
@@ -0,0 +1,633 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * test_pmalloc.c
+ *
+ * (C) Copyright 2018 Huawei Technologies Co. Ltd.
+ * Author: Igor Stoppa <igor.stoppa@huawei.com>
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/bug.h>
+#include <linux/prmemextra.h>
+
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#define SIZE_1 (PAGE_SIZE * 3)
+#define SIZE_2 1000
+
+static const char MSG_NO_POOL[] = "Cannot allocate memory for the pool.";
+static const char MSG_NO_PMEM[] = "Cannot allocate memory from the pool.";
+
+#define pr_success(test_name)	\
+	pr_info(test_name " test passed\n")
+
+/* --------------- tests the basic life-cycle of a pool --------------- */
+
+static bool is_address_protected(void *p)
+{
+	struct page *page;
+	struct vmap_area *area;
+
+	if (unlikely(!is_vmalloc_addr(p)))
+		return false;
+	page = vmalloc_to_page(p);
+	if (unlikely(!page))
+		return false;
+	wmb(); /* Flush changes to the page table - is it needed? */
+	area = find_vmap_area((uintptr_t)p);
+	if (unlikely((!area) || (!area->vm) ||
+		     ((area->vm->flags & VM_PMALLOC_PROTECTED_MASK) !=
+		      VM_PMALLOC_PROTECTED_MASK)))
+		return false;
+	return true;
+}
+
+static bool create_and_destroy_pool(void)
+{
+	static struct pmalloc_pool *pool;
+
+	pool = pmalloc_create_pool(PMALLOC_MODE_RO);
+	if (WARN(!pool, MSG_NO_POOL))
+		return false;
+	pmalloc_destroy_pool(pool);
+	pr_success("pool creation and destruction");
+	return true;
+}
+
+/*  verifies that it's possible to allocate from the pool */
+static bool test_alloc(void)
+{
+	static struct pmalloc_pool *pool;
+	static void *p;
+
+	pool = pmalloc_create_pool(PMALLOC_MODE_RO);
+	if (WARN(!pool, MSG_NO_POOL))
+		return false;
+	p = pmalloc(pool, SIZE_1 - 1);
+	pmalloc_destroy_pool(pool);
+	if (WARN(!p, MSG_NO_PMEM))
+		return false;
+	pr_success("allocation capability");
+	return true;
+}
+
+/* ----------------------- tests self protection ----------------------- */
+
+static bool test_auto_ro(void)
+{
+	struct pmalloc_pool *pool;
+	int *first_chunk;
+	int *second_chunk;
+	bool retval = false;
+
+	pool = pmalloc_create_pool(PMALLOC_MODE_AUTO_RO);
+	if (WARN(!pool, MSG_NO_POOL))
+		return false;
+	first_chunk = (int *)pmalloc(pool, PMALLOC_DEFAULT_REFILL_SIZE);
+	if (WARN(!first_chunk, MSG_NO_PMEM))
+		goto error;
+	second_chunk = (int *)pmalloc(pool, PMALLOC_DEFAULT_REFILL_SIZE);
+	if (WARN(!second_chunk, MSG_NO_PMEM))
+		goto error;
+	if (WARN(!is_address_protected(first_chunk),
+		 "Failed to automatically write protect exhausted vmarea"))
+		goto error;
+	pr_success("AUTO_RO");
+	retval = true;
+error:
+	pmalloc_destroy_pool(pool);
+	return retval;
+}
+
+static bool test_auto_wr(void)
+{
+	struct pmalloc_pool *pool;
+	int *first_chunk;
+	int *second_chunk;
+	bool retval = false;
+
+	pool = pmalloc_create_pool(PMALLOC_MODE_AUTO_WR);
+	if (WARN(!pool, MSG_NO_POOL))
+		return false;
+	first_chunk = (int *)pmalloc(pool, PMALLOC_DEFAULT_REFILL_SIZE);
+	if (WARN(!first_chunk, MSG_NO_PMEM))
+		goto error;
+	second_chunk = (int *)pmalloc(pool, PMALLOC_DEFAULT_REFILL_SIZE);
+	if (WARN(!second_chunk, MSG_NO_PMEM))
+		goto error;
+	if (WARN(!is_address_protected(first_chunk),
+		 "Failed to automatically write protect exhausted vmarea"))
+		goto error;
+	pr_success("AUTO_WR");
+	retval = true;
+error:
+	pmalloc_destroy_pool(pool);
+	return retval;
+}
+
+static bool test_start_wr(void)
+{
+	struct pmalloc_pool *pool;
+	int *chunks[2];
+	bool retval = false;
+	int i;
+
+	pool = pmalloc_create_pool(PMALLOC_MODE_START_WR);
+	if (WARN(!pool, MSG_NO_POOL))
+		return false;
+	for (i = 0; i < 2; i++) {
+		chunks[i] = (int *)pmalloc(pool, 1);
+		if (WARN(!chunks[i], MSG_NO_PMEM))
+			goto error;
+		if (WARN(!is_address_protected(chunks[i]),
+			 "vmarea was not protected from the start"))
+			goto error;
+	}
+	if (WARN(vmalloc_to_page(chunks[0]) != vmalloc_to_page(chunks[1]),
+		 "START_WR: mostly empty vmap area not reused"))
+		goto error;
+	pr_success("START_WR");
+	retval = true;
+error:
+	pmalloc_destroy_pool(pool);
+	return retval;
+}
+
+static bool test_self_protection(void)
+{
+	if (WARN(!(test_auto_ro() &&
+		   test_auto_wr() &&
+		   test_start_wr()),
+		 "self protection tests failed"))
+		return false;
+	pr_success("self protection");
+	return true;
+}
+
+/* ----------------- tests basic write rare functions ----------------- */
+
+#define INSERT_OFFSET (PAGE_SIZE * 3 / 2)
+#define INSERT_SIZE (PAGE_SIZE * 2)
+#define REGION_SIZE (PAGE_SIZE * 5)
+static bool test_wr_memset(void)
+{
+	struct pmalloc_pool *pool;
+	char *region;
+	unsigned int i;
+	int retval = false;
+
+	pool = pmalloc_create_pool(PMALLOC_MODE_START_WR);
+	if (WARN(!pool, MSG_NO_POOL))
+		return false;
+	region = pzalloc(pool, REGION_SIZE);
+	if (WARN(!region, MSG_NO_PMEM))
+		goto destroy_pool;
+	for (i = 0; i < REGION_SIZE; i++)
+		if (WARN(region[i], "Failed to memset wr memory"))
+			goto destroy_pool;
+	retval = !wr_memset(region + INSERT_OFFSET, 1, INSERT_SIZE);
+	if (WARN(retval, "wr_memset failed"))
+		goto destroy_pool;
+	for (i = 0; i < REGION_SIZE; i++)
+		if (i >= INSERT_OFFSET &&
+		    i < (INSERT_SIZE + INSERT_OFFSET)) {
+			if (WARN(!region[i],
+				 "Failed to alter target area"))
+				goto destroy_pool;
+		} else {
+			if (WARN(region[i] != 0,
+				 "Unexpected alteration outside region"))
+				goto destroy_pool;
+		}
+	retval = true;
+	pr_success("wr_memset");
+destroy_pool:
+	pmalloc_destroy_pool(pool);
+	return retval;
+}
+
+static bool test_wr_strdup(void)
+{
+	const char src[] = "Some text for testing pstrdup()";
+	struct pmalloc_pool *pool;
+	char *dst;
+	int retval = false;
+
+	pool = pmalloc_create_pool(PMALLOC_MODE_WR);
+	if (WARN(!pool, MSG_NO_POOL))
+		return false;
+	dst = pstrdup(pool, src);
+	if (WARN(!dst || strcmp(src, dst), "pmalloc wr strdup failed"))
+		goto destroy_pool;
+	retval = true;
+	pr_success("pmalloc wr strdup");
+destroy_pool:
+	pmalloc_destroy_pool(pool);
+	return retval;
+}
+
+/* Verify write rare across multiple pages, unaligned to PAGE_SIZE. */
+static bool test_wr_copy(void)
+{
+	struct pmalloc_pool *pool;
+	char *region;
+	char *mod;
+	unsigned int i;
+	int retval = false;
+
+	pool = pmalloc_create_pool(PMALLOC_MODE_WR);
+	if (WARN(!pool, MSG_NO_POOL))
+		return false;
+	region = pzalloc(pool, REGION_SIZE);
+	if (WARN(!region, MSG_NO_PMEM))
+		goto destroy_pool;
+	mod = vmalloc(INSERT_SIZE);
+	if (WARN(!mod, "Failed to allocate memory from vmalloc"))
+		goto destroy_pool;
+	memset(mod, 0xA5, INSERT_SIZE);
+	pmalloc_protect_pool(pool);
+	retval = !wr_memcpy(region + INSERT_OFFSET, mod, INSERT_SIZE);
+	if (WARN(retval, "wr_copy failed"))
+		goto free_mod;
+
+	for (i = 0; i < REGION_SIZE; i++)
+		if (i >= INSERT_OFFSET &&
+		    i < (INSERT_SIZE + INSERT_OFFSET)) {
+			if (WARN(region[i] != (char)0xA5,
+				 "Failed to alter target area"))
+				goto free_mod;
+		} else {
+			if (WARN(region[i] != 0,
+				 "Unexpected alteration outside region"))
+				goto free_mod;
+		}
+	retval = true;
+	pr_success("wr_copy");
+free_mod:
+	vfree(mod);
+destroy_pool:
+	pmalloc_destroy_pool(pool);
+	return retval;
+}
+
+/* ----------------- tests specialized write actions ------------------- */
+
+#define TEST_ARRAY_SIZE 5
+#define TEST_ARRAY_TARGET (TEST_ARRAY_SIZE / 2)
+
+static bool test_wr_char(void)
+{
+	struct pmalloc_pool *pool;
+	char *array;
+	unsigned int i;
+	bool retval = false;
+
+	pool = pmalloc_create_pool(PMALLOC_MODE_WR);
+	if (WARN(!pool, MSG_NO_POOL))
+		return false;
+	array = pmalloc(pool, sizeof(char) * TEST_ARRAY_SIZE);
+	if (WARN(!array, MSG_NO_PMEM))
+		goto destroy_pool;
+	for (i = 0; i < TEST_ARRAY_SIZE; i++)
+		array[i] = (char)0xA5;
+	pmalloc_protect_pool(pool);
+	if (WARN(!wr_char(array + TEST_ARRAY_TARGET, (char)0x5A),
+		 "Failed to alter char variable"))
+		goto destroy_pool;
+	for (i = 0; i < TEST_ARRAY_SIZE; i++)
+		if (WARN(array[i] != (i == TEST_ARRAY_TARGET ?
+				      (char)0x5A : (char)0xA5),
+			 "Unexpected value in test array."))
+			goto destroy_pool;
+	retval = true;
+	pr_success("wr_char");
+destroy_pool:
+	pmalloc_destroy_pool(pool);
+	return retval;
+}
+
+static bool test_wr_short(void)
+{
+	struct pmalloc_pool *pool;
+	short *array;
+	unsigned int i;
+	bool retval = false;
+
+	pool = pmalloc_create_pool(PMALLOC_MODE_WR);
+	if (WARN(!pool, MSG_NO_POOL))
+		return false;
+	array = pmalloc(pool, sizeof(short) * TEST_ARRAY_SIZE);
+	if (WARN(!array, MSG_NO_PMEM))
+		goto destroy_pool;
+	for (i = 0; i < TEST_ARRAY_SIZE; i++)
+		array[i] = (short)0xA5;
+	pmalloc_protect_pool(pool);
+	if (WARN(!wr_short(array + TEST_ARRAY_TARGET, (short)0x5A),
+		 "Failed to alter short variable"))
+		goto destroy_pool;
+	for (i = 0; i < TEST_ARRAY_SIZE; i++)
+		if (WARN(array[i] != (i == TEST_ARRAY_TARGET ?
+				      (short)0x5A : (short)0xA5),
+			 "Unexpected value in test array."))
+			goto destroy_pool;
+	retval = true;
+	pr_success("wr_short");
+destroy_pool:
+	pmalloc_destroy_pool(pool);
+	return retval;
+}
+
+static bool test_wr_ushort(void)
+{
+	struct pmalloc_pool *pool;
+	unsigned short *array;
+	unsigned int i;
+	bool retval = false;
+
+	pool = pmalloc_create_pool(PMALLOC_MODE_WR);
+	if (WARN(!pool, MSG_NO_POOL))
+		return false;
+	array = pmalloc(pool, sizeof(unsigned short) * TEST_ARRAY_SIZE);
+	if (WARN(!array, MSG_NO_PMEM))
+		goto destroy_pool;
+	for (i = 0; i < TEST_ARRAY_SIZE; i++)
+		array[i] = (unsigned short)0xA5;
+	pmalloc_protect_pool(pool);
+	if (WARN(!wr_ushort(array + TEST_ARRAY_TARGET,
+				    (unsigned short)0x5A),
+		 "Failed to alter unsigned short variable"))
+		goto destroy_pool;
+	for (i = 0; i < TEST_ARRAY_SIZE; i++)
+		if (WARN(array[i] != (i == TEST_ARRAY_TARGET ?
+				      (unsigned short)0x5A :
+				      (unsigned short)0xA5),
+			 "Unexpected value in test array."))
+			goto destroy_pool;
+	retval = true;
+	pr_success("wr_ushort");
+destroy_pool:
+	pmalloc_destroy_pool(pool);
+	return retval;
+}
+
+static bool test_wr_int(void)
+{
+	struct pmalloc_pool *pool;
+	int *array;
+	unsigned int i;
+	bool retval = false;
+
+	pool = pmalloc_create_pool(PMALLOC_MODE_WR);
+	if (WARN(!pool, MSG_NO_POOL))
+		return false;
+	array = pmalloc(pool, sizeof(int) * TEST_ARRAY_SIZE);
+	if (WARN(!array, MSG_NO_PMEM))
+		goto destroy_pool;
+	for (i = 0; i < TEST_ARRAY_SIZE; i++)
+		array[i] = 0xA5;
+	pmalloc_protect_pool(pool);
+	if (WARN(!wr_int(array + TEST_ARRAY_TARGET, 0x5A),
+		 "Failed to alter int variable"))
+		goto destroy_pool;
+	for (i = 0; i < TEST_ARRAY_SIZE; i++)
+		if (WARN(array[i] != (i == TEST_ARRAY_TARGET ? 0x5A : 0xA5),
+			 "Unexpected value in test array."))
+			goto destroy_pool;
+	retval = true;
+	pr_success("wr_int");
+destroy_pool:
+	pmalloc_destroy_pool(pool);
+	return retval;
+}
+
+static bool test_wr_uint(void)
+{
+	struct pmalloc_pool *pool;
+	unsigned int *array;
+	unsigned int i;
+	bool retval = false;
+
+	pool = pmalloc_create_pool(PMALLOC_MODE_WR);
+	if (WARN(!pool, MSG_NO_POOL))
+		return false;
+	array = pmalloc(pool, sizeof(unsigned int) * TEST_ARRAY_SIZE);
+	if (WARN(!array, MSG_NO_PMEM))
+		goto destroy_pool;
+	for (i = 0; i < TEST_ARRAY_SIZE; i++)
+		array[i] = 0xA5;
+	pmalloc_protect_pool(pool);
+	if (WARN(!wr_uint(array + TEST_ARRAY_TARGET, 0x5A),
+		 "Failed to alter unsigned int variable"))
+		goto destroy_pool;
+	for (i = 0; i < TEST_ARRAY_SIZE; i++)
+		if (WARN(array[i] != (i == TEST_ARRAY_TARGET ? 0x5A : 0xA5),
+			 "Unexpected value in test array."))
+			goto destroy_pool;
+	retval = true;
+	pr_success("wr_uint");
+destroy_pool:
+	pmalloc_destroy_pool(pool);
+	return retval;
+}
+
+static bool test_wr_long(void)
+{
+	struct pmalloc_pool *pool;
+	long *array;
+	unsigned int i;
+	bool retval = false;
+
+	pool = pmalloc_create_pool(PMALLOC_MODE_WR);
+	if (WARN(!pool, MSG_NO_POOL))
+		return false;
+	array = pmalloc(pool, sizeof(long) * TEST_ARRAY_SIZE);
+	if (WARN(!array, MSG_NO_PMEM))
+		goto destroy_pool;
+	for (i = 0; i < TEST_ARRAY_SIZE; i++)
+		array[i] = 0xA5;
+	pmalloc_protect_pool(pool);
+	if (WARN(!wr_long(array + TEST_ARRAY_TARGET, 0x5A),
+		 "Failed to alter long variable"))
+		goto destroy_pool;
+	for (i = 0; i < TEST_ARRAY_SIZE; i++)
+		if (WARN(array[i] != (i == TEST_ARRAY_TARGET ? 0x5A : 0xA5),
+			 "Unexpected value in test array."))
+			goto destroy_pool;
+	retval = true;
+	pr_success("wr_long");
+destroy_pool:
+	pmalloc_destroy_pool(pool);
+	return retval;
+}
+
+static bool test_wr_ulong(void)
+{
+	struct pmalloc_pool *pool;
+	unsigned long *array;
+	unsigned int i;
+	bool retval = false;
+
+	pool = pmalloc_create_pool(PMALLOC_MODE_WR);
+	if (WARN(!pool, MSG_NO_POOL))
+		return false;
+	array = pmalloc(pool, sizeof(unsigned long) * TEST_ARRAY_SIZE);
+	if (WARN(!array, MSG_NO_PMEM))
+		goto destroy_pool;
+	for (i = 0; i < TEST_ARRAY_SIZE; i++)
+		array[i] = 0xA5;
+	pmalloc_protect_pool(pool);
+	if (WARN(!wr_ulong(array + TEST_ARRAY_TARGET, 0x5A),
+		 "Failed to alter unsigned long variable"))
+		goto destroy_pool;
+	for (i = 0; i < TEST_ARRAY_SIZE; i++)
+		if (WARN(array[i] != (i == TEST_ARRAY_TARGET ? 0x5A : 0xA5),
+			 "Unexpected value in test array."))
+			goto destroy_pool;
+	retval = true;
+	pr_success("wr_ulong");
+destroy_pool:
+	pmalloc_destroy_pool(pool);
+	return retval;
+}
+
+static bool test_wr_longlong(void)
+{
+	struct pmalloc_pool *pool;
+	long long *array;
+	unsigned int i;
+	bool retval = false;
+
+	pool = pmalloc_create_pool(PMALLOC_MODE_WR);
+	if (WARN(!pool, MSG_NO_POOL))
+		return false;
+	array = pmalloc(pool, sizeof(long long) * TEST_ARRAY_SIZE);
+	if (WARN(!array, MSG_NO_PMEM))
+		goto destroy_pool;
+	for (i = 0; i < TEST_ARRAY_SIZE; i++)
+		array[i] = 0xA5;
+	pmalloc_protect_pool(pool);
+	if (WARN(!wr_longlong(array + TEST_ARRAY_TARGET, 0x5A),
+		 "Failed to alter long variable"))
+		goto destroy_pool;
+	for (i = 0; i < TEST_ARRAY_SIZE; i++)
+		if (WARN(array[i] != (i == TEST_ARRAY_TARGET ? 0x5A : 0xA5),
+			 "Unexpected value in test array."))
+			goto destroy_pool;
+	retval = true;
+	pr_success("wr_longlong");
+destroy_pool:
+	pmalloc_destroy_pool(pool);
+	return retval;
+}
+
+static bool test_wr_ulonglong(void)
+{
+	struct pmalloc_pool *pool;
+	unsigned long long *array;
+	unsigned int i;
+	bool retval = false;
+
+	pool = pmalloc_create_pool(PMALLOC_MODE_WR);
+	if (WARN(!pool, MSG_NO_POOL))
+		return false;
+	array = pmalloc(pool, sizeof(unsigned long long) * TEST_ARRAY_SIZE);
+	if (WARN(!array, MSG_NO_PMEM))
+		goto destroy_pool;
+	for (i = 0; i < TEST_ARRAY_SIZE; i++)
+		array[i] = 0xA5;
+	pmalloc_protect_pool(pool);
+	if (WARN(!wr_ulonglong(array + TEST_ARRAY_TARGET, 0x5A),
+		 "Failed to alter unsigned long long variable"))
+		goto destroy_pool;
+	for (i = 0; i < TEST_ARRAY_SIZE; i++)
+		if (WARN(array[i] != (i == TEST_ARRAY_TARGET ? 0x5A : 0xA5),
+			 "Unexpected value in test array."))
+			goto destroy_pool;
+	retval = true;
+	pr_success("wr_ulonglong");
+destroy_pool:
+	pmalloc_destroy_pool(pool);
+	return retval;
+}
+
+static bool test_wr_ptr(void)
+{
+	struct pmalloc_pool *pool;
+	int **array;
+	unsigned int i;
+	bool retval = false;
+
+	pool = pmalloc_create_pool(PMALLOC_MODE_WR);
+	if (WARN(!pool, MSG_NO_POOL))
+		return false;
+	array = pmalloc(pool, sizeof(int *) * TEST_ARRAY_SIZE);
+	if (WARN(!array, MSG_NO_PMEM))
+		goto destroy_pool;
+	for (i = 0; i < TEST_ARRAY_SIZE; i++)
+		array[i] = NULL;
+	pmalloc_protect_pool(pool);
+	if (WARN(!wr_ptr(array + TEST_ARRAY_TARGET, array),
+		 "Failed to alter ptr variable"))
+		goto destroy_pool;
+	for (i = 0; i < TEST_ARRAY_SIZE; i++)
+		if (WARN(array[i] != (i == TEST_ARRAY_TARGET ?
+				      (void *)array : NULL),
+			 "Unexpected value in test array."))
+			goto destroy_pool;
+	retval = true;
+	pr_success("wr_ptr");
+destroy_pool:
+	pmalloc_destroy_pool(pool);
+	return retval;
+}
+
+static bool test_specialized_wrs(void)
+{
+	if (WARN(!(test_wr_char() &&
+		   test_wr_short() &&
+		   test_wr_ushort() &&
+		   test_wr_int() &&
+		   test_wr_uint() &&
+		   test_wr_long() &&
+		   test_wr_ulong() &&
+		   test_wr_longlong() &&
+		   test_wr_ulonglong() &&
+		   test_wr_ptr()),
+		 "specialized write rare failed"))
+		return false;
+	pr_success("specialized write rare");
+	return true;
+}
+
+/*
+ * test_pmalloc_init_module() - main entry point for running the test cases
+ */
+static int __init test_pmalloc_init_module(void)
+{
+	if (WARN(!(create_and_destroy_pool() &&
+		   test_alloc() &&
+		   test_self_protection() &&
+		   test_wr_memset() &&
+		   test_wr_strdup() &&
+		   test_wr_copy() &&
+		   test_specialized_wrs()),
+		 "protected memory allocator test failed"))
+		return -EFAULT;
+	pr_success("protected memory allocator");
+	return 0;
+}
+
+module_init(test_pmalloc_init_module);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Igor Stoppa <igor.stoppa@huawei.com>");
+MODULE_DESCRIPTION("Test module for pmalloc.");
diff --git a/mm/test_write_rare.c b/mm/test_write_rare.c
new file mode 100644
index 000000000000..e19473bb319b
--- /dev/null
+++ b/mm/test_write_rare.c
@@ -0,0 +1,235 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * test_write_rare.c
+ *
+ * (C) Copyright 2018 Huawei Technologies Co. Ltd.
+ * Author: Igor Stoppa <igor.stoppa@huawei.com>
+ *
+ * Caveat: the tests which perform modifications are run *during* init, so
+ * the memory they use could still be altered through a direct write
+ * operation. But the purpose of these tests is to confirm that the
+ * modification through remapping works correctly. This doesn't depend on
+ * the read/write status of the original mapping.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/bug.h>
+#include <linux/prmemextra.h>
+
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#define pr_success(test_name)	\
+	pr_info(test_name " test passed\n")
+
+static int scalar __wr_after_init = 0xA5A5;
+
+/* The section must occupy a non-zero number of whole pages */
+static bool test_alignment(void)
+{
+	size_t pstart = (size_t)&__start_wr_after_init;
+	size_t pend = (size_t)&__end_wr_after_init;
+
+	if (WARN((pstart & ~PAGE_MASK) || (pend & ~PAGE_MASK) ||
+		 (pstart >= pend), "Boundaries test failed."))
+		return false;
+	pr_success("Boundaries");
+	return true;
+}
+
+/* Alter a scalar value */
+static bool test_simple_write(void)
+{
+	int new_val = 0x5A5A;
+
+	if (WARN(!__is_wr_after_init(&scalar, sizeof(scalar)),
+		 "The __wr_after_init modifier did NOT work."))
+		return false;
+
+	if (WARN(!wr(&scalar, &new_val) || scalar != new_val,
+		 "Scalar write rare test failed"))
+		return false;
+
+	pr_success("Scalar write rare");
+	return true;
+}
+
+#define LARGE_SIZE (PAGE_SIZE * 5)
+#define CHANGE_SIZE (PAGE_SIZE * 2)
+#define CHANGE_OFFSET (PAGE_SIZE / 2)
+
+static char large[LARGE_SIZE] __wr_after_init;
+
+/* Alter data across multiple pages */
+static bool test_cross_page_write(void)
+{
+	unsigned int i;
+	char *src;
+	bool check;
+
+	src = vmalloc(PAGE_SIZE * 2);
+	if (WARN(!src, "could not allocate memory"))
+		return false;
+
+	for (i = 0; i < LARGE_SIZE; i++)
+		large[i] = 0xA5;
+
+	for (i = 0; i < CHANGE_SIZE; i++)
+		src[i] = 0x5A;
+
+	check = wr_memcpy(large + CHANGE_OFFSET, src, CHANGE_SIZE);
+	vfree(src);
+	if (WARN(!check, "The wr_memcpy() failed"))
+		return false;
+
+	for (i = CHANGE_OFFSET; i < CHANGE_OFFSET + CHANGE_SIZE; i++)
+		if (WARN(large[i] != 0x5A,
+			 "Cross-page write rare test failed"))
+			return false;
+
+	pr_success("Cross-page write rare");
+	return true;
+}
+
+static bool test_memsetting(void)
+{
+	unsigned int i;
+
+	wr_memset(large, 0, LARGE_SIZE);
+	for (i = 0; i < LARGE_SIZE; i++)
+		if (WARN(large[i], "Failed to reset memory"))
+			return false;
+	wr_memset(large + CHANGE_OFFSET, 1, CHANGE_SIZE);
+	for (i = 0; i < CHANGE_OFFSET; i++)
+		if (WARN(large[i], "Failed to set memory"))
+			return false;
+	for (i = CHANGE_OFFSET; i < CHANGE_OFFSET + CHANGE_SIZE; i++)
+		if (WARN(!large[i], "Failed to set memory"))
+			return false;
+	for (i = CHANGE_OFFSET + CHANGE_SIZE; i < LARGE_SIZE; i++)
+		if (WARN(large[i], "Failed to set memory"))
+			return false;
+	pr_success("Memsetting");
+	return true;
+}
+
+#define INIT_VAL 1
+#define END_VAL 4
+
+/* Various tests for the shorthands provided for standard types. */
+static char char_var __wr_after_init = INIT_VAL;
+static bool test_char(void)
+{
+	return wr_char(&char_var, END_VAL) && char_var == END_VAL;
+}
+
+static short short_var __wr_after_init = INIT_VAL;
+static bool test_short(void)
+{
+	return wr_short(&short_var, END_VAL) &&
+		short_var == END_VAL;
+}
+
+static unsigned short ushort_var __wr_after_init = INIT_VAL;
+static bool test_ushort(void)
+{
+	return wr_ushort(&ushort_var, END_VAL) &&
+		ushort_var == END_VAL;
+}
+
+static int int_var __wr_after_init = INIT_VAL;
+static bool test_int(void)
+{
+	return wr_int(&int_var, END_VAL) &&
+		int_var == END_VAL;
+}
+
+static unsigned int uint_var __wr_after_init = INIT_VAL;
+static bool test_uint(void)
+{
+	return wr_uint(&uint_var, END_VAL) &&
+		uint_var == END_VAL;
+}
+
+static long long_var __wr_after_init = INIT_VAL;
+static bool test_long(void)
+{
+	return wr_long(&long_var, END_VAL) &&
+		long_var == END_VAL;
+}
+
+static unsigned long ulong_var __wr_after_init = INIT_VAL;
+static bool test_ulong(void)
+{
+	return wr_ulong(&ulong_var, END_VAL) &&
+		ulong_var == END_VAL;
+}
+
+static long long longlong_var __wr_after_init = INIT_VAL;
+static bool test_longlong(void)
+{
+	return wr_longlong(&longlong_var, END_VAL) &&
+		longlong_var == END_VAL;
+}
+
+static unsigned long long ulonglong_var __wr_after_init = INIT_VAL;
+static bool test_ulonglong(void)
+{
+	return wr_ulonglong(&ulonglong_var, END_VAL) &&
+		ulonglong_var == END_VAL;
+}
+
+static int referred_value = INIT_VAL;
+static int *reference __wr_after_init;
+static bool test_ptr(void)
+{
+	return wr_ptr(&reference, &referred_value) &&
+		reference == &referred_value;
+}
+
+static int *rcu_ptr __wr_after_init __aligned(sizeof(void *));
+static bool test_rcu_ptr(void)
+{
+	uintptr_t addr = wr_rcu_assign_pointer(rcu_ptr, &referred_value);
+
+	return  (addr == (uintptr_t)&referred_value) &&
+		referred_value == *(int *)addr;
+}
+
+static bool test_specialized_write_rare(void)
+{
+	if (WARN(!(test_char() && test_short() &&
+		   test_ushort() && test_int() &&
+		   test_uint() && test_long() &&
+		   test_ulong() &&
+		   test_longlong() && test_ulonglong() &&
+		   test_ptr() && test_rcu_ptr()),
+		 "Specialized write rare test failed"))
+		return false;
+	pr_success("Specialized write rare");
+	return true;
+}
+
+static int __init test_static_wr_init_module(void)
+{
+	if (WARN(!(test_alignment() &&
+		   test_simple_write() &&
+		   test_cross_page_write() &&
+		   test_memsetting() &&
+		   test_specialized_write_rare()),
+		 "static rare-write test failed"))
+		return -EFAULT;
+	pr_success("static write_rare");
+	return 0;
+}
+
+module_init(test_static_wr_init_module);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Igor Stoppa <igor.stoppa@huawei.com>");
+MODULE_DESCRIPTION("Test module for static write rare.");
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 07/17] prmem: lkdtm tests for memory protection
       [not found] <20181023213504.28905-1-igor.stoppa@huawei.com>
                   ` (4 preceding siblings ...)
  2018-10-23 21:34 ` [PATCH 06/17] prmem: test cases for memory protection Igor Stoppa
@ 2018-10-23 21:34 ` Igor Stoppa
  2018-10-23 21:34 ` [PATCH 08/17] prmem: struct page: track vmap_area Igor Stoppa
  2018-10-23 21:34 ` [PATCH 09/17] prmem: hardened usercopy Igor Stoppa
  7 siblings, 0 replies; 26+ messages in thread
From: Igor Stoppa @ 2018-10-23 21:34 UTC (permalink / raw)
  To: Mimi Zohar, Kees Cook, Matthew Wilcox, Dave Chinner,
	James Morris, Michal Hocko, kernel-hardening, linux-integrity,
	linux-security-module
  Cc: igor.stoppa, Dave Hansen, Jonathan Corbet, Laura Abbott,
	Greg Kroah-Hartman, Arnd Bergmann, linux-mm, linux-kernel

Various cases meant to verify that illegal operations on protected
memory will either BUG() or WARN().

The test cases fall into 2 main categories:
- trying to overwrite (directly) something that is write protected
- trying to use write rare functions on something that is not write rare

Signed-off-by: Igor Stoppa <igor.stoppa@huawei.com>
CC: Kees Cook <keescook@chromium.org>
CC: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
CC: Arnd Bergmann <arnd@arndb.de>
CC: linux-mm@kvack.org
CC: linux-kernel@vger.kernel.org
---
 drivers/misc/lkdtm/core.c  |  13 ++
 drivers/misc/lkdtm/lkdtm.h |  13 ++
 drivers/misc/lkdtm/perms.c | 248 +++++++++++++++++++++++++++++++++++++
 3 files changed, 274 insertions(+)

diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c
index 2154d1bfd18b..41a3ba16bc57 100644
--- a/drivers/misc/lkdtm/core.c
+++ b/drivers/misc/lkdtm/core.c
@@ -155,6 +155,19 @@ static const struct crashtype crashtypes[] = {
 	CRASHTYPE(ACCESS_USERSPACE),
 	CRASHTYPE(WRITE_RO),
 	CRASHTYPE(WRITE_RO_AFTER_INIT),
+	CRASHTYPE(WRITE_WR_AFTER_INIT),
+	CRASHTYPE(WRITE_WR_AFTER_INIT_ON_RO_AFTER_INIT),
+	CRASHTYPE(WRITE_WR_AFTER_INIT_ON_CONST),
+#ifdef CONFIG_PRMEM
+	CRASHTYPE(WRITE_RO_PMALLOC),
+	CRASHTYPE(WRITE_AUTO_RO_PMALLOC),
+	CRASHTYPE(WRITE_WR_PMALLOC),
+	CRASHTYPE(WRITE_AUTO_WR_PMALLOC),
+	CRASHTYPE(WRITE_START_WR_PMALLOC),
+	CRASHTYPE(WRITE_WR_PMALLOC_ON_RO_PMALLOC),
+	CRASHTYPE(WRITE_WR_PMALLOC_ON_CONST),
+	CRASHTYPE(WRITE_WR_PMALLOC_ON_RO_AFT_INIT),
+#endif
 	CRASHTYPE(WRITE_KERN),
 	CRASHTYPE(REFCOUNT_INC_OVERFLOW),
 	CRASHTYPE(REFCOUNT_ADD_OVERFLOW),
diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h
index 9e513dcfd809..08368c4545f7 100644
--- a/drivers/misc/lkdtm/lkdtm.h
+++ b/drivers/misc/lkdtm/lkdtm.h
@@ -38,6 +38,19 @@ void lkdtm_READ_BUDDY_AFTER_FREE(void);
 void __init lkdtm_perms_init(void);
 void lkdtm_WRITE_RO(void);
 void lkdtm_WRITE_RO_AFTER_INIT(void);
+void lkdtm_WRITE_WR_AFTER_INIT(void);
+void lkdtm_WRITE_WR_AFTER_INIT_ON_RO_AFTER_INIT(void);
+void lkdtm_WRITE_WR_AFTER_INIT_ON_CONST(void);
+#ifdef CONFIG_PRMEM
+void lkdtm_WRITE_RO_PMALLOC(void);
+void lkdtm_WRITE_AUTO_RO_PMALLOC(void);
+void lkdtm_WRITE_WR_PMALLOC(void);
+void lkdtm_WRITE_AUTO_WR_PMALLOC(void);
+void lkdtm_WRITE_START_WR_PMALLOC(void);
+void lkdtm_WRITE_WR_PMALLOC_ON_RO_PMALLOC(void);
+void lkdtm_WRITE_WR_PMALLOC_ON_CONST(void);
+void lkdtm_WRITE_WR_PMALLOC_ON_RO_AFT_INIT(void);
+#endif
 void lkdtm_WRITE_KERN(void);
 void lkdtm_EXEC_DATA(void);
 void lkdtm_EXEC_STACK(void);
diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c
index 53b85c9d16b8..3c14fd4d90ac 100644
--- a/drivers/misc/lkdtm/perms.c
+++ b/drivers/misc/lkdtm/perms.c
@@ -9,6 +9,7 @@
 #include <linux/vmalloc.h>
 #include <linux/mman.h>
 #include <linux/uaccess.h>
+#include <linux/prmemextra.h>
 #include <asm/cacheflush.h>
 
 /* Whether or not to fill the target memory area with do_nothing(). */
@@ -27,6 +28,10 @@ static const unsigned long rodata = 0xAA55AA55;
 /* This is marked __ro_after_init, so it should ultimately be .rodata. */
 static unsigned long ro_after_init __ro_after_init = 0x55AA5500;
 
+/* This is marked __wr_after_init, so it should be in .rodata. */
+static
+unsigned long wr_after_init __wr_after_init = 0x55AA5500;
+
 /*
  * This just returns to the caller. It is designed to be copied into
  * non-executable memory regions.
@@ -104,6 +109,247 @@ void lkdtm_WRITE_RO_AFTER_INIT(void)
 	*ptr ^= 0xabcd1234;
 }
 
+void lkdtm_WRITE_WR_AFTER_INIT(void)
+{
+	unsigned long *ptr = &wr_after_init;
+
+	/*
+	 * Verify we were written to during init. Since an Oops
+	 * is considered a "success", a failure is to just skip the
+	 * real test.
+	 */
+	if ((*ptr & 0xAA) != 0xAA) {
+		pr_info("%p was NOT written during init!?\n", ptr);
+		return;
+	}
+
+	pr_info("attempting bad wr_after_init write at %p\n", ptr);
+	*ptr ^= 0xabcd1234;
+}
+
+#define INIT_VAL 0x5A
+#define END_VAL 0xA5
+
+/* Verify that write rare will not work against read-only memory. */
+static int ro_after_init_data __ro_after_init = INIT_VAL;
+void lkdtm_WRITE_WR_AFTER_INIT_ON_RO_AFTER_INIT(void)
+{
+	pr_info("attempting illegal write rare to __ro_after_init");
+	if (wr_int(&ro_after_init_data, END_VAL) ||
+	     ro_after_init_data == END_VAL)
+		pr_info("Unexpected successful write to __ro_after_init");
+}
+
+/*
+ * "volatile" to force the compiler to not optimize away the reading back.
+ * Is there a better way to do it than using volatile?
+ */
+static volatile const int const_data = INIT_VAL;
+void lkdtm_WRITE_WR_AFTER_INIT_ON_CONST(void)
+{
+	pr_info("attempting illegal write rare to const data");
+	if (wr_int((int *)&const_data, END_VAL) || const_data == END_VAL)
+		pr_info("Unexpected successful write to const memory");
+}
+
+#ifdef CONFIG_PRMEM
+
+#define MSG_NO_POOL "Cannot allocate memory for the pool.\n"
+#define MSG_NO_PMEM "Cannot allocate memory from the pool.\n"
+
+void lkdtm_WRITE_RO_PMALLOC(void)
+{
+	struct pmalloc_pool *pool;
+	int *i;
+
+	pool = pmalloc_create_pool(PMALLOC_MODE_RO);
+	if (!pool) {
+		pr_info(MSG_NO_POOL);
+		return;
+	}
+	i = pmalloc(pool, sizeof(int));
+	if (!i) {
+		pr_info(MSG_NO_PMEM);
+		pmalloc_destroy_pool(pool);
+		return;
+	}
+	*i = INT_MAX;
+	pmalloc_protect_pool(pool);
+	pr_info("attempting bad pmalloc write at %p\n", i);
+	*i = 0; /* Note: this will crash and leak the pool memory. */
+}
+
+void lkdtm_WRITE_AUTO_RO_PMALLOC(void)
+{
+	struct pmalloc_pool *pool;
+	int *i;
+
+	pool = pmalloc_create_pool(PMALLOC_MODE_AUTO_RO);
+	if (!pool) {
+		pr_info(MSG_NO_POOL);
+		return;
+	}
+	i = pmalloc(pool, sizeof(int));
+	if (!i) {
+		pr_info(MSG_NO_PMEM);
+		pmalloc_destroy_pool(pool);
+		return;
+	}
+	*i = INT_MAX;
+	pmalloc(pool, PMALLOC_DEFAULT_REFILL_SIZE);
+	pr_info("attempting bad pmalloc write at %p\n", i);
+	*i = 0; /* Note: this will crash and leak the pool memory. */
+}
+
+void lkdtm_WRITE_WR_PMALLOC(void)
+{
+	struct pmalloc_pool *pool;
+	int *i;
+
+	pool = pmalloc_create_pool(PMALLOC_MODE_WR);
+	if (!pool) {
+		pr_info(MSG_NO_POOL);
+		return;
+	}
+	i = pmalloc(pool, sizeof(int));
+	if (!i) {
+		pr_info(MSG_NO_PMEM);
+		pmalloc_destroy_pool(pool);
+		return;
+	}
+	*i = INT_MAX;
+	pmalloc_protect_pool(pool);
+	pr_info("attempting bad pmalloc write at %p\n", i);
+	*i = 0; /* Note: this will crash and leak the pool memory. */
+}
+
+void lkdtm_WRITE_AUTO_WR_PMALLOC(void)
+{
+	struct pmalloc_pool *pool;
+	int *i;
+
+	pool = pmalloc_create_pool(PMALLOC_MODE_AUTO_WR);
+	if (!pool) {
+		pr_info(MSG_NO_POOL);
+		return;
+	}
+	i = pmalloc(pool, sizeof(int));
+	if (!i) {
+		pr_info(MSG_NO_PMEM);
+		pmalloc_destroy_pool(pool);
+		return;
+	}
+	*i = INT_MAX;
+	pmalloc(pool, PMALLOC_DEFAULT_REFILL_SIZE);
+	pr_info("attempting bad pmalloc write at %p\n", i);
+	*i = 0; /* Note: this will crash and leak the pool memory. */
+}
+
+void lkdtm_WRITE_START_WR_PMALLOC(void)
+{
+	struct pmalloc_pool *pool;
+	int *i;
+
+	pool = pmalloc_create_pool(PMALLOC_MODE_START_WR);
+	if (!pool) {
+		pr_info(MSG_NO_POOL);
+		return;
+	}
+	i = pmalloc(pool, sizeof(int));
+	if (!i) {
+		pr_info(MSG_NO_PMEM);
+		pmalloc_destroy_pool(pool);
+		return;
+	}
+	*i = INT_MAX;
+	pr_info("attempting bad pmalloc write at %p\n", i);
+	*i = 0; /* Note: this will crash and leak the pool memory. */
+}
+
+void lkdtm_WRITE_WR_PMALLOC_ON_RO_PMALLOC(void)
+{
+	struct pmalloc_pool *pool;
+	int *var_ptr;
+
+	pool = pmalloc_create_pool(PMALLOC_MODE_RO);
+	if (!pool) {
+		pr_info(MSG_NO_POOL);
+		return;
+	}
+	var_ptr = pmalloc(pool, sizeof(int));
+	if (!var_ptr) {
+		pr_info(MSG_NO_PMEM);
+		pmalloc_destroy_pool(pool);
+		return;
+	}
+	*var_ptr = INIT_VAL;
+	pmalloc_protect_pool(pool);
+	pr_info("attempting illegal write rare to R/O pool");
+	if (wr_int(var_ptr, END_VAL))
+		pr_info("Unexpected successful write to R/O pool");
+	pmalloc_destroy_pool(pool);
+}
+
+void lkdtm_WRITE_WR_PMALLOC_ON_CONST(void)
+{
+	struct pmalloc_pool *pool;
+	int *dummy;
+	bool write_result;
+
+	/*
+	 * The pool operations are only meant to simulate an attacker
+	 * using a random pool as parameter for the attack against the
+	 * const.
+	 */
+	pool = pmalloc_create_pool(PMALLOC_MODE_WR);
+	if (!pool) {
+		pr_info(MSG_NO_POOL);
+		return;
+	}
+	dummy = pmalloc(pool, sizeof(*dummy));
+	if (!dummy) {
+		pr_info(MSG_NO_PMEM);
+		pmalloc_destroy_pool(pool);
+		return;
+	}
+	*dummy = 1;
+	pmalloc_protect_pool(pool);
+	pr_info("attempting illegal write rare to const data");
+	write_result = wr_int((int *)&const_data, END_VAL);
+	pmalloc_destroy_pool(pool);
+	if (write_result || const_data != INIT_VAL)
+		pr_info("Unexpected successful write to const memory");
+}
+
+void lkdtm_WRITE_WR_PMALLOC_ON_RO_AFT_INIT(void)
+{
+	struct pmalloc_pool *pool;
+	int *dummy;
+	bool write_result;
+
+	/*
+	 * The pool operations are only meant to simulate an attacker
+	 * using a random pool as parameter for the attack against the
+	 * ro_after_init data.
+	 */
+	pool = pmalloc_create_pool(PMALLOC_MODE_WR);
+	if (WARN(!pool, MSG_NO_POOL))
+		return;
+	dummy = pmalloc(pool, sizeof(*dummy));
+	if (WARN(!dummy, MSG_NO_PMEM)) {
+		pmalloc_destroy_pool(pool);
+		return;
+	}
+	*dummy = 1;
+	pmalloc_protect_pool(pool);
+	pr_info("attempting illegal write rare to ro_after_init");
+	write_result = wr_int(&ro_after_init_data, END_VAL);
+	pmalloc_destroy_pool(pool);
+	WARN(write_result || ro_after_init_data != INIT_VAL,
+	     "Unexpected successful write to ro_after_init memory");
+}
+#endif
+
 void lkdtm_WRITE_KERN(void)
 {
 	size_t size;
@@ -200,4 +446,6 @@ void __init lkdtm_perms_init(void)
 	/* Make sure we can write to __ro_after_init values during __init */
 	ro_after_init |= 0xAA;
 
+	/* Make sure we can write to __wr_after_init during __init */
+	wr_after_init |= 0xAA;
 }
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 08/17] prmem: struct page: track vmap_area
       [not found] <20181023213504.28905-1-igor.stoppa@huawei.com>
                   ` (5 preceding siblings ...)
  2018-10-23 21:34 ` [PATCH 07/17] prmem: lkdtm tests " Igor Stoppa
@ 2018-10-23 21:34 ` Igor Stoppa
  2018-10-24  3:12   ` Matthew Wilcox
  2018-10-23 21:34 ` [PATCH 09/17] prmem: hardened usercopy Igor Stoppa
  7 siblings, 1 reply; 26+ messages in thread
From: Igor Stoppa @ 2018-10-23 21:34 UTC (permalink / raw)
  To: Mimi Zohar, Kees Cook, Matthew Wilcox, Dave Chinner,
	James Morris, Michal Hocko, kernel-hardening, linux-integrity,
	linux-security-module
  Cc: igor.stoppa, Dave Hansen, Jonathan Corbet, Laura Abbott,
	Vlastimil Babka, Kirill A. Shutemov, Andrew Morton,
	Pavel Tatashin, linux-mm, linux-kernel

When a page is used for virtual memory, it is often necessary to obtain
a handle to the corresponding vmap_area, which refers to the virtually
contiguous area generated when invoking vmalloc.

The struct page has a "private" field, which can be reused to store a
pointer to the parent area.

Note: in practice a virtual memory area is characterized both by a
struct vmap_area and a struct vm_struct.

The reason for referring from a page to its vmap_area, rather than to
the vm_struct, is that the vmap_area contains a struct vm_struct *vm
field, which can also be used to reach the information stored in the
corresponding vm_struct. This link, however, is unidirectional: it is
not possible to easily identify the corresponding vmap_area, given a
reference to a vm_struct.

Furthermore, the struct vmap_area contains a list head node which is
normally used only while the area is queued for freeing, and can
therefore be put to some other use during normal operations.

The connection between each page and its vmap_area avoids more expensive
searches through the btree of vmap_areas.

Therefore, it is possible for find_vmap_area to become once more a
static function, while the rest of the code relies on the direct
reference from struct page.

Signed-off-by: Igor Stoppa <igor.stoppa@huawei.com>
CC: Michal Hocko <mhocko@kernel.org>
CC: Vlastimil Babka <vbabka@suse.cz>
CC: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Pavel Tatashin <pasha.tatashin@oracle.com>
CC: linux-mm@kvack.org
CC: linux-kernel@vger.kernel.org
---
 include/linux/mm_types.h | 25 ++++++++++++++++++-------
 include/linux/prmem.h    | 13 ++++++++-----
 include/linux/vmalloc.h  |  1 -
 mm/prmem.c               |  2 +-
 mm/test_pmalloc.c        | 12 ++++--------
 mm/vmalloc.c             |  9 ++++++++-
 6 files changed, 39 insertions(+), 23 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 5ed8f6292a53..8403bdd12d1f 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -87,13 +87,24 @@ struct page {
 			/* See page-flags.h for PAGE_MAPPING_FLAGS */
 			struct address_space *mapping;
 			pgoff_t index;		/* Our offset within mapping. */
-			/**
-			 * @private: Mapping-private opaque data.
-			 * Usually used for buffer_heads if PagePrivate.
-			 * Used for swp_entry_t if PageSwapCache.
-			 * Indicates order in the buddy system if PageBuddy.
-			 */
-			unsigned long private;
+			union {
+				/**
+				 * @private: Mapping-private opaque data.
+				 * Usually used for buffer_heads if
+				 * PagePrivate.
+				 * Used for swp_entry_t if PageSwapCache.
+				 * Indicates order in the buddy system if
+				 * PageBuddy.
+				 */
+				unsigned long private;
+				/**
+				 * @area: reference to the containing area
+				 * For pages that are mapped into a virtually
+				 * contiguous area, avoids performing a more
+				 * expensive lookup.
+				 */
+				struct vmap_area *area;
+			};
 		};
 		struct {	/* slab, slob and slub */
 			union {
diff --git a/include/linux/prmem.h b/include/linux/prmem.h
index 26fd48410d97..cf713fc1c8bb 100644
--- a/include/linux/prmem.h
+++ b/include/linux/prmem.h
@@ -54,14 +54,17 @@ static __always_inline bool __is_wr_after_init(const void *ptr, size_t size)
 
 static __always_inline bool __is_wr_pool(const void *ptr, size_t size)
 {
-	struct vmap_area *area;
+	struct vm_struct *vm;
+	struct page *page;
 
 	if (!is_vmalloc_addr(ptr))
 		return false;
-	area = find_vmap_area((unsigned long)ptr);
-	return area && area->vm && (area->vm->size >= size) &&
-		((area->vm->flags & (VM_PMALLOC | VM_PMALLOC_WR)) ==
-		 (VM_PMALLOC | VM_PMALLOC_WR));
+	page = vmalloc_to_page(ptr);
+	if (!(page && page->area && page->area->vm))
+		return false;
+	vm = page->area->vm;
+	return ((vm->size >= size) &&
+		((vm->flags & VM_PMALLOC_WR_MASK) == VM_PMALLOC_WR_MASK));
 }
 
 /**
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 4d14a3b8089e..43a444f8b1e9 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -143,7 +143,6 @@ extern struct vm_struct *__get_vm_area_caller(unsigned long size,
 					const void *caller);
 extern struct vm_struct *remove_vm_area(const void *addr);
 extern struct vm_struct *find_vm_area(const void *addr);
-extern struct vmap_area *find_vmap_area(unsigned long addr);
 
 extern int map_vm_area(struct vm_struct *area, pgprot_t prot,
 			struct page **pages);
diff --git a/mm/prmem.c b/mm/prmem.c
index 7dd13ea43304..96abf04909e7 100644
--- a/mm/prmem.c
+++ b/mm/prmem.c
@@ -150,7 +150,7 @@ static int grow(struct pmalloc_pool *pool, size_t min_size)
 	if (WARN(!addr, "Failed to allocate %zd bytes", PAGE_ALIGN(size)))
 		return -ENOMEM;
 
-	new_area = find_vmap_area((uintptr_t)addr);
+	new_area = vmalloc_to_page(addr)->area;
 	tag_mask = VM_PMALLOC;
 	if (pool->mode & PMALLOC_WR)
 		tag_mask |= VM_PMALLOC_WR;
diff --git a/mm/test_pmalloc.c b/mm/test_pmalloc.c
index f9ee8fb29eea..c64872ff05ea 100644
--- a/mm/test_pmalloc.c
+++ b/mm/test_pmalloc.c
@@ -38,15 +38,11 @@ static bool is_address_protected(void *p)
 	if (unlikely(!is_vmalloc_addr(p)))
 		return false;
 	page = vmalloc_to_page(p);
-	if (unlikely(!page))
+	if (unlikely(!(page && page->area && page->area->vm)))
 		return false;
-	wmb(); /* Flush changes to the page table - is it needed? */
-	area = find_vmap_area((uintptr_t)p);
-	if (unlikely((!area) || (!area->vm) ||
-		     ((area->vm->flags & VM_PMALLOC_PROTECTED_MASK) !=
-		      VM_PMALLOC_PROTECTED_MASK)))
-		return false;
-	return true;
+	area = page->area;
+	return (area->vm->flags & VM_PMALLOC_PROTECTED_MASK) ==
+		VM_PMALLOC_PROTECTED_MASK;
 }
 
 static bool create_and_destroy_pool(void)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 15850005fea5..ffef705f0523 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -742,7 +742,7 @@ static void free_unmap_vmap_area(struct vmap_area *va)
 	free_vmap_area_noflush(va);
 }
 
-struct vmap_area *find_vmap_area(unsigned long addr)
+static struct vmap_area *find_vmap_area(unsigned long addr)
 {
 	struct vmap_area *va;
 
@@ -1523,6 +1523,7 @@ static void __vunmap(const void *addr, int deallocate_pages)
 			struct page *page = area->pages[i];
 
 			BUG_ON(!page);
+			page->area = NULL;
 			__free_pages(page, 0);
 		}
 
@@ -1731,8 +1732,10 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
 			const void *caller)
 {
 	struct vm_struct *area;
+	struct vmap_area *va;
 	void *addr;
 	unsigned long real_size = size;
+	unsigned int i;
 
 	size = PAGE_ALIGN(size);
 	if (!size || (size >> PAGE_SHIFT) > totalram_pages)
@@ -1747,6 +1750,10 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
 	if (!addr)
 		return NULL;
 
+	va = __find_vmap_area((unsigned long)addr);
+	for (i = 0; i < va->vm->nr_pages; i++)
+		va->vm->pages[i]->area = va;
+
 	/*
 	 * In this function, newly allocated vm_struct has VM_UNINITIALIZED
 	 * flag. It means that vm_struct is not fully initialized.
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 09/17] prmem: hardened usercopy
       [not found] <20181023213504.28905-1-igor.stoppa@huawei.com>
                   ` (6 preceding siblings ...)
  2018-10-23 21:34 ` [PATCH 08/17] prmem: struct page: track vmap_area Igor Stoppa
@ 2018-10-23 21:34 ` Igor Stoppa
  2018-10-29 11:45   ` Chris von Recklinghausen
  7 siblings, 1 reply; 26+ messages in thread
From: Igor Stoppa @ 2018-10-23 21:34 UTC (permalink / raw)
  To: Mimi Zohar, Kees Cook, Matthew Wilcox, Dave Chinner,
	James Morris, Michal Hocko, kernel-hardening, linux-integrity,
	linux-security-module
  Cc: igor.stoppa, Dave Hansen, Jonathan Corbet, Laura Abbott,
	Chris von Recklinghausen, linux-mm, linux-kernel

Prevent leaks of protected memory to userspace.
Protection from being overwritten from userspace is already available, once
the memory is write protected.

Signed-off-by: Igor Stoppa <igor.stoppa@huawei.com>
CC: Kees Cook <keescook@chromium.org>
CC: Chris von Recklinghausen <crecklin@redhat.com>
CC: linux-mm@kvack.org
CC: linux-kernel@vger.kernel.org
---
 include/linux/prmem.h | 24 ++++++++++++++++++++++++
 mm/usercopy.c         |  4 ++++
 2 files changed, 28 insertions(+)

diff --git a/include/linux/prmem.h b/include/linux/prmem.h
index cf713fc1c8bb..919d853ddc15 100644
--- a/include/linux/prmem.h
+++ b/include/linux/prmem.h
@@ -273,6 +273,30 @@ struct pmalloc_pool {
 	uint8_t mode;
 };
 
+void __noreturn usercopy_abort(const char *name, const char *detail,
+			       bool to_user, unsigned long offset,
+			       unsigned long len);
+
+/**
+ * check_pmalloc_object() - helper for hardened usercopy
+ * @ptr: the beginning of the memory to check
+ * @n: the size of the memory to check
+ * @to_user: copy to userspace or from userspace
+ *
+ * If the check is ok, it will fall-through, otherwise it will abort.
+ * The function is inlined, to minimize the performance impact of the
+ * extra check that can end up on a hot path.
+ * Non-exhaustive micro benchmarking with QEMU x86_64 shows a reduction of
+ * the time spent in this fragment by 60%, when inlined.
+ */
+static inline
+void check_pmalloc_object(const void *ptr, unsigned long n, bool to_user)
+{
+	if (unlikely(__is_wr_after_init(ptr, n) || __is_wr_pool(ptr, n)))
+		usercopy_abort("pmalloc", "accessing pmalloc obj", to_user,
+			       (const unsigned long)ptr, n);
+}
+
 /*
  * The write rare functionality is fully implemented as __always_inline,
  * to prevent having an internal function call that is capable of modifying
diff --git a/mm/usercopy.c b/mm/usercopy.c
index 852eb4e53f06..a080dd37b684 100644
--- a/mm/usercopy.c
+++ b/mm/usercopy.c
@@ -22,8 +22,9 @@
 #include <linux/thread_info.h>
 #include <linux/atomic.h>
 #include <linux/jump_label.h>
+#include <linux/prmem.h>
 #include <asm/sections.h>
 
 /*
  * Checks if a given pointer and length is contained by the current
  * stack frame (if possible).
@@ -284,6 +285,9 @@ void __check_object_size(const void *ptr, unsigned long n, bool to_user)
 
 	/* Check for object in kernel to avoid text exposure. */
 	check_kernel_text_object((const unsigned long)ptr, n, to_user);
+
+	/* Check if object is from a pmalloc chunk. */
+	check_pmalloc_object(ptr, n, to_user);
 }
 EXPORT_SYMBOL(__check_object_size);
 
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH 08/17] prmem: struct page: track vmap_area
  2018-10-23 21:34 ` [PATCH 08/17] prmem: struct page: track vmap_area Igor Stoppa
@ 2018-10-24  3:12   ` Matthew Wilcox
  2018-10-24 23:01     ` Igor Stoppa
  0 siblings, 1 reply; 26+ messages in thread
From: Matthew Wilcox @ 2018-10-24  3:12 UTC (permalink / raw)
  To: Igor Stoppa
  Cc: Mimi Zohar, Kees Cook, Dave Chinner, James Morris, Michal Hocko,
	kernel-hardening, linux-integrity, linux-security-module,
	igor.stoppa, Dave Hansen, Jonathan Corbet, Laura Abbott,
	Vlastimil Babka, Kirill A. Shutemov, Andrew Morton,
	Pavel Tatashin, linux-mm, linux-kernel

On Wed, Oct 24, 2018 at 12:34:55AM +0300, Igor Stoppa wrote:
> The connection between each page and its vmap_area avoids more expensive
> searches through the btree of vmap_areas.

Typo -- it's an rbtree.

> +++ b/include/linux/mm_types.h
> @@ -87,13 +87,24 @@ struct page {
>  			/* See page-flags.h for PAGE_MAPPING_FLAGS */
>  			struct address_space *mapping;
>  			pgoff_t index;		/* Our offset within mapping. */
> -			/**
> -			 * @private: Mapping-private opaque data.
> -			 * Usually used for buffer_heads if PagePrivate.
> -			 * Used for swp_entry_t if PageSwapCache.
> -			 * Indicates order in the buddy system if PageBuddy.
> -			 */
> -			unsigned long private;
> +			union {
> +				/**
> +				 * @private: Mapping-private opaque data.
> +				 * Usually used for buffer_heads if
> +				 * PagePrivate.
> +				 * Used for swp_entry_t if PageSwapCache.
> +				 * Indicates order in the buddy system if
> +				 * PageBuddy.
> +				 */
> +				unsigned long private;
> +				/**
> +				 * @area: reference to the containing area
> +				 * For pages that are mapped into a virtually
> +				 * contiguous area, avoids performing a more
> +				 * expensive lookup.
> +				 */
> +				struct vmap_area *area;
> +			};

Not like this.  Make it part of a different struct in the existing union,
not a part of the pagecache struct.  And there's no need to use ->private
explicitly.

> @@ -1747,6 +1750,10 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
>  	if (!addr)
>  		return NULL;
>  
> +	va = __find_vmap_area((unsigned long)addr);
> +	for (i = 0; i < va->vm->nr_pages; i++)
> +		va->vm->pages[i]->area = va;

I don't like it that you're calling this for _every_ vmalloc() caller
when most of them will never use this.  Perhaps have page->va be initially
NULL and then cache the lookup in it when it's accessed for the first time.
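
Something like this, perhaps (entirely untested, name invented):

	static struct vmap_area *pmalloc_vmap_area(const void *ptr)
	{
		struct page *page = vmalloc_to_page(ptr);
		struct vmap_area *va = page->area;

		if (!va) {
			va = find_vmap_area((unsigned long)ptr);
			page->area = va;	/* cache for later lookups */
		}
		return va;
	}

Then only the pages you actually interrogate pay for the rbtree walk,
and only once.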

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 06/17] prmem: test cases for memory protection
  2018-10-23 21:34 ` [PATCH 06/17] prmem: test cases for memory protection Igor Stoppa
@ 2018-10-24  3:27   ` Randy Dunlap
  2018-10-24 14:24     ` Igor Stoppa
  2018-10-25 16:43   ` Dave Hansen
  1 sibling, 1 reply; 26+ messages in thread
From: Randy Dunlap @ 2018-10-24  3:27 UTC (permalink / raw)
  To: Igor Stoppa, Mimi Zohar, Kees Cook, Matthew Wilcox, Dave Chinner,
	James Morris, Michal Hocko, kernel-hardening, linux-integrity,
	linux-security-module
  Cc: igor.stoppa, Dave Hansen, Jonathan Corbet, Laura Abbott,
	Vlastimil Babka, Kirill A. Shutemov, Andrew Morton,
	Pavel Tatashin, linux-mm, linux-kernel

On 10/23/18 2:34 PM, Igor Stoppa wrote:
> diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
> index 9a7b8b049d04..57de5b3c0bae 100644
> --- a/mm/Kconfig.debug
> +++ b/mm/Kconfig.debug
> @@ -94,3 +94,12 @@ config DEBUG_RODATA_TEST
>      depends on STRICT_KERNEL_RWX
>      ---help---
>        This option enables a testcase for the setting rodata read-only.
> +
> +config DEBUG_PRMEM_TEST
> +    tristate "Run self test for protected memory"
> +    depends on STRICT_KERNEL_RWX
> +    select PRMEM
> +    default n
> +    help
> +      Tries to verify that the memory protection works correctly and that
> +      the memory is effectively protected.

Hi,

a. It seems backwards (or upside down) to have a test case select a feature (PRMEM)
instead of depending on that feature.

b. Since PRMEM depends on MMU (in patch 04/17), the "select" here could try to
enable PRMEM even when MMU is not enabled.

Changing this to "depends on PRMEM" would solve both of these issues.

c. Don't use "default n".  That is already the default.


thanks,
-- 
~Randy

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 06/17] prmem: test cases for memory protection
  2018-10-24  3:27   ` Randy Dunlap
@ 2018-10-24 14:24     ` Igor Stoppa
  0 siblings, 0 replies; 26+ messages in thread
From: Igor Stoppa @ 2018-10-24 14:24 UTC (permalink / raw)
  To: Randy Dunlap, Mimi Zohar, Kees Cook, Matthew Wilcox,
	Dave Chinner, James Morris, Michal Hocko, kernel-hardening,
	linux-integrity, linux-security-module
  Cc: igor.stoppa, Dave Hansen, Jonathan Corbet, Laura Abbott,
	Vlastimil Babka, Kirill A. Shutemov, Andrew Morton,
	Pavel Tatashin, linux-mm, linux-kernel

Hi,

On 24/10/18 06:27, Randy Dunlap wrote:

> a. It seems backwards (or upside down) to have a test case select a feature (PRMEM)
> instead of depending on that feature.
> 
> b. Since PRMEM depends on MMU (in patch 04/17), the "select" here could try to
> enabled PRMEM even when MMU is not enabled.
> 
> Changing this to "depends on PRMEM" would solve both of these issues.

The weird dependency you pointed out is partially caused by the 
incompleteness of PRMEM.

What I have in mind is to have a fallback version of it for systems 
lacking an MMU capable of write protection, possibly defaulting to 
kvmalloc.
In that case there would not be any need for a configuration option.
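
Roughly like this (just to sketch the idea; obviously no protection
would actually happen in this configuration):

	#ifndef CONFIG_MMU
	static inline void *pmalloc(struct pmalloc_pool *pool, size_t size)
	{
		return kvmalloc(size, GFP_KERNEL);
	}

	static inline void pmalloc_protect_pool(struct pmalloc_pool *pool)
	{
		/* without an MMU the fallback cannot write protect */
	}
	#endif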

> c. Don't use "default n".  That is already the default.

ok

--
igor

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 08/17] prmem: struct page: track vmap_area
  2018-10-24  3:12   ` Matthew Wilcox
@ 2018-10-24 23:01     ` Igor Stoppa
  2018-10-25  2:13       ` Matthew Wilcox
  0 siblings, 1 reply; 26+ messages in thread
From: Igor Stoppa @ 2018-10-24 23:01 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Mimi Zohar, Kees Cook, Dave Chinner, James Morris, Michal Hocko,
	kernel-hardening, linux-integrity, linux-security-module,
	igor.stoppa, Dave Hansen, Jonathan Corbet, Laura Abbott,
	Vlastimil Babka, Kirill A. Shutemov, Andrew Morton,
	Pavel Tatashin, linux-mm, linux-kernel



On 24/10/2018 06:12, Matthew Wilcox wrote:
> On Wed, Oct 24, 2018 at 12:34:55AM +0300, Igor Stoppa wrote:
>> The connection between each page and its vmap_area avoids more expensive
>> searches through the btree of vmap_areas.
> 
> Typo -- it's an rbtree.

ack

>> +++ b/include/linux/mm_types.h
>> @@ -87,13 +87,24 @@ struct page {
>>   			/* See page-flags.h for PAGE_MAPPING_FLAGS */
>>   			struct address_space *mapping;
>>   			pgoff_t index;		/* Our offset within mapping. */
>> -			/**
>> -			 * @private: Mapping-private opaque data.
>> -			 * Usually used for buffer_heads if PagePrivate.
>> -			 * Used for swp_entry_t if PageSwapCache.
>> -			 * Indicates order in the buddy system if PageBuddy.
>> -			 */
>> -			unsigned long private;
>> +			union {
>> +				/**
>> +				 * @private: Mapping-private opaque data.
>> +				 * Usually used for buffer_heads if
>> +				 * PagePrivate.
>> +				 * Used for swp_entry_t if PageSwapCache.
>> +				 * Indicates order in the buddy system if
>> +				 * PageBuddy.
>> +				 */
>> +				unsigned long private;
>> +				/**
>> +				 * @area: reference to the containing area
>> +				 * For pages that are mapped into a virtually
>> +				 * contiguous area, avoids performing a more
>> +				 * expensive lookup.
>> +				 */
>> +				struct vmap_area *area;
>> +			};
> 
> Not like this.  Make it part of a different struct in the existing union,
> not a part of the pagecache struct.  And there's no need to use ->private
> explicitly.

Ok, I'll have a look at the googledoc you made

>> @@ -1747,6 +1750,10 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
>>   	if (!addr)
>>   		return NULL;
>>   
>> +	va = __find_vmap_area((unsigned long)addr);
>> +	for (i = 0; i < va->vm->nr_pages; i++)
>> +		va->vm->pages[i]->area = va;
> 
> I don't like it that you're calling this for _every_ vmalloc() caller
> when most of them will never use this.  Perhaps have page->va be initially
> NULL and then cache the lookup in it when it's accessed for the first time.
> 

If __find_vmap_area() was part of the API, this loop could be left out 
from __vmalloc_node_range() and the user of the allocation could 
initialize the field, if needed.

What is the reason for keeping __find_vmap_area() private?

--
igor

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 02/17] prmem: write rare for static allocation
  2018-10-23 21:34 ` [PATCH 02/17] prmem: write rare for static allocation Igor Stoppa
@ 2018-10-25  0:24   ` Dave Hansen
  2018-10-29 18:03     ` Igor Stoppa
  2018-10-26  9:41   ` Peter Zijlstra
  1 sibling, 1 reply; 26+ messages in thread
From: Dave Hansen @ 2018-10-25  0:24 UTC (permalink / raw)
  To: Igor Stoppa, Mimi Zohar, Kees Cook, Matthew Wilcox, Dave Chinner,
	James Morris, Michal Hocko, kernel-hardening, linux-integrity,
	linux-security-module
  Cc: igor.stoppa, Dave Hansen, Jonathan Corbet, Laura Abbott,
	Vlastimil Babka, Kirill A. Shutemov, Andrew Morton,
	Pavel Tatashin, linux-mm, linux-kernel

> +static __always_inline bool __is_wr_after_init(const void *ptr, size_t size)
> +{
> +	size_t start = (size_t)&__start_wr_after_init;
> +	size_t end = (size_t)&__end_wr_after_init;
> +	size_t low = (size_t)ptr;
> +	size_t high = (size_t)ptr + size;
> +
> +	return likely(start <= low && low < high && high <= end);
> +}

size_t is an odd type choice for doing address arithmetic.

> +/**
> + * wr_memset() - sets n bytes of the destination to the c value
> + * @dst: beginning of the memory to write to
> + * @c: byte to replicate
> + * @size: amount of bytes to copy
> + *
> + * Returns true on success, false otherwise.
> + */
> +static __always_inline
> +bool wr_memset(const void *dst, const int c, size_t n_bytes)
> +{
> +	size_t size;
> +	unsigned long flags;
> +	uintptr_t d = (uintptr_t)dst;
> +
> +	if (WARN(!__is_wr_after_init(dst, n_bytes), WR_ERR_RANGE_MSG))
> +		return false;
> +	while (n_bytes) {
> +		struct page *page;
> +		uintptr_t base;
> +		uintptr_t offset;
> +		uintptr_t offset_complement;

Again, these are really odd choices for types.  vmap() returns a void*
pointer, on which you can do arithmetic.  Why bother keeping another
type to which you have to cast to and from?

BTW, our usual "pointer stored in an integer type" is 'unsigned long',
if a pointer needs to be manipulated.

> +		local_irq_save(flags);

Why are you doing the local_irq_save()?

> +		page = virt_to_page(d);
> +		offset = d & ~PAGE_MASK;
> +		offset_complement = PAGE_SIZE - offset;
> +		size = min(n_bytes, offset_complement);
> +		base = (uintptr_t)vmap(&page, 1, VM_MAP, PAGE_KERNEL);

Can you even call vmap() (which sleeps) with interrupts off?

> +		if (WARN(!base, WR_ERR_PAGE_MSG)) {
> +			local_irq_restore(flags);
> +			return false;
> +		}

You really need some kmap_atomic()-style accessors to wrap this stuff
for you.  This little pattern is repeated over and over.
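
Something with roughly this shape (sketch only, untested):

	static void *wr_map(const void *addr)
	{
		struct page *page = virt_to_page(addr);
		void *base = vmap(&page, 1, VM_MAP, PAGE_KERNEL);

		if (!base)
			return NULL;
		return base + ((unsigned long)addr & ~PAGE_MASK);
	}

	static void wr_unmap(void *addr)
	{
		vunmap((void *)((unsigned long)addr & PAGE_MASK));
	}

Then the memset/memcpy/rcu variants share the mapping and the error
handling, instead of each open-coding them.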

...
> +const char WR_ERR_RANGE_MSG[] = "Write rare on invalid memory range.";
> +const char WR_ERR_PAGE_MSG[] = "Failed to remap write rare page.";

Doesn't the compiler de-duplicate duplicated strings for you?  Is there
any reason to declare these like this?

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 03/17] prmem: vmalloc support for dynamic allocation
  2018-10-23 21:34 ` [PATCH 03/17] prmem: vmalloc support for dynamic allocation Igor Stoppa
@ 2018-10-25  0:26   ` Dave Hansen
  2018-10-29 18:07     ` Igor Stoppa
  0 siblings, 1 reply; 26+ messages in thread
From: Dave Hansen @ 2018-10-25  0:26 UTC (permalink / raw)
  To: Igor Stoppa, Mimi Zohar, Kees Cook, Matthew Wilcox, Dave Chinner,
	James Morris, Michal Hocko, kernel-hardening, linux-integrity,
	linux-security-module
  Cc: igor.stoppa, Dave Hansen, Jonathan Corbet, Laura Abbott,
	Andrew Morton, Chintan Pandya, Joe Perches, Luis R. Rodriguez,
	Thomas Gleixner, Kate Stewart, Greg Kroah-Hartman,
	Philippe Ombredanne, linux-mm, linux-kernel

On 10/23/18 2:34 PM, Igor Stoppa wrote:
> +#define VM_PMALLOC		0x00000100	/* pmalloc area - see docs */
> +#define VM_PMALLOC_WR		0x00000200	/* pmalloc write rare area */
> +#define VM_PMALLOC_PROTECTED	0x00000400	/* pmalloc protected area */

Please introduce things as you use them.  It's impossible to review a
patch that just says "see docs" that doesn't contain any docs. :)

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 05/17] prmem: shorthands for write rare on common types
  2018-10-23 21:34 ` [PATCH 05/17] prmem: shorthands for write rare on common types Igor Stoppa
@ 2018-10-25  0:28   ` Dave Hansen
  2018-10-29 18:12     ` Igor Stoppa
  0 siblings, 1 reply; 26+ messages in thread
From: Dave Hansen @ 2018-10-25  0:28 UTC (permalink / raw)
  To: Igor Stoppa, Mimi Zohar, Kees Cook, Matthew Wilcox, Dave Chinner,
	James Morris, Michal Hocko, kernel-hardening, linux-integrity,
	linux-security-module
  Cc: igor.stoppa, Dave Hansen, Jonathan Corbet, Laura Abbott,
	Vlastimil Babka, Kirill A. Shutemov, Andrew Morton,
	Pavel Tatashin, linux-mm, linux-kernel

On 10/23/18 2:34 PM, Igor Stoppa wrote:
> Wrappers around the basic write rare functionality, addressing several
> common data types found in the kernel, allowing to specify the new
> values through immediates, like constants and defines.

I have to wonder whether this is the right way, or whether a
size-agnostic function like put_user() is the right way.  put_user() is
certainly easier to use.
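
I.e. something along these lines (sketch; the name is invented, so it
does not clash with the wr() already in the patch set):

	#define wr_assign(ptr, val) ({					\
		typeof(*(ptr)) __val = (val);				\
		wr_memcpy((ptr), &__val, sizeof(*(ptr)));		\
	})

which would collapse the whole wr_char()/wr_int()/... family into a
single size-agnostic macro.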

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 08/17] prmem: struct page: track vmap_area
  2018-10-24 23:01     ` Igor Stoppa
@ 2018-10-25  2:13       ` Matthew Wilcox
  2018-10-29 18:21         ` Igor Stoppa
  0 siblings, 1 reply; 26+ messages in thread
From: Matthew Wilcox @ 2018-10-25  2:13 UTC (permalink / raw)
  To: Igor Stoppa
  Cc: Mimi Zohar, Kees Cook, Dave Chinner, James Morris, Michal Hocko,
	kernel-hardening, linux-integrity, linux-security-module,
	igor.stoppa, Dave Hansen, Jonathan Corbet, Laura Abbott,
	Vlastimil Babka, Kirill A. Shutemov, Andrew Morton,
	Pavel Tatashin, linux-mm, linux-kernel

On Thu, Oct 25, 2018 at 02:01:02AM +0300, Igor Stoppa wrote:
> > > @@ -1747,6 +1750,10 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
> > >   	if (!addr)
> > >   		return NULL;
> > > +	va = __find_vmap_area((unsigned long)addr);
> > > +	for (i = 0; i < va->vm->nr_pages; i++)
> > > +		va->vm->pages[i]->area = va;
> > 
> > I don't like it that you're calling this for _every_ vmalloc() caller
> > when most of them will never use this.  Perhaps have page->va be initially
> > NULL and then cache the lookup in it when it's accessed for the first time.
> > 
> 
> If __find_vmap_area() was part of the API, this loop could be left out from
> __vmalloc_node_range() and the user of the allocation could initialize the
> field, if needed.
> 
> What is the reason for keeping __find_vmap_area() private?

Well, for one, you're walking the rbtree without holding the spinlock,
so you're going to get crashes.  I don't see why we shouldn't export
find_vmap_area() though.
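
find_vmap_area() is just the locked wrapper around it (from memory, so
double-check):

	struct vmap_area *find_vmap_area(unsigned long addr)
	{
		struct vmap_area *va;

		spin_lock(&vmap_area_lock);
		va = __find_vmap_area(addr);
		spin_unlock(&vmap_area_lock);

		return va;
	}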

Another way we could approach this is to embed the vmap_area in the
vm_struct.  It'd require a bit of juggling of the alloc/free paths in
vmalloc, but it might be worthwhile.
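
Hand-waving, roughly:

	struct vm_struct {
		struct vm_struct	*next;
		void			*addr;
		unsigned long		size;
		unsigned long		flags;
		struct page		**pages;
		unsigned int		nr_pages;
		phys_addr_t		phys_addr;
		const void		*caller;
		struct vmap_area	va;	/* embedded, not kmalloced */
	};

so going from one to the other becomes &vm->va in one direction and
container_of() in the other.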

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 06/17] prmem: test cases for memory protection
  2018-10-23 21:34 ` [PATCH 06/17] prmem: test cases for memory protection Igor Stoppa
  2018-10-24  3:27   ` Randy Dunlap
@ 2018-10-25 16:43   ` Dave Hansen
  2018-10-29 18:16     ` Igor Stoppa
  1 sibling, 1 reply; 26+ messages in thread
From: Dave Hansen @ 2018-10-25 16:43 UTC (permalink / raw)
  To: Igor Stoppa, Mimi Zohar, Kees Cook, Matthew Wilcox, Dave Chinner,
	James Morris, Michal Hocko, kernel-hardening, linux-integrity,
	linux-security-module
  Cc: igor.stoppa, Dave Hansen, Jonathan Corbet, Laura Abbott,
	Vlastimil Babka, Kirill A. Shutemov, Andrew Morton,
	Pavel Tatashin, linux-mm, linux-kernel

> +static bool is_address_protected(void *p)
> +{
> +	struct page *page;
> +	struct vmap_area *area;
> +
> +	if (unlikely(!is_vmalloc_addr(p)))
> +		return false;
> +	page = vmalloc_to_page(p);
> +	if (unlikely(!page))
> +		return false;
> +	wmb(); /* Flush changes to the page table - is it needed? */

No.

The rest of this is just pretty verbose and seems to have been very
heavily copied and pasted.  I guess that's OK for test code, though.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 02/17] prmem: write rare for static allocation
  2018-10-23 21:34 ` [PATCH 02/17] prmem: write rare for static allocation Igor Stoppa
  2018-10-25  0:24   ` Dave Hansen
@ 2018-10-26  9:41   ` Peter Zijlstra
  2018-10-29 20:01     ` Igor Stoppa
  1 sibling, 1 reply; 26+ messages in thread
From: Peter Zijlstra @ 2018-10-26  9:41 UTC (permalink / raw)
  To: Igor Stoppa
  Cc: Mimi Zohar, Kees Cook, Matthew Wilcox, Dave Chinner,
	James Morris, Michal Hocko, kernel-hardening, linux-integrity,
	linux-security-module, igor.stoppa, Dave Hansen, Jonathan Corbet,
	Laura Abbott, Vlastimil Babka, Kirill A. Shutemov, Andrew Morton,
	Pavel Tatashin, linux-mm, linux-kernel

On Wed, Oct 24, 2018 at 12:34:49AM +0300, Igor Stoppa wrote:
> +static __always_inline

That's far too large for inline.

> +bool wr_memset(const void *dst, const int c, size_t n_bytes)
> +{
> +	size_t size;
> +	unsigned long flags;
> +	uintptr_t d = (uintptr_t)dst;
> +
> +	if (WARN(!__is_wr_after_init(dst, n_bytes), WR_ERR_RANGE_MSG))
> +		return false;
> +	while (n_bytes) {
> +		struct page *page;
> +		uintptr_t base;
> +		uintptr_t offset;
> +		uintptr_t offset_complement;
> +
> +		local_irq_save(flags);
> +		page = virt_to_page(d);
> +		offset = d & ~PAGE_MASK;
> +		offset_complement = PAGE_SIZE - offset;
> +		size = min(n_bytes, offset_complement);
> +		base = (uintptr_t)vmap(&page, 1, VM_MAP, PAGE_KERNEL);
> +		if (WARN(!base, WR_ERR_PAGE_MSG)) {
> +			local_irq_restore(flags);
> +			return false;
> +		}
> +		memset((void *)(base + offset), c, size);
> +		vunmap((void *)base);

BUG

> +		d += size;
> +		n_bytes -= size;
> +		local_irq_restore(flags);
> +	}
> +	return true;
> +}
> +
> +static __always_inline

Similarly large

> +bool wr_memcpy(const void *dst, const void *src, size_t n_bytes)
> +{
> +	size_t size;
> +	unsigned long flags;
> +	uintptr_t d = (uintptr_t)dst;
> +	uintptr_t s = (uintptr_t)src;
> +
> +	if (WARN(!__is_wr_after_init(dst, n_bytes), WR_ERR_RANGE_MSG))
> +		return false;
> +	while (n_bytes) {
> +		struct page *page;
> +		uintptr_t base;
> +		uintptr_t offset;
> +		uintptr_t offset_complement;
> +
> +		local_irq_save(flags);
> +		page = virt_to_page(d);
> +		offset = d & ~PAGE_MASK;
> +		offset_complement = PAGE_SIZE - offset;
> +		size = (size_t)min(n_bytes, offset_complement);
> +		base = (uintptr_t)vmap(&page, 1, VM_MAP, PAGE_KERNEL);
> +		if (WARN(!base, WR_ERR_PAGE_MSG)) {
> +			local_irq_restore(flags);
> +			return false;
> +		}
> +		__write_once_size((void *)(base + offset), (void *)s, size);
> +		vunmap((void *)base);

Similarly BUG.

> +		d += size;
> +		s += size;
> +		n_bytes -= size;
> +		local_irq_restore(flags);
> +	}
> +	return true;
> +}

> +static __always_inline

Guess what..

> +uintptr_t __wr_rcu_ptr(const void *dst_p_p, const void *src_p)
> +{
> +	unsigned long flags;
> +	struct page *page;
> +	void *base;
> +	uintptr_t offset;
> +	const size_t size = sizeof(void *);
> +
> +	if (WARN(!__is_wr_after_init(dst_p_p, size), WR_ERR_RANGE_MSG))
> +		return (uintptr_t)NULL;
> +	local_irq_save(flags);
> +	page = virt_to_page(dst_p_p);
> +	offset = (uintptr_t)dst_p_p & ~PAGE_MASK;
> +	base = vmap(&page, 1, VM_MAP, PAGE_KERNEL);
> +	if (WARN(!base, WR_ERR_PAGE_MSG)) {
> +		local_irq_restore(flags);
> +		return (uintptr_t)NULL;
> +	}
> +	rcu_assign_pointer((*(void **)(offset + (uintptr_t)base)), src_p);
> +	vunmap(base);

Also still bug.

> +	local_irq_restore(flags);
> +	return (uintptr_t)src_p;
> +}

Also, I see an amount of duplication here that shows you're not nearly
lazy enough.
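
All three are the same loop; one helper would do, something like
(untested, and still with the vmap()-under-IRQs-off problem noted
above):

	enum wr_op_type { WR_MEMCPY, WR_MEMSET };

	static bool __wr_op(unsigned long d, unsigned long src, size_t n,
			    enum wr_op_type op)
	{
		unsigned long flags;

		while (n) {
			struct page *page = virt_to_page(d);
			unsigned long offset = d & ~PAGE_MASK;
			size_t size = min_t(size_t, n, PAGE_SIZE - offset);
			void *base;

			local_irq_save(flags);
			base = vmap(&page, 1, VM_MAP, PAGE_KERNEL);
			if (WARN(!base, WR_ERR_PAGE_MSG)) {
				local_irq_restore(flags);
				return false;
			}
			if (op == WR_MEMCPY) {
				memcpy(base + offset, (void *)src, size);
				src += size;
			} else {
				memset(base + offset, (int)src, size);
			}
			vunmap(base);
			local_irq_restore(flags);
			d += size;
			n -= size;
		}
		return true;
	}

with wr_memset()/wr_memcpy() reduced to the range check plus one call
each.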

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 09/17] prmem: hardened usercopy
  2018-10-23 21:34 ` [PATCH 09/17] prmem: hardened usercopy Igor Stoppa
@ 2018-10-29 11:45   ` Chris von Recklinghausen
  2018-10-29 18:24     ` Igor Stoppa
  0 siblings, 1 reply; 26+ messages in thread
From: Chris von Recklinghausen @ 2018-10-29 11:45 UTC (permalink / raw)
  To: Igor Stoppa, Mimi Zohar, Kees Cook, Matthew Wilcox, Dave Chinner,
	James Morris, Michal Hocko, kernel-hardening, linux-integrity,
	linux-security-module
  Cc: igor.stoppa, Dave Hansen, Jonathan Corbet, Laura Abbott,
	linux-mm, linux-kernel

On 10/23/2018 05:34 PM, Igor Stoppa wrote:
> Prevent leaks of protected memory to userspace.
> Protection from being overwritten from userspace is already available, once
> the memory is write protected.
>
> Signed-off-by: Igor Stoppa <igor.stoppa@huawei.com>
> CC: Kees Cook <keescook@chromium.org>
> CC: Chris von Recklinghausen <crecklin@redhat.com>
> CC: linux-mm@kvack.org
> CC: linux-kernel@vger.kernel.org
> ---
>  include/linux/prmem.h | 24 ++++++++++++++++++++++++
>  mm/usercopy.c         |  4 ++++
>  2 files changed, 28 insertions(+)
>
> diff --git a/include/linux/prmem.h b/include/linux/prmem.h
> index cf713fc1c8bb..919d853ddc15 100644
> --- a/include/linux/prmem.h
> +++ b/include/linux/prmem.h
> @@ -273,6 +273,30 @@ struct pmalloc_pool {
>  	uint8_t mode;
>  };
>  
> +void __noreturn usercopy_abort(const char *name, const char *detail,
> +			       bool to_user, unsigned long offset,
> +			       unsigned long len);
> +
> +/**
> + * check_pmalloc_object() - helper for hardened usercopy
> + * @ptr: the beginning of the memory to check
> + * @n: the size of the memory to check
> + * @to_user: copy to userspace or from userspace
> + *
> + * If the check is ok, it will fall-through, otherwise it will abort.
> + * The function is inlined, to minimize the performance impact of the
> + * extra check that can end up on a hot path.
> + * Non-exhaustive micro benchmarking with QEMU x86_64 shows a reduction of
> + * the time spent in this fragment by 60%, when inlined.
> + */
> +static inline
> +void check_pmalloc_object(const void *ptr, unsigned long n, bool to_user)
> +{
> +	if (unlikely(__is_wr_after_init(ptr, n) || __is_wr_pool(ptr, n)))
> +		usercopy_abort("pmalloc", "accessing pmalloc obj", to_user,
> +			       (const unsigned long)ptr, n);
> +}
> +
>  /*
>   * The write rare functionality is fully implemented as __always_inline,
>   * to prevent having an internal function call that is capable of modifying
> diff --git a/mm/usercopy.c b/mm/usercopy.c
> index 852eb4e53f06..a080dd37b684 100644
> --- a/mm/usercopy.c
> +++ b/mm/usercopy.c
> @@ -22,8 +22,9 @@
>  #include <linux/thread_info.h>
>  #include <linux/atomic.h>
>  #include <linux/jump_label.h>
> +#include <linux/prmem.h>
>  #include <asm/sections.h>
>  
>  /*
>   * Checks if a given pointer and length is contained by the current
>   * stack frame (if possible).
> @@ -284,6 +285,9 @@ void __check_object_size(const void *ptr, unsigned long n, bool to_user)
>  
>  	/* Check for object in kernel to avoid text exposure. */
>  	check_kernel_text_object((const unsigned long)ptr, n, to_user);
> +
> +	/* Check if object is from a pmalloc chunk. */
> +	check_pmalloc_object(ptr, n, to_user);
>  }
>  EXPORT_SYMBOL(__check_object_size);
>  

Could you add code somewhere (lkdtm driver if possible) to demonstrate
the issue and verify the code change?
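
Something along these lines maybe, following the pattern of the other
lkdtm usercopy tests (just a sketch, not even compile tested):

	void lkdtm_USERCOPY_PMALLOC(void)
	{
		struct pmalloc_pool *pool;
		unsigned long user_addr;
		int *data;

		pool = pmalloc_create_pool(PMALLOC_MODE_RO);
		if (!pool)
			return;
		data = pmalloc(pool, sizeof(*data));
		if (!data) {
			pmalloc_destroy_pool(pool);
			return;
		}
		*data = 0x5A5A5A5A;
		pmalloc_protect_pool(pool);
		user_addr = vm_mmap(NULL, 0, PAGE_SIZE,
				    PROT_READ | PROT_WRITE,
				    MAP_ANONYMOUS | MAP_PRIVATE, 0);
		pr_info("attempting bad copy_to_user from pmalloc obj\n");
		if (copy_to_user((void __user *)user_addr, data,
				 sizeof(*data)))
			pr_info("copy_to_user failed, but no abort\n");
		vm_munmap(user_addr, PAGE_SIZE);
		pmalloc_destroy_pool(pool);
	}

(the size probably needs the usual "unconst" trick, so that the
hardened usercopy check is not optimized away)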

Thanks,

Chris

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 02/17] prmem: write rare for static allocation
  2018-10-25  0:24   ` Dave Hansen
@ 2018-10-29 18:03     ` Igor Stoppa
  0 siblings, 0 replies; 26+ messages in thread
From: Igor Stoppa @ 2018-10-29 18:03 UTC (permalink / raw)
  To: Dave Hansen, Mimi Zohar, Kees Cook, Matthew Wilcox, Dave Chinner,
	James Morris, Michal Hocko, kernel-hardening, linux-integrity,
	linux-security-module
  Cc: igor.stoppa, Dave Hansen, Jonathan Corbet, Laura Abbott,
	Vlastimil Babka, Kirill A. Shutemov, Andrew Morton,
	Pavel Tatashin, linux-mm, linux-kernel

On 25/10/2018 01:24, Dave Hansen wrote:
>> +static __always_inline bool __is_wr_after_init(const void *ptr, size_t size)
>> +{
>> +	size_t start = (size_t)&__start_wr_after_init;
>> +	size_t end = (size_t)&__end_wr_after_init;
>> +	size_t low = (size_t)ptr;
>> +	size_t high = (size_t)ptr + size;
>> +
>> +	return likely(start <= low && low < high && high <= end);
>> +}
> 
> size_t is an odd type choice for doing address arithmetic.

it seemed more portable than unsigned long

>> +/**
>> + * wr_memset() - sets n bytes of the destination to the c value
>> + * @dst: beginning of the memory to write to
>> + * @c: byte to replicate
>> + * @size: amount of bytes to copy
>> + *
>> + * Returns true on success, false otherwise.
>> + */
>> +static __always_inline
>> +bool wr_memset(const void *dst, const int c, size_t n_bytes)
>> +{
>> +	size_t size;
>> +	unsigned long flags;
>> +	uintptr_t d = (uintptr_t)dst;
>> +
>> +	if (WARN(!__is_wr_after_init(dst, n_bytes), WR_ERR_RANGE_MSG))
>> +		return false;
>> +	while (n_bytes) {
>> +		struct page *page;
>> +		uintptr_t base;
>> +		uintptr_t offset;
>> +		uintptr_t offset_complement;
> 
> Again, these are really odd choices for types.  vmap() returns a void*
> pointer, on which you can do arithmetic.  

I wasn't sure how much I could rely on the compiler not doing unwanted 
optimizations.

> Why bother keeping another
> type to which you have to cast to and from?

For the above reason. If I'm worrying unnecessarily, I can switch back 
to void *; it certainly is easier to use.

> BTW, our usual "pointer stored in an integer type" is 'unsigned long',
> if a pointer needs to be manipulated.

Yes, I noticed that, but it seemed strange ...
size_t corresponds to unsigned long, AFAIK, but it seems I have not 
fully understood where each is conventionally used.

Anyway, I can stick to the convention and use unsigned long.
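
Just to confirm I've understood, the range check above would then 
become something like this (same logic, only the types changed):

static __always_inline bool __is_wr_after_init(const void *ptr, size_t size)
{
	unsigned long start = (unsigned long)&__start_wr_after_init;
	unsigned long end = (unsigned long)&__end_wr_after_init;
	unsigned long low = (unsigned long)ptr;
	unsigned long high = low + size;

	return likely(start <= low && low < high && high <= end);
}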

> 
>> +		local_irq_save(flags);
> 
> Why are you doing the local_irq_save()?

The idea was to avoid the case where an attack would somehow freeze the 
core doing the write-rare operation, while the temporary mapping is 
accessible.

I have seen comments about using mappings that are private to the 
current core (and I will reply to those comments as well), but this 
approach seems architecture-dependent, while I was looking for a 
solution that, albeit not 100% reliable, would work on any system with 
an MMU. This would not prevent each arch from coming up with its own 
custom implementation that provides better coverage, performance, etc.

>> +		page = virt_to_page(d);
>> +		offset = d & ~PAGE_MASK;
>> +		offset_complement = PAGE_SIZE - offset;
>> +		size = min(n_bytes, offset_complement);
>> +		base = (uintptr_t)vmap(&page, 1, VM_MAP, PAGE_KERNEL);
> 
> Can you even call vmap() (which sleeps) with interrupts off?

I accidentally disabled the sleeping-while-atomic debug option and I 
totally missed this problem :-(

However, to answer your question, nothing exploded while I was testing 
(without that type of debugging).

I suspect I was just "lucky". Or maybe I was simply not triggering the 
sleeping sub-case.

As I understood the code, sleeping _might_ happen, but it's not going to 
happen systematically.

I wonder if I could split vmap() into two parts: first the sleeping 
one, with interrupts enabled, then the non-sleeping one, with 
interrupts disabled.

I need to read the code more carefully, but it seems that sleeping 
might happen when memory for the mapping metadata is not immediately 
available.

BTW, wouldn't the might_sleep() call belong to the part which really 
sleeps, rather than to vmap() as a whole?
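
In the meantime, a simpler interim fix might be to create and destroy 
the temporary mapping while interrupts are still enabled, and disable 
them only around the actual write. A rough, untested sketch of the 
wr_memset() loop (same variables as above), with the caveat that the 
writable alias then exists slightly longer:

	while (n_bytes) {
		struct page *page = virt_to_page(d);
		uintptr_t offset = d & ~PAGE_MASK;
		size_t size = min(n_bytes, (size_t)(PAGE_SIZE - offset));
		uintptr_t base;

		/* vmap() can sleep: call it before disabling interrupts */
		base = (uintptr_t)vmap(&page, 1, VM_MAP, PAGE_KERNEL);
		if (WARN(!base, WR_ERR_PAGE_MSG))
			return false;
		local_irq_save(flags);
		memset((void *)(base + offset), c, size);
		local_irq_restore(flags);
		/* vunmap() must not be called in atomic context either */
		vunmap((void *)base);
		d += size;
		n_bytes -= size;
	}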

>> +		if (WARN(!base, WR_ERR_PAGE_MSG)) {
>> +			local_irq_restore(flags);
>> +			return false;
>> +		}
> 
> You really need some kmap_atomic()-style accessors to wrap this stuff
> for you.  This little pattern is repeated over and over.

I really need to learn more about the way the kernel works and is 
structured. It's a work in progress. Thanks for the advice.
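
If I understand the suggestion correctly, it could be a pair of helpers 
along these lines (just a sketch; wr_map()/wr_unmap() are made-up 
names):

static void *wr_map(uintptr_t addr, unsigned long *flags)
{
	struct page *page = virt_to_page(addr);
	void *base = vmap(&page, 1, VM_MAP, PAGE_KERNEL);

	if (!base)
		return NULL;
	local_irq_save(*flags);
	return base + (addr & ~PAGE_MASK);
}

static void wr_unmap(void *ptr, unsigned long flags)
{
	local_irq_restore(flags);
	vunmap((void *)((uintptr_t)ptr & PAGE_MASK));
}

Then every write rare operation would reduce to wr_map() / write / 
wr_unmap(), with the pattern written down only once.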

> ...
>> +const char WR_ERR_RANGE_MSG[] = "Write rare on invalid memory range.";
>> +const char WR_ERR_PAGE_MSG[] = "Failed to remap write rare page.";
> 
> Doesn't the compiler de-duplicate duplicated strings for you?  Is there
> any reason to declare these like this?

I noticed I have made some accidental modifications in a couple of 
cases, when replicating the command.

So I thought that if I really want to use the same string, why not do 
it explicitly? It also seemed easier in case I want to tweak the 
message: I need to do it in only one place.

--
igor

* Re: [PATCH 03/17] prmem: vmalloc support for dynamic allocation
  2018-10-25  0:26   ` Dave Hansen
@ 2018-10-29 18:07     ` Igor Stoppa
  0 siblings, 0 replies; 26+ messages in thread
From: Igor Stoppa @ 2018-10-29 18:07 UTC (permalink / raw)
  To: Dave Hansen, Mimi Zohar, Kees Cook, Matthew Wilcox, Dave Chinner,
	James Morris, Michal Hocko, kernel-hardening, linux-integrity,
	linux-security-module
  Cc: igor.stoppa, Dave Hansen, Jonathan Corbet, Laura Abbott,
	Andrew Morton, Chintan Pandya, Joe Perches, Luis R. Rodriguez,
	Thomas Gleixner, Kate Stewart, Greg Kroah-Hartman,
	Philippe Ombredanne, linux-mm, linux-kernel



On 25/10/2018 01:26, Dave Hansen wrote:
> On 10/23/18 2:34 PM, Igor Stoppa wrote:
>> +#define VM_PMALLOC		0x00000100	/* pmalloc area - see docs */
>> +#define VM_PMALLOC_WR		0x00000200	/* pmalloc write rare area */
>> +#define VM_PMALLOC_PROTECTED	0x00000400	/* pmalloc protected area */
> 
> Please introduce things as you use them.  It's impossible to review a
> patch that just says "see docs" that doesn't contain any docs. :)

Yes, otoh it's a big pain in the neck to keep the docs split into 
smaller patches interleaved with the code, at least while the code is 
still in flux.

And since the docs refer to the sources, for the automated documentation 
of the API, I cannot just put the documentation at the beginning of the 
patchset.

Can I keep the docs as they are, for now, till the code is more stable?

--
igor

* Re: [PATCH 05/17] prmem: shorthands for write rare on common types
  2018-10-25  0:28   ` Dave Hansen
@ 2018-10-29 18:12     ` Igor Stoppa
  0 siblings, 0 replies; 26+ messages in thread
From: Igor Stoppa @ 2018-10-29 18:12 UTC (permalink / raw)
  To: Dave Hansen, Mimi Zohar, Kees Cook, Matthew Wilcox, Dave Chinner,
	James Morris, Michal Hocko, kernel-hardening, linux-integrity,
	linux-security-module
  Cc: igor.stoppa, Dave Hansen, Jonathan Corbet, Laura Abbott,
	Vlastimil Babka, Kirill A. Shutemov, Andrew Morton,
	Pavel Tatashin, linux-mm, linux-kernel



On 25/10/2018 01:28, Dave Hansen wrote:
> On 10/23/18 2:34 PM, Igor Stoppa wrote:
>> Wrappers around the basic write rare functionality, addressing several
>> common data types found in the kernel, allowing to specify the new
>> values through immediates, like constants and defines.
> 
> I have to wonder whether this is the right way, or whether a
> size-agnostic function like put_user() is the right way.  put_user() is
> certainly easier to use.

I definitely did not like it either.
But it was the best that came to my mind ...
The main purpose of this code was to show what I wanted to do.
Once more, thanks for pointing out a better way to do it.
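
Something like a single type-agnostic macro, perhaps (a sketch; 
wr_assign() is a made-up name, built on top of wr_memcpy()):

#define wr_assign(var, val)				\
({							\
	typeof(var) tmp_ = (val);			\
							\
	wr_memcpy(&(var), &tmp_, sizeof(var));		\
})

which would cover all the sizes with one put_user()-like interface, 
instead of one wrapper per type.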

--
igor

* Re: [PATCH 06/17] prmem: test cases for memory protection
  2018-10-25 16:43   ` Dave Hansen
@ 2018-10-29 18:16     ` Igor Stoppa
  0 siblings, 0 replies; 26+ messages in thread
From: Igor Stoppa @ 2018-10-29 18:16 UTC (permalink / raw)
  To: Dave Hansen, Mimi Zohar, Kees Cook, Matthew Wilcox, Dave Chinner,
	James Morris, Michal Hocko, kernel-hardening, linux-integrity,
	linux-security-module
  Cc: igor.stoppa, Dave Hansen, Jonathan Corbet, Laura Abbott,
	Vlastimil Babka, Kirill A. Shutemov, Andrew Morton,
	Pavel Tatashin, linux-mm, linux-kernel



On 25/10/2018 17:43, Dave Hansen wrote:
>> +static bool is_address_protected(void *p)
>> +{
>> +	struct page *page;
>> +	struct vmap_area *area;
>> +
>> +	if (unlikely(!is_vmalloc_addr(p)))
>> +		return false;
>> +	page = vmalloc_to_page(p);
>> +	if (unlikely(!page))
>> +		return false;
>> +	wmb(); /* Flush changes to the page table - is it needed? */
> 
> No.

ok

> The rest of this is just pretty verbose and seems to have been very
> heavily copied and pasted.  I guess that's OK for test code, though.

I was tempted to play with macros, as templates to generate tests on the 
fly, according to the type being passed.

But I was afraid it might draw an even stronger rejection than the 
rest of the patchset already has.

Would it be acceptable/preferable?
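
For instance, something like this (hypothetical sketch, assuming the 
__wr_after_init marker used elsewhere in the patchset):

#define DEFINE_WR_AFTER_INIT_TEST(type, initial, new_value)	\
static bool test_wr_##type(void)				\
{								\
	static type var __wr_after_init = (initial);		\
	type tmp = (new_value);					\
								\
	if (!wr_memcpy(&var, &tmp, sizeof(var)))		\
		return false;					\
	return var == (new_value);				\
}

DEFINE_WR_AFTER_INIT_TEST(int, 0, 42)
DEFINE_WR_AFTER_INIT_TEST(long, 0L, -1L)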

--
igor

* Re: [PATCH 08/17] prmem: struct page: track vmap_area
  2018-10-25  2:13       ` Matthew Wilcox
@ 2018-10-29 18:21         ` Igor Stoppa
  0 siblings, 0 replies; 26+ messages in thread
From: Igor Stoppa @ 2018-10-29 18:21 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Mimi Zohar, Kees Cook, Dave Chinner, James Morris, Michal Hocko,
	kernel-hardening, linux-integrity, linux-security-module,
	igor.stoppa, Dave Hansen, Jonathan Corbet, Laura Abbott,
	Vlastimil Babka, Kirill A. Shutemov, Andrew Morton,
	Pavel Tatashin, linux-mm, linux-kernel

On 25/10/2018 03:13, Matthew Wilcox wrote:
> On Thu, Oct 25, 2018 at 02:01:02AM +0300, Igor Stoppa wrote:
>>>> @@ -1747,6 +1750,10 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
>>>>    	if (!addr)
>>>>    		return NULL;
>>>> +	va = __find_vmap_area((unsigned long)addr);
>>>> +	for (i = 0; i < va->vm->nr_pages; i++)
>>>> +		va->vm->pages[i]->area = va;
>>>
>>> I don't like it that you're calling this for _every_ vmalloc() caller
>>> when most of them will never use this.  Perhaps have page->va be initially
>>> NULL and then cache the lookup in it when it's accessed for the first time.
>>>
>>
>> If __find_vmap_area() was part of the API, this loop could be left out from
>> __vmalloc_node_range() and the user of the allocation could initialize the
>> field, if needed.
>>
>> What is the reason for keeping __find_vmap_area() private?
> 
> Well, for one, you're walking the rbtree without holding the spinlock,
> so you're going to get crashes.  I don't see why we shouldn't export
> find_vmap_area() though.

Argh, yes, sorry. But find_vmap_area() would be enough for what I need.

> Another way we could approach this is to embed the vmap_area in the
> vm_struct.  It'd require a bit of juggling of the alloc/free paths in
> vmalloc, but it might be worthwhile.

I have a feeling of unease about the whole vmap_area / vm_struct 
duality. They clearly are different types, with different purposes, but 
here and there functions are named after some "area" yet actually 
take vm_struct pointers.

I wouldn't mind spending some time understanding the reason for this 
apparently bizarre choice, but after I have emerged from the prmem swamp.

For now I'd stick to find_vmap_area().
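
For the lazy caching you suggested earlier, I'm thinking of an accessor 
like this (sketch, assuming the new 'area' field in struct page starts 
out NULL):

static struct vmap_area *page_vmap_area(struct page *page,
					unsigned long addr)
{
	struct vmap_area *va = page->area;

	if (!va) {
		va = find_vmap_area(addr); /* takes vmap_area_lock */
		page->area = va;
	}
	return va;
}

so that only the pages which actually need the back reference ever pay 
for the rbtree lookup.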

--
igor

* Re: [PATCH 09/17] prmem: hardened usercopy
  2018-10-29 11:45   ` Chris von Recklinghausen
@ 2018-10-29 18:24     ` Igor Stoppa
  0 siblings, 0 replies; 26+ messages in thread
From: Igor Stoppa @ 2018-10-29 18:24 UTC (permalink / raw)
  To: crecklin, Mimi Zohar, Kees Cook, Matthew Wilcox, Dave Chinner,
	James Morris, Michal Hocko, kernel-hardening, linux-integrity,
	linux-security-module
  Cc: igor.stoppa, Dave Hansen, Jonathan Corbet, Laura Abbott,
	linux-mm, linux-kernel



On 29/10/2018 11:45, Chris von Recklinghausen wrote:

[...]

> Could you add code somewhere (lkdtm driver if possible) to demonstrate
> the issue and verify the code change?

Sure.

Eventually, I'd like to add test cases for each functionality.
I didn't do it right away for the parts that either are not 
immediately needed for the main functionality, or that I'm still not 
confident won't change radically.
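
For the hardened usercopy case, the test could follow the pattern of 
the existing lkdtm usercopy tests. A rough sketch (the pmalloc calls 
and names are tentative):

/* Attempt a copy_to_user() from a protected pmalloc object. */
void lkdtm_USERCOPY_PMALLOC(void)
{
	struct pmalloc_pool *pool;
	void *obj;
	unsigned long user_addr;

	pool = pmalloc_create_pool();
	obj = pmalloc(pool, PAGE_SIZE);
	pmalloc_protect_pool(pool);
	user_addr = vm_mmap(NULL, 0, PAGE_SIZE,
			    PROT_READ | PROT_WRITE,
			    MAP_ANONYMOUS | MAP_PRIVATE, 0);
	/* This copy should trip check_pmalloc_object() and abort. */
	if (copy_to_user((void __user *)user_addr, obj, PAGE_SIZE))
		pr_warn("copy_to_user failed, but no usercopy abort\n");
	vm_munmap(user_addr, PAGE_SIZE);
	pmalloc_destroy_pool(pool);
}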

--

igor

* Re: [PATCH 02/17] prmem: write rare for static allocation
  2018-10-26  9:41   ` Peter Zijlstra
@ 2018-10-29 20:01     ` Igor Stoppa
  0 siblings, 0 replies; 26+ messages in thread
From: Igor Stoppa @ 2018-10-29 20:01 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Mimi Zohar, Kees Cook, Matthew Wilcox, Dave Chinner,
	James Morris, Michal Hocko, kernel-hardening, linux-integrity,
	linux-security-module, igor.stoppa, Dave Hansen, Jonathan Corbet,
	Laura Abbott, Vlastimil Babka, Kirill A. Shutemov, Andrew Morton,
	Pavel Tatashin, linux-mm, linux-kernel



On 26/10/2018 10:41, Peter Zijlstra wrote:
> On Wed, Oct 24, 2018 at 12:34:49AM +0300, Igor Stoppa wrote:
>> +static __always_inline
> 
> That's far too large for inline.

The reason is that inlining is supposed to minimize the presence of 
gadgets that might be used in JOP (jump-oriented programming) attacks.
I am ready to stand corrected if I'm wrong, but this is why I did it.

Regarding the function being too large, yes, I would not normally choose 
it for inlining.

Actually, I would not normally use "__always_inline" and instead I would 
limit myself to plain "inline", at most.

> 
>> +bool wr_memset(const void *dst, const int c, size_t n_bytes)
>> +{
>> +	size_t size;
>> +	unsigned long flags;
>> +	uintptr_t d = (uintptr_t)dst;
>> +
>> +	if (WARN(!__is_wr_after_init(dst, n_bytes), WR_ERR_RANGE_MSG))
>> +		return false;
>> +	while (n_bytes) {
>> +		struct page *page;
>> +		uintptr_t base;
>> +		uintptr_t offset;
>> +		uintptr_t offset_complement;
>> +
>> +		local_irq_save(flags);
>> +		page = virt_to_page(d);
>> +		offset = d & ~PAGE_MASK;
>> +		offset_complement = PAGE_SIZE - offset;
>> +		size = min(n_bytes, offset_complement);
>> +		base = (uintptr_t)vmap(&page, 1, VM_MAP, PAGE_KERNEL);
>> +		if (WARN(!base, WR_ERR_PAGE_MSG)) {
>> +			local_irq_restore(flags);
>> +			return false;
>> +		}
>> +		memset((void *)(base + offset), c, size);
>> +		vunmap((void *)base);
> 
> BUG

yes, somehow I managed to drop this debug configuration from the debug 
builds I made.

[...]

> Also, I see an amount of duplication here that shows you're not nearly
> lazy enough.

I did notice a certain amount of duplication, but I wasn't sure how 
best to take advantage of it.
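
Perhaps the common remap loop could be factored out, with the operation 
selected by a parameter, along these lines (a sketch that keeps the 
structure of the current code):

enum wr_op_type { WR_MEMCPY, WR_MEMSET };

static __always_inline
bool __wr_op(uintptr_t d, const void *src, int c, size_t n_bytes,
	     enum wr_op_type op)
{
	unsigned long flags;

	while (n_bytes) {
		struct page *page = virt_to_page(d);
		uintptr_t offset = d & ~PAGE_MASK;
		size_t size = min(n_bytes, (size_t)(PAGE_SIZE - offset));
		void *base;

		local_irq_save(flags);
		base = vmap(&page, 1, VM_MAP, PAGE_KERNEL);
		if (WARN(!base, WR_ERR_PAGE_MSG)) {
			local_irq_restore(flags);
			return false;
		}
		if (op == WR_MEMCPY)
			memcpy(base + offset, src, size);
		else
			memset(base + offset, c, size);
		vunmap(base);
		local_irq_restore(flags);
		d += size;
		n_bytes -= size;
		if (src)
			src += size;
	}
	return true;
}

wr_memset() and wr_memcpy() would then shrink to one-line wrappers 
doing the range check and calling __wr_op().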

--
igor

end of thread

Thread overview: 26+ messages
     [not found] <20181023213504.28905-1-igor.stoppa@huawei.com>
2018-10-23 21:34 ` [PATCH 02/17] prmem: write rare for static allocation Igor Stoppa
2018-10-25  0:24   ` Dave Hansen
2018-10-29 18:03     ` Igor Stoppa
2018-10-26  9:41   ` Peter Zijlstra
2018-10-29 20:01     ` Igor Stoppa
2018-10-23 21:34 ` [PATCH 03/17] prmem: vmalloc support for dynamic allocation Igor Stoppa
2018-10-25  0:26   ` Dave Hansen
2018-10-29 18:07     ` Igor Stoppa
2018-10-23 21:34 ` [PATCH 04/17] prmem: " Igor Stoppa
2018-10-23 21:34 ` [PATCH 05/17] prmem: shorthands for write rare on common types Igor Stoppa
2018-10-25  0:28   ` Dave Hansen
2018-10-29 18:12     ` Igor Stoppa
2018-10-23 21:34 ` [PATCH 06/17] prmem: test cases for memory protection Igor Stoppa
2018-10-24  3:27   ` Randy Dunlap
2018-10-24 14:24     ` Igor Stoppa
2018-10-25 16:43   ` Dave Hansen
2018-10-29 18:16     ` Igor Stoppa
2018-10-23 21:34 ` [PATCH 07/17] prmem: lkdtm tests " Igor Stoppa
2018-10-23 21:34 ` [PATCH 08/17] prmem: struct page: track vmap_area Igor Stoppa
2018-10-24  3:12   ` Matthew Wilcox
2018-10-24 23:01     ` Igor Stoppa
2018-10-25  2:13       ` Matthew Wilcox
2018-10-29 18:21         ` Igor Stoppa
2018-10-23 21:34 ` [PATCH 09/17] prmem: hardened usercopy Igor Stoppa
2018-10-29 11:45   ` Chris von Recklinghausen
2018-10-29 18:24     ` Igor Stoppa
