* [patch 1/2] mm, mempool: poison elements backed by slab allocator
@ 2015-03-09  7:21 ` David Rientjes
  0 siblings, 0 replies; 14+ messages in thread
From: David Rientjes @ 2015-03-09  7:21 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Sebastian Ott, Mikulas Patocka, Catalin Marinas, linux-kernel, linux-mm

Mempools keep elements in a reserved pool for contexts in which
allocation may not be possible.  When an element is allocated from the
reserved pool, its memory contents are the same as when it was added to
the reserved pool.

Because of this, elements lack any free poisoning to detect
use-after-free errors.

This patch adds free poisoning for elements backed by the slab allocator.
This is possible because the mempool layer knows the object size of each
element.

When an element is added to the reserved pool, it is poisoned with
POISON_FREE.  When it is removed from the reserved pool, the contents are
checked for POISON_FREE.  If there is a mismatch, a warning is emitted to
the kernel log.

This is only effective for configs with CONFIG_DEBUG_VM.

Signed-off-by: David Rientjes <rientjes@google.com>
---
 mm/mempool.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 63 insertions(+), 2 deletions(-)

diff --git a/mm/mempool.c b/mm/mempool.c
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -16,16 +16,77 @@
 #include <linux/blkdev.h>
 #include <linux/writeback.h>
 
+#ifdef CONFIG_DEBUG_VM
+static void poison_error(mempool_t *pool, void *element, size_t size,
+			 size_t byte)
+{
+	const int nr = pool->curr_nr;
+	const int start = max_t(int, byte - (BITS_PER_LONG / 8), 0);
+	const int end = min_t(int, byte + (BITS_PER_LONG / 8), size);
+	int i;
+
+	pr_err("BUG: mempool element poison mismatch\n");
+	pr_err("Mempool %p size %ld\n", pool, size);
+	pr_err(" nr=%d @ %p: %s0x", nr, element, start > 0 ? "... " : "");
+	for (i = start; i < end; i++)
+		pr_cont("%x ", *(u8 *)(element + i));
+	pr_cont("%s\n", end < size ? "..." : "");
+	dump_stack();
+}
+
+static void check_slab_element(mempool_t *pool, void *element)
+{
+	if (pool->free == mempool_free_slab || pool->free == mempool_kfree) {
+		size_t size = ksize(element);
+		u8 *obj = element;
+		size_t i;
+
+		for (i = 0; i < size; i++) {
+			u8 exp = (i < size - 1) ? POISON_FREE : POISON_END;
+
+			if (obj[i] != exp) {
+				poison_error(pool, element, size, i);
+				return;
+			}
+		}
+		memset(obj, POISON_INUSE, size);
+	}
+}
+
+static void poison_slab_element(mempool_t *pool, void *element)
+{
+	if (pool->alloc == mempool_alloc_slab ||
+	    pool->alloc == mempool_kmalloc) {
+		size_t size = ksize(element);
+		u8 *obj = element;
+
+		memset(obj, POISON_FREE, size - 1);
+		obj[size - 1] = POISON_END;
+	}
+}
+#else /* CONFIG_DEBUG_VM */
+static inline void check_slab_element(mempool_t *pool, void *element)
+{
+}
+static inline void poison_slab_element(mempool_t *pool, void *element)
+{
+}
+#endif /* CONFIG_DEBUG_VM */
+
 static void add_element(mempool_t *pool, void *element)
 {
 	BUG_ON(pool->curr_nr >= pool->min_nr);
+	poison_slab_element(pool, element);
 	pool->elements[pool->curr_nr++] = element;
 }
 
 static void *remove_element(mempool_t *pool)
 {
-	BUG_ON(pool->curr_nr <= 0);
-	return pool->elements[--pool->curr_nr];
+	void *element = pool->elements[--pool->curr_nr];
+
+	BUG_ON(pool->curr_nr < 0);
+	check_slab_element(pool, element);
+	return element;
 }
 
 /**

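As an illustration of the kind of bug this poisoning is meant to catch,
here is a minimal sketch of a use-after-free on a kmalloc-backed mempool.
The structure, pool and function names are hypothetical, not taken from
the patch or from any in-tree user.

#include <linux/init.h>
#include <linux/gfp.h>
#include <linux/mempool.h>

struct foo_req {			/* hypothetical element type */
	int id;
	char payload[120];
};

static mempool_t *foo_pool;		/* hypothetical pool */

static int __init foo_init(void)
{
	/* 16 reserved elements, each kmalloc'd at sizeof(struct foo_req) */
	foo_pool = mempool_create_kmalloc_pool(16, sizeof(struct foo_req));
	return foo_pool ? 0 : -ENOMEM;
}

static void foo_buggy_path(void)
{
	struct foo_req *req = mempool_alloc(foo_pool, GFP_NOIO);

	if (!req)
		return;
	req->id = 1;
	mempool_free(req, foo_pool);
	/*
	 * Use after free.  If the element went back to the reserved
	 * pool, add_element() filled it with POISON_FREE (0x6b) bytes
	 * terminated by POISON_END (0xa5); this write corrupts that
	 * pattern, and the next mempool_alloc() that falls back to the
	 * reserved pool reports the mismatch and dumps the stack.
	 */
	req->id = 2;
}

Without CONFIG_DEBUG_VM the two helpers compile away to empty inlines and
elements are stored and returned untouched, as before.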

* [patch 2/2] mm, mempool: poison elements backed by page allocator
  2015-03-09  7:21 ` David Rientjes
@ 2015-03-09  7:22   ` David Rientjes
  -1 siblings, 0 replies; 14+ messages in thread
From: David Rientjes @ 2015-03-09  7:22 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Sebastian Ott, Mikulas Patocka, Catalin Marinas, linux-kernel, linux-mm

Elements backed by the slab allocator are poisoned when added to a
mempool's reserved pool.

It is also possible to poison elements backed by the page allocator
because the mempool layer knows the allocation order.

This patch extends mempool element poisoning to include memory backed by
the page allocator.

This is only effective for configs with CONFIG_DEBUG_VM.

Signed-off-by: David Rientjes <rientjes@google.com>
---
 mm/mempool.c | 74 ++++++++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 49 insertions(+), 25 deletions(-)

diff --git a/mm/mempool.c b/mm/mempool.c
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -6,6 +6,7 @@
  *  extreme VM load.
  *
  *  started by Ingo Molnar, Copyright (C) 2001
+ *  debugging by David Rientjes, Copyright (C) 2015
  */
 
 #include <linux/mm.h>
@@ -34,41 +35,64 @@ static void poison_error(mempool_t *pool, void *element, size_t size,
 	dump_stack();
 }
 
-static void check_slab_element(mempool_t *pool, void *element)
+static void __check_element(mempool_t *pool, void *element, size_t size)
 {
-	if (pool->free == mempool_free_slab || pool->free == mempool_kfree) {
-		size_t size = ksize(element);
-		u8 *obj = element;
-		size_t i;
-
-		for (i = 0; i < size; i++) {
-			u8 exp = (i < size - 1) ? POISON_FREE : POISON_END;
-
-			if (obj[i] != exp) {
-				poison_error(pool, element, size, i);
-				return;
-			}
+	u8 *obj = element;
+	size_t i;
+
+	for (i = 0; i < size; i++) {
+		u8 exp = (i < size - 1) ? POISON_FREE : POISON_END;
+
+		if (obj[i] != exp) {
+			poison_error(pool, element, size, i);
+			return;
 		}
-		memset(obj, POISON_INUSE, size);
+	}
+	memset(obj, POISON_INUSE, size);
+}
+
+static void check_element(mempool_t *pool, void *element)
+{
+	/* Mempools backed by slab allocator */
+	if (pool->free == mempool_free_slab || pool->free == mempool_kfree)
+		__check_element(pool, element, ksize(element));
+
+	/* Mempools backed by page allocator */
+	if (pool->free == mempool_free_pages) {
+		int order = (int)(long)pool->pool_data;
+		void *addr = page_address(element);
+
+		__check_element(pool, addr, 1UL << (PAGE_SHIFT + order));
 	}
 }
 
-static void poison_slab_element(mempool_t *pool, void *element)
+static void __poison_element(void *element, size_t size)
 {
-	if (pool->alloc == mempool_alloc_slab ||
-	    pool->alloc == mempool_kmalloc) {
-		size_t size = ksize(element);
-		u8 *obj = element;
+	u8 *obj = element;
+
+	memset(obj, POISON_FREE, size - 1);
+	obj[size - 1] = POISON_END;
+}
+
+static void poison_element(mempool_t *pool, void *element)
+{
+	/* Mempools backed by slab allocator */
+	if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc)
+		__poison_element(element, ksize(element));
+
+	/* Mempools backed by page allocator */
+	if (pool->alloc == mempool_alloc_pages) {
+		int order = (int)(long)pool->pool_data;
+		void *addr = page_address(element);
 
-		memset(obj, POISON_FREE, size - 1);
-		obj[size - 1] = POISON_END;
+		__poison_element(addr, 1UL << (PAGE_SHIFT + order));
 	}
 }
 #else /* CONFIG_DEBUG_VM */
-static inline void check_slab_element(mempool_t *pool, void *element)
+static inline void check_element(mempool_t *pool, void *element)
 {
 }
-static inline void poison_slab_element(mempool_t *pool, void *element)
+static inline void poison_element(mempool_t *pool, void *element)
 {
 }
 #endif /* CONFIG_DEBUG_VM */
@@ -76,7 +100,7 @@ static inline void poison_slab_element(mempool_t *pool, void *element)
 static void add_element(mempool_t *pool, void *element)
 {
 	BUG_ON(pool->curr_nr >= pool->min_nr);
-	poison_slab_element(pool, element);
+	poison_element(pool, element);
 	pool->elements[pool->curr_nr++] = element;
 }
 
@@ -85,7 +109,7 @@ static void *remove_element(mempool_t *pool)
 	void *element = pool->elements[--pool->curr_nr];
 
 	BUG_ON(pool->curr_nr < 0);
-	check_slab_element(pool, element);
+	check_element(pool, element);
 	return element;
 }
 

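Here the element handed around is a struct page, so the helpers operate
on page_address(element) and recover the element size from the allocation
order stashed in pool->pool_data.  A minimal sketch of that pairing, with
hypothetical names:

#include <linux/init.h>
#include <linux/mm.h>
#include <linux/mempool.h>

static mempool_t *foo_page_pool;	/* hypothetical */

static int __init foo_page_init(void)
{
	const int order = 2;		/* each element is 4 contiguous pages */

	/*
	 * mempool_create_page_pool() records "order" as pool->pool_data,
	 * which is where check_element()/poison_element() above get the
	 * 1UL << (PAGE_SHIFT + order) byte count from.
	 */
	foo_page_pool = mempool_create_page_pool(8, order);
	return foo_page_pool ? 0 : -ENOMEM;
}

static void foo_page_use(void)
{
	struct page *page = mempool_alloc(foo_page_pool, GFP_NOIO);

	if (!page)
		return;
	/* page_address() gives the region that is poisoned and checked */
	memset(page_address(page), 0, PAGE_SIZE << 2);
	mempool_free(page, foo_page_pool);
}

On a 4K-page kernel each element above is 16KB, all of which sits
POISON_FREE/POISON_END-filled while it waits in the reserved pool.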

* Re: [patch 1/2] mm, mempool: poison elements backed by slab allocator
  2015-03-09  7:21 ` David Rientjes
@ 2015-03-12 20:28   ` Andrew Morton
  -1 siblings, 0 replies; 14+ messages in thread
From: Andrew Morton @ 2015-03-12 20:28 UTC (permalink / raw)
  To: David Rientjes
  Cc: Sebastian Ott, Mikulas Patocka, Catalin Marinas, linux-kernel, linux-mm

On Mon, 9 Mar 2015 00:21:56 -0700 (PDT) David Rientjes <rientjes@google.com> wrote:

> Mempools keep elements in a reserved pool for contexts in which
> allocation may not be possible.  When an element is allocated from the
> reserved pool, its memory contents is the same as when it was added to
> the reserved pool.
> 
> Because of this, elements lack any free poisoning to detect
> use-after-free errors.
> 
> This patch adds free poisoning for elements backed by the slab allocator.
> This is possible because the mempool layer knows the object size of each
> element.
> 
> When an element is added to the reserved pool, it is poisoned with
> POISON_FREE.  When it is removed from the reserved pool, the contents are
> checked for POISON_FREE.  If there is a mismatch, a warning is emitted to
> the kernel log.
> 
> This is only effective for configs with CONFIG_DEBUG_VM.

At present CONFIG_DEBUG_VM is pretty lightweight (I hope) and using it
for mempool poisoning might be inappropriately costly.  Would it be
better to tie this to something else?  Either standalone or reuse some
slab debug option, perhaps.

Did you measure the overhead btw?  It might be significant with fast
devices.

> --- a/mm/mempool.c
> +++ b/mm/mempool.c
> @@ -16,16 +16,77 @@
>  #include <linux/blkdev.h>
>  #include <linux/writeback.h>
>  
> +#ifdef CONFIG_DEBUG_VM
> +static void poison_error(mempool_t *pool, void *element, size_t size,
> +			 size_t byte)
> +{
> +	const int nr = pool->curr_nr;
> +	const int start = max_t(int, byte - (BITS_PER_LONG / 8), 0);
> +	const int end = min_t(int, byte + (BITS_PER_LONG / 8), size);
> +	int i;
> +
> +	pr_err("BUG: mempool element poison mismatch\n");
> +	pr_err("Mempool %p size %ld\n", pool, size);
> +	pr_err(" nr=%d @ %p: %s0x", nr, element, start > 0 ? "... " : "");
> +	for (i = start; i < end; i++)
> +		pr_cont("%x ", *(u8 *)(element + i));
> +	pr_cont("%s\n", end < size ? "..." : "");
> +	dump_stack();
> +}

"byte" wasn't a very useful identifier, and it's called "i" in
check_slab_element().  Rename it to "offset" in both places?

> +static void check_slab_element(mempool_t *pool, void *element)
> +{
> +	if (pool->free == mempool_free_slab || pool->free == mempool_kfree) {
> +		size_t size = ksize(element);
> +		u8 *obj = element;
> +		size_t i;
> +
> +		for (i = 0; i < size; i++) {
> +			u8 exp = (i < size - 1) ? POISON_FREE : POISON_END;
> +
> +			if (obj[i] != exp) {
> +				poison_error(pool, element, size, i);
> +				return;
> +			}
> +		}
> +		memset(obj, POISON_INUSE, size);
> +	}
> +}

I question the reuse of POISON_FREE/POISON_INUSE.  If this thing
triggers, it may be hard to tell if it was due to a slab thing or to a
mempool thing.  Using a distinct poison pattern for mempool would clear
that up?

> ...

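One way to do that would be mempool-specific poison values next to the
existing ones, along the lines of the following sketch.  These are
hypothetical names and values, not existing <linux/poison.h> macros, and
the values would need checking against the patterns already reserved
there:

/* Hypothetical, for illustration only. */
#define MEMPOOL_POISON_FREE	0x6c	/* element parked in the reserved pool */
#define MEMPOOL_POISON_INUSE	0x5c	/* element handed out by mempool_alloc() */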

* Re: [patch 1/2] mm, mempool: poison elements backed by slab allocator
  2015-03-12 20:28   ` Andrew Morton
@ 2015-03-14  0:06     ` David Rientjes
  -1 siblings, 0 replies; 14+ messages in thread
From: David Rientjes @ 2015-03-14  0:06 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Sebastian Ott, Mikulas Patocka, Catalin Marinas, linux-kernel, linux-mm

On Thu, 12 Mar 2015, Andrew Morton wrote:

> > Mempools keep elements in a reserved pool for contexts in which
> > allocation may not be possible.  When an element is allocated from the
> > reserved pool, its memory contents is the same as when it was added to
> > the reserved pool.
> > 
> > Because of this, elements lack any free poisoning to detect
> > use-after-free errors.
> > 
> > This patch adds free poisoning for elements backed by the slab allocator.
> > This is possible because the mempool layer knows the object size of each
> > element.
> > 
> > When an element is added to the reserved pool, it is poisoned with
> > POISON_FREE.  When it is removed from the reserved pool, the contents are
> > checked for POISON_FREE.  If there is a mismatch, a warning is emitted to
> > the kernel log.
> > 
> > This is only effective for configs with CONFIG_DEBUG_VM.
> 
> At present CONFIG_DEBUG_VM is pretty lightweight (I hope) and using it
> for mempool poisoning might be inappropriately costly.  Would it be
> better to tie this to something else?  Either standalone or reuse some
> slab debug option, perhaps.
> 

Ok, I agree.  I'll tie it to CONFIG_DEBUG_SLAB and CONFIG_SLUB_DEBUG_ON, 
and allow it to be enabled whenever slub debugging is enabled.  It 
probably doesn't make a lot of sense to do mempool poisoning without slab 
poisoning.

> Did you measure the overhead btw?  It might be significant with fast
> devices.
> 

It's certainly costly: with a new 128-byte slab cache, allocating 64 
objects took about 480 cycles longer per object to do the poison checking 
and in-use poisoning on one of my 2.2GHz machines (~90 cycles/object 
without CONFIG_DEBUG_VM).  The free poisoning added about 130 cycles per 
object (~140 cycles/object without CONFIG_DEBUG_VM).

For cache-cold pages from the page allocator it's more expensive: 
allocating and freeing 64 pages, allocation is ~620 cycles longer per page 
and freeing is an additional ~60 cycles/page.

Keep in mind that this overhead is only incurred when the mempool alloc 
function fails to allocate memory directly from the slab or page allocator 
in the given context, and in mempool_create() itself when the reserved 
pool is initially filled.

I didn't benchmark high-order page poisoning, but that's only used by 
bcache and I'm looking at that separately: allocating high-order pages 
from a mempool sucks.
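
For context on where that cost sits, here is a simplified sketch of the
allocation path, written as if inside mm/mempool.c.  It is not the exact
mempool_alloc() control flow, which also masks reclaim/IO flags on the
first attempt and can sleep and retry:

static void *mempool_alloc_sketch(mempool_t *pool, gfp_t gfp_mask)
{
	unsigned long flags;
	void *element;

	/* Fast path: allocate straight from the slab/page allocator. */
	element = pool->alloc(gfp_mask, pool->pool_data);
	if (element)
		return element;		/* no poison check, no extra cost */

	/*
	 * Slow path: fall back to the reserved pool.  Only here does
	 * remove_element() run the poison check plus the POISON_INUSE
	 * memset added by this series.
	 */
	spin_lock_irqsave(&pool->lock, flags);
	if (pool->curr_nr)
		element = pool->elements[--pool->curr_nr];	/* ~remove_element() */
	spin_unlock_irqrestore(&pool->lock, flags);

	return element;			/* may still be NULL here */
}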

> > --- a/mm/mempool.c
> > +++ b/mm/mempool.c
> > @@ -16,16 +16,77 @@
> >  #include <linux/blkdev.h>
> >  #include <linux/writeback.h>
> >  
> > +#ifdef CONFIG_DEBUG_VM
> > +static void poison_error(mempool_t *pool, void *element, size_t size,
> > +			 size_t byte)
> > +{
> > +	const int nr = pool->curr_nr;
> > +	const int start = max_t(int, byte - (BITS_PER_LONG / 8), 0);
> > +	const int end = min_t(int, byte + (BITS_PER_LONG / 8), size);
> > +	int i;
> > +
> > +	pr_err("BUG: mempool element poison mismatch\n");
> > +	pr_err("Mempool %p size %ld\n", pool, size);
> > +	pr_err(" nr=%d @ %p: %s0x", nr, element, start > 0 ? "... " : "");
> > +	for (i = start; i < end; i++)
> > +		pr_cont("%x ", *(u8 *)(element + i));
> > +	pr_cont("%s\n", end < size ? "..." : "");
> > +	dump_stack();
> > +}
> 
> "byte" wasn't a very useful identifier, and it's called "i" in
> check_slab_element().  Rename it to "offset" in both places?
> 
> > +static void check_slab_element(mempool_t *pool, void *element)
> > +{
> > +	if (pool->free == mempool_free_slab || pool->free == mempool_kfree) {
> > +		size_t size = ksize(element);
> > +		u8 *obj = element;
> > +		size_t i;
> > +
> > +		for (i = 0; i < size; i++) {
> > +			u8 exp = (i < size - 1) ? POISON_FREE : POISON_END;
> > +
> > +			if (obj[i] != exp) {
> > +				poison_error(pool, element, size, i);
> > +				return;
> > +			}
> > +		}
> > +		memset(obj, POISON_INUSE, size);
> > +	}
> > +}
> 
> I question the reuse of POISON_FREE/POISON_INUSE.  If this thing
> triggers, it may be hard to tell if it was due to a slab thing or to a
> mempool thing.  Using a distinct poison pattern for mempool would clear
> that up?
> 

Hmm, I think it would actually make it more confusing: mempools only 
allocate from the reserved pool (those poisoned by this patchset) when 
kmalloc() or kmem_cache_alloc() fails in the given context.  Normally, the 
reserved pool isn't used because there are free objects sitting on free or 
partial slabs and the context is irrelevant.  If slab poisoning is 
enabled, they are already POISON_FREE as anticipated.  We only fall back 
to the reserved pool when a new slab needs to be allocated and that fails 
in the given context, so the poison value would differ depending on where 
the objects came from.

* Re: [patch 1/2] mm, mempool: poison elements backed by slab allocator
  2015-03-09  7:21 ` David Rientjes
@ 2015-03-16 10:46   ` Rasmus Villemoes
  -1 siblings, 0 replies; 14+ messages in thread
From: Rasmus Villemoes @ 2015-03-16 10:46 UTC (permalink / raw)
  To: David Rientjes
  Cc: Andrew Morton, Sebastian Ott, Mikulas Patocka, Catalin Marinas,
	linux-kernel, linux-mm

On Mon, Mar 09 2015, David Rientjes <rientjes@google.com> wrote:

> Mempools keep elements in a reserved pool for contexts in which
> allocation may not be possible.  When an element is allocated from the
> reserved pool, its memory contents is the same as when it was added to
> the reserved pool.
>
> Because of this, elements lack any free poisoning to detect
> use-after-free errors.
>
> This patch adds free poisoning for elements backed by the slab allocator.
> This is possible because the mempool layer knows the object size of each
> element.
>
> When an element is added to the reserved pool, it is poisoned with
> POISON_FREE.  When it is removed from the reserved pool, the contents are
> checked for POISON_FREE.  If there is a mismatch, a warning is emitted to
> the kernel log.
>
> +
> +static void poison_slab_element(mempool_t *pool, void *element)
> +{
> +	if (pool->alloc == mempool_alloc_slab ||
> +	    pool->alloc == mempool_kmalloc) {
> +		size_t size = ksize(element);
> +		u8 *obj = element;
> +
> +		memset(obj, POISON_FREE, size - 1);
> +		obj[size - 1] = POISON_END;
> +	}
> +}

Maybe a stupid question, but what happens if the underlying slab
allocator has non-trivial ->ctor?

Rasmus

* Re: [patch 1/2] mm, mempool: poison elements backed by slab allocator
  2015-03-16 10:46   ` Rasmus Villemoes
@ 2015-03-19 23:20     ` David Rientjes
  -1 siblings, 0 replies; 14+ messages in thread
From: David Rientjes @ 2015-03-19 23:20 UTC (permalink / raw)
  To: Rasmus Villemoes, Dave Kleikamp, Christoph Hellwig
  Cc: Andrew Morton, Sebastian Ott, Mikulas Patocka, Catalin Marinas,
	jfs-discussion, linux-kernel, linux-mm

On Mon, 16 Mar 2015, Rasmus Villemoes wrote:

> > Mempools keep elements in a reserved pool for contexts in which
> > allocation may not be possible.  When an element is allocated from the
> > reserved pool, its memory contents is the same as when it was added to
> > the reserved pool.
> >
> > Because of this, elements lack any free poisoning to detect
> > use-after-free errors.
> >
> > This patch adds free poisoning for elements backed by the slab allocator.
> > This is possible because the mempool layer knows the object size of each
> > element.
> >
> > When an element is added to the reserved pool, it is poisoned with
> > POISON_FREE.  When it is removed from the reserved pool, the contents are
> > checked for POISON_FREE.  If there is a mismatch, a warning is emitted to
> > the kernel log.
> >
> > +
> > +static void poison_slab_element(mempool_t *pool, void *element)
> > +{
> > +	if (pool->alloc == mempool_alloc_slab ||
> > +	    pool->alloc == mempool_kmalloc) {
> > +		size_t size = ksize(element);
> > +		u8 *obj = element;
> > +
> > +		memset(obj, POISON_FREE, size - 1);
> > +		obj[size - 1] = POISON_END;
> > +	}
> > +}
> 
> Maybe a stupid question, but what happens if the underlying slab
> allocator has non-trivial ->ctor?
> 

Not a stupid question at all, it's very legitimate, thanks for thinking 
about it!

Using slab constructors with mempools is inherently risky because you 
don't know where the element is coming from when returned by 
mempool_alloc(): was it able to be allocated in GFP_NOFS context from the 
slab allocator, or is it coming from mempool's reserved pool of elements?

In the former, the constructor is properly called and in the latter it's 
not called: we simply pop the element from the reserved pool and return it 
to the caller.

For that reason, without mempool element poisoning, we need to take care 
that objects are properly deconstructed when freed to the reserved pool so 
that they are in a state we expect when returned from mempool_alloc().  
Thus, at least part of the slab constructor must be duplicated before 
calling mempool_free().
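
A minimal sketch of that hazard, with a hypothetical cache and names (not
the jfs code):

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/mempool.h>

struct bar {				/* hypothetical object */
	struct list_head list;
	int state;
};

static void bar_ctor(void *obj)
{
	struct bar *b = obj;

	INIT_LIST_HEAD(&b->list);
	b->state = 0;
}

/* bar_cache = kmem_cache_create("bar", sizeof(struct bar), 0, 0, bar_ctor); */
static struct kmem_cache *bar_cache;
/* bar_pool = mempool_create_slab_pool(16, bar_cache); */
static mempool_t *bar_pool;

static struct bar *bar_get(gfp_t gfp)
{
	/*
	 * If the element comes from the slab allocator it is in the
	 * constructed state.  If it comes from the reserved pool, no
	 * constructor runs: it is returned exactly as it was handed to
	 * mempool_free(), and with the poisoning above it is all
	 * POISON_FREE/POISON_END bytes, so any invariant bar_ctor()
	 * established (the initialized list head here) is gone.
	 */
	return mempool_alloc(bar_pool, gfp);
}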

There's one user in the tree that actually does this: the 
mempool_create_slab_pool() in fs/jfs/jfs_metapage.c.  It does exactly what 
is described above: before calling mempool_free(), it clears the fields 
that the slab object constructor initializes.

We'd like to be able to do this:

diff --git a/mm/mempool.c b/mm/mempool.c
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -15,6 +15,7 @@
 #include <linux/mempool.h>
 #include <linux/blkdev.h>
 #include <linux/writeback.h>
+#include "slab.h"
 
 static void add_element(mempool_t *pool, void *element)
 {
@@ -332,6 +333,7 @@ EXPORT_SYMBOL(mempool_free);
 void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data)
 {
 	struct kmem_cache *mem = pool_data;
+	BUG_ON(mem->ctor);
 	return kmem_cache_alloc(mem, gfp_mask);
 }
 EXPORT_SYMBOL(mempool_alloc_slab);

That's because it would be difficult to reproduce an error with an 
improperly deconstructed mempool element when used with a mempool based on 
a slab cache that has a constructor: normally, slab objects are allocatable 
even with GFP_NOFS since there are free objects available, so there's less 
likelihood that we'll need to use the mempool reserved pool.

But we obviously can't do that if jfs is actively using mempools based on 
slab caches with object constructors.  I think it would be much better to 
simply initialize objects as they are allocated, regardless of whether 
they come from the slab allocator or mempool reserved pool, and avoid 
trying to set the state up properly before mempool_free().

This patch initializes all of the fields that are currently initialized by 
the object constructor (with the exception of mp->flag, since it is 
immediately overwritten by the caller anyway).  It also removes META_free 
since nothing is checking for it.

Jfs folks, would this be acceptable to you?
---
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -183,30 +183,23 @@ static inline void remove_metapage(struct page *page, struct metapage *mp)
 
 #endif
 
-static void init_once(void *foo)
-{
-	struct metapage *mp = (struct metapage *)foo;
-
-	mp->lid = 0;
-	mp->lsn = 0;
-	mp->flag = 0;
-	mp->data = NULL;
-	mp->clsn = 0;
-	mp->log = NULL;
-	set_bit(META_free, &mp->flag);
-	init_waitqueue_head(&mp->wait);
-}
-
 static inline struct metapage *alloc_metapage(gfp_t gfp_mask)
 {
-	return mempool_alloc(metapage_mempool, gfp_mask);
+	struct metapage *mp = mempool_alloc(metapage_mempool, gfp_mask);
+
+	if (mp) {
+		mp->lid = 0;
+		mp->lsn = 0;
+		mp->data = NULL;
+		mp->clsn = 0;
+		mp->log = NULL;
+		init_waitqueue_head(&mp->wait);
+	}
+	return mp;
 }
 
 static inline void free_metapage(struct metapage *mp)
 {
-	mp->flag = 0;
-	set_bit(META_free, &mp->flag);
-
 	mempool_free(mp, metapage_mempool);
 }
 
@@ -216,7 +209,7 @@ int __init metapage_init(void)
 	 * Allocate the metapage structures
 	 */
 	metapage_cache = kmem_cache_create("jfs_mp", sizeof(struct metapage),
-					   0, 0, init_once);
+					   0, 0, NULL);
 	if (metapage_cache == NULL)
 		return -ENOMEM;
 
diff --git a/fs/jfs/jfs_metapage.h b/fs/jfs/jfs_metapage.h
index a78beda..337e9e5 100644
--- a/fs/jfs/jfs_metapage.h
+++ b/fs/jfs/jfs_metapage.h
@@ -48,7 +48,6 @@ struct metapage {
 
 /* metapage flag */
 #define META_locked	0
-#define META_free	1
 #define META_dirty	2
 #define META_sync	3
 #define META_discard	4

* Re: [patch 1/2] mm, mempool: poison elements backed by slab allocator
  2015-03-19 23:20     ` David Rientjes
@ 2015-03-19 23:26       ` Dave Kleikamp
  -1 siblings, 0 replies; 14+ messages in thread
From: Dave Kleikamp @ 2015-03-19 23:26 UTC (permalink / raw)
  To: David Rientjes, Rasmus Villemoes, Christoph Hellwig
  Cc: Andrew Morton, Sebastian Ott, Mikulas Patocka, Catalin Marinas,
	jfs-discussion, linux-kernel, linux-mm

On 03/19/2015 06:20 PM, David Rientjes wrote:
> On Mon, 16 Mar 2015, Rasmus Villemoes wrote:
> 
>>> Mempools keep elements in a reserved pool for contexts in which
>>> allocation may not be possible.  When an element is allocated from the
>>> reserved pool, its memory contents is the same as when it was added to
>>> the reserved pool.
>>>
>>> Because of this, elements lack any free poisoning to detect
>>> use-after-free errors.
>>>
>>> This patch adds free poisoning for elements backed by the slab allocator.
>>> This is possible because the mempool layer knows the object size of each
>>> element.
>>>
>>> When an element is added to the reserved pool, it is poisoned with
>>> POISON_FREE.  When it is removed from the reserved pool, the contents are
>>> checked for POISON_FREE.  If there is a mismatch, a warning is emitted to
>>> the kernel log.
>>>
>>> +
>>> +static void poison_slab_element(mempool_t *pool, void *element)
>>> +{
>>> +	if (pool->alloc == mempool_alloc_slab ||
>>> +	    pool->alloc == mempool_kmalloc) {
>>> +		size_t size = ksize(element);
>>> +		u8 *obj = element;
>>> +
>>> +		memset(obj, POISON_FREE, size - 1);
>>> +		obj[size - 1] = POISON_END;
>>> +	}
>>> +}
>>
>> Maybe a stupid question, but what happens if the underlying slab
>> allocator has non-trivial ->ctor?
>>
> 
> Not a stupid question at all, it's very legitimate, thanks for thinking 
> about it!
> 
> Using slab constructors with mempools is inherently risky because you 
> don't know where the element is coming from when returned by 
> mempool_alloc(): was it able to be allocated in GFP_NOFS context from the 
> slab allocator, or is it coming from mempool's reserved pool of elements?
> 
> In the former, the constructor is properly called and in the latter it's 
> not called: we simply pop the element from the reserved pool and return it 
> to the caller.
> 
> For that reason, without mempool element poisoning, we need to take care 
> that objects are properly deconstructed when freed to the reserved pool so 
> that they are in a state we expect when returned from mempool_alloc().  
> Thus, at least part of the slab constructor must be duplicated before 
> calling mempool_free().
> 
> There's one user in the tree that actually does this: it's the 
> mempool_create_slab_pool() in fs/jfs/jfs_metapage.c, and it does exactly 
> what is described above: it clears necessary fields before doing 
> mempool_free() that are duplicated in the slab object constructor.
> 
> We'd like to be able to do this:
> 
> diff --git a/mm/mempool.c b/mm/mempool.c
> --- a/mm/mempool.c
> +++ b/mm/mempool.c
> @@ -15,6 +15,7 @@
>  #include <linux/mempool.h>
>  #include <linux/blkdev.h>
>  #include <linux/writeback.h>
> +#include "slab.h"
>  
>  static void add_element(mempool_t *pool, void *element)
>  {
> @@ -332,6 +333,7 @@ EXPORT_SYMBOL(mempool_free);
>  void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data)
>  {
>  	struct kmem_cache *mem = pool_data;
> +	BUG_ON(mem->ctor);
>  	return kmem_cache_alloc(mem, gfp_mask);
>  }
>  EXPORT_SYMBOL(mempool_alloc_slab);
> 
> Since it would be difficult to reproduce an error with an improperly 
> decosntructed mempool element when used with a mempool based on a slab 
> cache that has a constructor: normally, slab objects are allocatable even 
> with GFP_NOFS since there are free objects available and there's less of a 
> liklihood that we'll need to use the mempool reserved pool.
> 
> But we obviously can't do that if jfs is actively using mempools based on 
> slab caches with object constructors.  I think it would be much better to 
> simply initialize objects as they are allocated, regardless of whether 
> they come from the slab allocator or mempool reserved pool, and avoid 
> trying to set the state up properly before mempool_free().
> 
> This patch properly initializes all fields that are currently done by the 
> object constructor (with the exception of mp->flags since it is 
> immediately overwritten by the caller anyway).  It also removes META_free 
> since nothing is checking for it.
> 
> Jfs folks, would this be acceptable to you?

Sure. I have no objection.

> ---
> diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
> --- a/fs/jfs/jfs_metapage.c
> +++ b/fs/jfs/jfs_metapage.c
> @@ -183,30 +183,23 @@ static inline void remove_metapage(struct page *page, struct metapage *mp)
>  
>  #endif
>  
> -static void init_once(void *foo)
> -{
> -	struct metapage *mp = (struct metapage *)foo;
> -
> -	mp->lid = 0;
> -	mp->lsn = 0;
> -	mp->flag = 0;
> -	mp->data = NULL;
> -	mp->clsn = 0;
> -	mp->log = NULL;
> -	set_bit(META_free, &mp->flag);
> -	init_waitqueue_head(&mp->wait);
> -}
> -
>  static inline struct metapage *alloc_metapage(gfp_t gfp_mask)
>  {
> -	return mempool_alloc(metapage_mempool, gfp_mask);
> +	struct metapage *mp = mempool_alloc(metapage_mempool, gfp_mask);
> +
> +	if (mp) {
> +		mp->lid = 0;
> +		mp->lsn = 0;
> +		mp->data = NULL;
> +		mp->clsn = 0;
> +		mp->log = NULL;
> +		init_waitqueue_head(&mp->wait);
> +	}
> +	return mp;
>  }
>  
>  static inline void free_metapage(struct metapage *mp)
>  {
> -	mp->flag = 0;
> -	set_bit(META_free, &mp->flag);
> -
>  	mempool_free(mp, metapage_mempool);
>  }
>  
> @@ -216,7 +209,7 @@ int __init metapage_init(void)
>  	 * Allocate the metapage structures
>  	 */
>  	metapage_cache = kmem_cache_create("jfs_mp", sizeof(struct metapage),
> -					   0, 0, init_once);
> +					   0, 0, NULL);
>  	if (metapage_cache == NULL)
>  		return -ENOMEM;
>  
> diff --git a/fs/jfs/jfs_metapage.h b/fs/jfs/jfs_metapage.h
> index a78beda..337e9e5 100644
> --- a/fs/jfs/jfs_metapage.h
> +++ b/fs/jfs/jfs_metapage.h
> @@ -48,7 +48,6 @@ struct metapage {
>  
>  /* metapage flag */
>  #define META_locked	0
> -#define META_free	1
>  #define META_dirty	2
>  #define META_sync	3
>  #define META_discard	4
> 
