* [RFC PATCH V2 1/9] mm/slab: move SLUB alloc hooks to common mm/slab.h
2015-12-08 16:18 ` [RFC PATCH V2 0/9] slab: cleanup and bulk API for SLAB Jesper Dangaard Brouer
@ 2015-12-08 16:18 ` Jesper Dangaard Brouer
2015-12-09 15:43 ` Christoph Lameter
2015-12-08 16:18 ` [RFC PATCH V2 2/9] mm: generalize avoid fault-inject on bootstrap kmem_cache Jesper Dangaard Brouer
` (7 subsequent siblings)
8 siblings, 1 reply; 41+ messages in thread
From: Jesper Dangaard Brouer @ 2015-12-08 16:18 UTC (permalink / raw)
To: linux-mm
Cc: Christoph Lameter, Vladimir Davydov, Jesper Dangaard Brouer,
Joonsoo Kim, Linus Torvalds, Andrew Morton
First step towards sharing alloc_hook's between SLUB and SLAB
allocators. Move the SLUB allocators *_alloc_hook to the common
mm/slab.h for internal slab definitions.
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
mm/slab.h | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
mm/slub.c | 54 ----------------------------------------------
2 files changed, 71 insertions(+), 54 deletions(-)
diff --git a/mm/slab.h b/mm/slab.h
index 7b6087197997..588bc5281fc8 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -38,6 +38,17 @@ struct kmem_cache {
#endif
#include <linux/memcontrol.h>
+#include <linux/fault-inject.h>
+/* Q: Howto handle this nicely? below includes are needed for alloc hooks
+ *
+ * e.g. mm/mempool.c and mm/slab_common.c does not include kmemcheck.h
+ * including it here solves the probem, but should they include it
+ * themselves?
+ */
+#include <linux/kmemcheck.h>
+// Below includes are already included in other users of "mm/slab.h"
+//#include <linux/kasan.h>
+//#include <linux/kmemleak.h>
/*
* State of the slab allocator.
@@ -319,6 +330,66 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
return s;
}
+#ifdef CONFIG_SLUB
+static inline size_t slab_ksize(const struct kmem_cache *s)
+{
+#ifdef CONFIG_SLUB_DEBUG
+ /*
+ * Debugging requires use of the padding between object
+ * and whatever may come after it.
+ */
+ if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
+ return s->object_size;
+#endif
+ /*
+ * If we have the need to store the freelist pointer
+ * back there or track user information then we can
+ * only use the space before that information.
+ */
+ if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
+ return s->inuse;
+ /*
+ * Else we can use all the padding etc for the allocation
+ */
+ return s->size;
+}
+#else /* !CONFIG_SLUB */
+static inline size_t slab_ksize(const struct kmem_cache *s)
+{
+ return s->object_size;
+}
+#endif
+
+static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
+ gfp_t flags)
+{
+ flags &= gfp_allowed_mask;
+ lockdep_trace_alloc(flags);
+ might_sleep_if(gfpflags_allow_blocking(flags));
+
+ if (should_failslab(s->object_size, flags, s->flags))
+ return NULL;
+
+ return memcg_kmem_get_cache(s, flags);
+}
+
+static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
+ size_t size, void **p)
+{
+ size_t i;
+
+ flags &= gfp_allowed_mask;
+ for (i = 0; i < size; i++) {
+ void *object = p[i];
+
+ kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
+ kmemleak_alloc_recursive(object, s->object_size, 1,
+ s->flags, flags);
+ kasan_slab_alloc(s, object);
+ }
+ memcg_kmem_put_cache(s);
+}
+
#ifndef CONFIG_SLOB
/*
* The slab lists for all objects.
diff --git a/mm/slub.c b/mm/slub.c
index 46997517406e..6bc179952150 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -284,30 +284,6 @@ static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
return (p - addr) / s->size;
}
-static inline size_t slab_ksize(const struct kmem_cache *s)
-{
-#ifdef CONFIG_SLUB_DEBUG
- /*
- * Debugging requires use of the padding between object
- * and whatever may come after it.
- */
- if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
- return s->object_size;
-
-#endif
- /*
- * If we have the need to store the freelist pointer
- * back there or track user information then we can
- * only use the space before that information.
- */
- if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
- return s->inuse;
- /*
- * Else we can use all the padding etc for the allocation
- */
- return s->size;
-}
-
static inline int order_objects(int order, unsigned long size, int reserved)
{
return ((PAGE_SIZE << order) - reserved) / size;
@@ -1279,36 +1255,6 @@ static inline void kfree_hook(const void *x)
kasan_kfree_large(x);
}
-static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
- gfp_t flags)
-{
- flags &= gfp_allowed_mask;
- lockdep_trace_alloc(flags);
- might_sleep_if(gfpflags_allow_blocking(flags));
-
- if (should_failslab(s->object_size, flags, s->flags))
- return NULL;
-
- return memcg_kmem_get_cache(s, flags);
-}
-
-static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
- size_t size, void **p)
-{
- size_t i;
-
- flags &= gfp_allowed_mask;
- for (i = 0; i < size; i++) {
- void *object = p[i];
-
- kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
- kmemleak_alloc_recursive(object, s->object_size, 1,
- s->flags, flags);
- kasan_slab_alloc(s, object);
- }
- memcg_kmem_put_cache(s);
-}
-
static inline void slab_free_hook(struct kmem_cache *s, void *x)
{
kmemleak_free_recursive(x, s->flags);
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 41+ messages in thread
* Re: [RFC PATCH V2 1/9] mm/slab: move SLUB alloc hooks to common mm/slab.h
2015-12-08 16:18 ` [RFC PATCH V2 1/9] mm/slab: move SLUB alloc hooks to common mm/slab.h Jesper Dangaard Brouer
@ 2015-12-09 15:43 ` Christoph Lameter
0 siblings, 0 replies; 41+ messages in thread
From: Christoph Lameter @ 2015-12-09 15:43 UTC (permalink / raw)
To: Jesper Dangaard Brouer
Cc: linux-mm, Vladimir Davydov, Joonsoo Kim, Linus Torvalds, Andrew Morton
On Tue, 8 Dec 2015, Jesper Dangaard Brouer wrote:
> +/* Q: Howto handle this nicely? below includes are needed for alloc hooks
> + *
> + * e.g. mm/mempool.c and mm/slab_common.c does not include kmemcheck.h
> + * including it here solves the probem, but should they include it
> + * themselves?
> + */
Including in mm/slab.h is enough.
> +#ifdef CONFIG_SLUB
Move this into slab_ksize?
> +static inline size_t slab_ksize(const struct kmem_cache *s)
> +{
> +#ifdef CONFIG_SLUB_DEBUG
> + /*
> + * Debugging requires use of the padding between object
> + * and whatever may come after it.
> + */
> + if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
> + return s->object_size;
> +#endif
> + /*
> + * If we have the need to store the freelist pointer
> + * back there or track user information then we can
> + * only use the space before that information.
> + */
> + if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
> + return s->inuse;
> + /*
> + * Else we can use all the padding etc for the allocation
> + */
> + return s->size;
> +}
> +#else /* !CONFIG_SLUB */
And drop the else branch?
> +static inline size_t slab_ksize(const struct kmem_cache *s)
> +{
> + return s->object_size;
> +}
> +#endif
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 41+ messages in thread
* [RFC PATCH V2 2/9] mm: generalize avoid fault-inject on bootstrap kmem_cache
2015-12-08 16:18 ` [RFC PATCH V2 0/9] slab: cleanup and bulk API for SLAB Jesper Dangaard Brouer
2015-12-08 16:18 ` [RFC PATCH V2 1/9] mm/slab: move SLUB alloc hooks to common mm/slab.h Jesper Dangaard Brouer
@ 2015-12-08 16:18 ` Jesper Dangaard Brouer
2015-12-09 2:36 ` Joonsoo Kim
2015-12-08 16:18 ` [RFC PATCH V2 3/9] slab: use slab_pre_alloc_hook in SLAB allocator Jesper Dangaard Brouer
` (6 subsequent siblings)
8 siblings, 1 reply; 41+ messages in thread
From: Jesper Dangaard Brouer @ 2015-12-08 16:18 UTC (permalink / raw)
To: linux-mm
Cc: Christoph Lameter, Vladimir Davydov, Jesper Dangaard Brouer,
Joonsoo Kim, Linus Torvalds, Andrew Morton
Move slab_should_failslab() check from SLAB allocator to generic
slab_pre_alloc_hook(). The check guards against slab alloc
fault-injects failures for the bootstrap slab that is used for
allocating "kmem_cache" objects to the allocator itself.
I'm not really happy with this code...
---
mm/failslab.c | 2 ++
mm/slab.c | 8 --------
mm/slab.h | 23 ++++++++++++++++++++++-
3 files changed, 24 insertions(+), 9 deletions(-)
diff --git a/mm/failslab.c b/mm/failslab.c
index 79171b4a5826..a2ad28ba696c 100644
--- a/mm/failslab.c
+++ b/mm/failslab.c
@@ -13,6 +13,8 @@ static struct {
bool should_failslab(size_t size, gfp_t gfpflags, unsigned long cache_flags)
{
+ // Should we place bootstrap kmem_cache check here???
+
if (gfpflags & __GFP_NOFAIL)
return false;
diff --git a/mm/slab.c b/mm/slab.c
index 4765c97ce690..4684c2496982 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2917,14 +2917,6 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
#define cache_alloc_debugcheck_after(a,b,objp,d) (objp)
#endif
-static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags)
-{
- if (unlikely(cachep == kmem_cache))
- return false;
-
- return should_failslab(cachep->object_size, flags, cachep->flags);
-}
-
static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
void *objp;
diff --git a/mm/slab.h b/mm/slab.h
index 588bc5281fc8..4e7b0e62f3f4 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -360,6 +360,27 @@ static inline size_t slab_ksize(const struct kmem_cache *s)
}
#endif
+/* FIXME: This construct sucks, because this compare+branch needs to
+ * get removed by compiler then !CONFIG_FAILSLAB (maybe compiler is
+ * smart enough to realize only "false" can be generated).
+ *
+ * Comments please: Pulling out CONFIG_FAILSLAB here looks ugly...
+ * should we instead change API of should_failslab() ??
+ *
+ * Next question: is the bootstrap cache check okay to add for all
+ * allocators? (this would be the easiest, else need more ugly ifdef's)
+ */
+static inline bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags)
+{
+ /* No fault-injection for bootstrap cache */
+#ifdef CONFIG_FAILSLAB
+ if (unlikely(cachep == kmem_cache))
+ return false;
+#endif
+
+ return should_failslab(cachep->object_size, flags, cachep->flags);
+}
+
static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
gfp_t flags)
{
@@ -367,7 +388,7 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
lockdep_trace_alloc(flags);
might_sleep_if(gfpflags_allow_blocking(flags));
- if (should_failslab(s->object_size, flags, s->flags))
+ if (slab_should_failslab(s, flags))
return NULL;
return memcg_kmem_get_cache(s, flags);
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 41+ messages in thread
* Re: [RFC PATCH V2 2/9] mm: generalize avoid fault-inject on bootstrap kmem_cache
2015-12-08 16:18 ` [RFC PATCH V2 2/9] mm: generalize avoid fault-inject on bootstrap kmem_cache Jesper Dangaard Brouer
@ 2015-12-09 2:36 ` Joonsoo Kim
0 siblings, 0 replies; 41+ messages in thread
From: Joonsoo Kim @ 2015-12-09 2:36 UTC (permalink / raw)
To: Jesper Dangaard Brouer
Cc: linux-mm, Christoph Lameter, Vladimir Davydov, Linus Torvalds,
Andrew Morton
On Tue, Dec 08, 2015 at 05:18:32PM +0100, Jesper Dangaard Brouer wrote:
> Move slab_should_failslab() check from SLAB allocator to generic
> slab_pre_alloc_hook(). The check guards against slab alloc
> fault-injects failures for the bootstrap slab that is used for
> allocating "kmem_cache" objects to the allocator itself.
>
> I'm not really happy with this code...
> ---
> mm/failslab.c | 2 ++
> mm/slab.c | 8 --------
> mm/slab.h | 23 ++++++++++++++++++++++-
> 3 files changed, 24 insertions(+), 9 deletions(-)
>
> diff --git a/mm/failslab.c b/mm/failslab.c
> index 79171b4a5826..a2ad28ba696c 100644
> --- a/mm/failslab.c
> +++ b/mm/failslab.c
> @@ -13,6 +13,8 @@ static struct {
>
> bool should_failslab(size_t size, gfp_t gfpflags, unsigned long cache_flags)
> {
> + // Should we place bootstrap kmem_cache check here???
> +
> if (gfpflags & __GFP_NOFAIL)
> return false;
>
> diff --git a/mm/slab.c b/mm/slab.c
> index 4765c97ce690..4684c2496982 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -2917,14 +2917,6 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
> #define cache_alloc_debugcheck_after(a,b,objp,d) (objp)
> #endif
>
> -static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags)
> -{
> - if (unlikely(cachep == kmem_cache))
> - return false;
> -
> - return should_failslab(cachep->object_size, flags, cachep->flags);
> -}
> -
> static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
> {
> void *objp;
> diff --git a/mm/slab.h b/mm/slab.h
> index 588bc5281fc8..4e7b0e62f3f4 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -360,6 +360,27 @@ static inline size_t slab_ksize(const struct kmem_cache *s)
> }
> #endif
>
> +/* FIXME: This construct sucks, because this compare+branch needs to
> + * get removed by compiler then !CONFIG_FAILSLAB (maybe compiler is
> + * smart enough to realize only "false" can be generated).
> + *
> + * Comments please: Pulling out CONFIG_FAILSLAB here looks ugly...
> + * should we instead change API of should_failslab() ??
> + *
> + * Next question: is the bootstrap cache check okay to add for all
> + * allocators? (this would be the easiest, else need more ugly ifdef's)
> + */
> +static inline bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags)
> +{
> + /* No fault-injection for bootstrap cache */
> +#ifdef CONFIG_FAILSLAB
> + if (unlikely(cachep == kmem_cache))
> + return false;
> +#endif
> +
> + return should_failslab(cachep->object_size, flags, cachep->flags);
> +}
> +
> static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
> gfp_t flags)
> {
> @@ -367,7 +388,7 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
> lockdep_trace_alloc(flags);
> might_sleep_if(gfpflags_allow_blocking(flags));
>
> - if (should_failslab(s->object_size, flags, s->flags))
> + if (slab_should_failslab(s, flags))
> return NULL;
It'd be better to remove slab_should_failslab() and insert following code
snippet here.
if (IS_ENABLED(CONFIG_FAILSLAB) &&
cachep != kmem_cache &&
should_failslab())
return NULL;
Thanks.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 41+ messages in thread
* [RFC PATCH V2 3/9] slab: use slab_pre_alloc_hook in SLAB allocator
2015-12-08 16:18 ` [RFC PATCH V2 0/9] slab: cleanup and bulk API for SLAB Jesper Dangaard Brouer
2015-12-08 16:18 ` [RFC PATCH V2 1/9] mm/slab: move SLUB alloc hooks to common mm/slab.h Jesper Dangaard Brouer
2015-12-08 16:18 ` [RFC PATCH V2 2/9] mm: generalize avoid fault-inject on bootstrap kmem_cache Jesper Dangaard Brouer
@ 2015-12-08 16:18 ` Jesper Dangaard Brouer
2015-12-08 16:18 ` [RFC PATCH V2 4/9] mm: kmemcheck skip object if slab allocation failed Jesper Dangaard Brouer
` (5 subsequent siblings)
8 siblings, 0 replies; 41+ messages in thread
From: Jesper Dangaard Brouer @ 2015-12-08 16:18 UTC (permalink / raw)
To: linux-mm
Cc: Christoph Lameter, Vladimir Davydov, Jesper Dangaard Brouer,
Joonsoo Kim, Linus Torvalds, Andrew Morton
Deduplicate code in slab_alloc() and slab_alloc_node() by using
the slab_pre_alloc_hook() call.
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
mm/slab.c | 18 ++++--------------
1 file changed, 4 insertions(+), 14 deletions(-)
diff --git a/mm/slab.c b/mm/slab.c
index 4684c2496982..17fd6268ad41 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3140,15 +3140,10 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
void *ptr;
int slab_node = numa_mem_id();
- flags &= gfp_allowed_mask;
-
- lockdep_trace_alloc(flags);
-
- if (slab_should_failslab(cachep, flags))
+ cachep = slab_pre_alloc_hook(cachep, flags);
+ if (!cachep)
return NULL;
- cachep = memcg_kmem_get_cache(cachep, flags);
-
cache_alloc_debugcheck_before(cachep, flags);
local_irq_save(save_flags);
@@ -3228,15 +3223,10 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
unsigned long save_flags;
void *objp;
- flags &= gfp_allowed_mask;
-
- lockdep_trace_alloc(flags);
-
- if (slab_should_failslab(cachep, flags))
+ cachep = slab_pre_alloc_hook(cachep, flags);
+ if (!cachep)
return NULL;
- cachep = memcg_kmem_get_cache(cachep, flags);
-
cache_alloc_debugcheck_before(cachep, flags);
local_irq_save(save_flags);
objp = __do_cache_alloc(cachep, flags);
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 41+ messages in thread
* [RFC PATCH V2 4/9] mm: kmemcheck skip object if slab allocation failed
2015-12-08 16:18 ` [RFC PATCH V2 0/9] slab: cleanup and bulk API for SLAB Jesper Dangaard Brouer
` (2 preceding siblings ...)
2015-12-08 16:18 ` [RFC PATCH V2 3/9] slab: use slab_pre_alloc_hook in SLAB allocator Jesper Dangaard Brouer
@ 2015-12-08 16:18 ` Jesper Dangaard Brouer
2015-12-08 16:18 ` [RFC PATCH V2 5/9] slab: use slab_post_alloc_hook in SLAB allocator Jesper Dangaard Brouer
` (4 subsequent siblings)
8 siblings, 0 replies; 41+ messages in thread
From: Jesper Dangaard Brouer @ 2015-12-08 16:18 UTC (permalink / raw)
To: linux-mm
Cc: Christoph Lameter, Vladimir Davydov, Jesper Dangaard Brouer,
Joonsoo Kim, Linus Torvalds, Andrew Morton
In the SLAB allocator kmemcheck_slab_alloc() is guarded against
being called in case the object is NULL. In SLUB allocator this
NULL pointer invocation can happen, which seems like an oversight.
Move the NULL pointer check into kmemcheck code (kmemcheck_slab_alloc)
so the check gets moved out of the fastpath, when not compiled
with CONFIG_KMEMCHECK.
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
mm/kmemcheck.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/mm/kmemcheck.c b/mm/kmemcheck.c
index cab58bb592d8..6f4f424037c0 100644
--- a/mm/kmemcheck.c
+++ b/mm/kmemcheck.c
@@ -60,6 +60,9 @@ void kmemcheck_free_shadow(struct page *page, int order)
void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object,
size_t size)
{
+ if (unlikely(!object)) /* Skip object if allocation failed */
+ return;
+
/*
* Has already been memset(), which initializes the shadow for us
* as well.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 41+ messages in thread
* [RFC PATCH V2 5/9] slab: use slab_post_alloc_hook in SLAB allocator
2015-12-08 16:18 ` [RFC PATCH V2 0/9] slab: cleanup and bulk API for SLAB Jesper Dangaard Brouer
` (3 preceding siblings ...)
2015-12-08 16:18 ` [RFC PATCH V2 4/9] mm: kmemcheck skip object if slab allocation failed Jesper Dangaard Brouer
@ 2015-12-08 16:18 ` Jesper Dangaard Brouer
2015-12-08 16:18 ` [RFC PATCH V2 6/9] slab: implement bulk alloc " Jesper Dangaard Brouer
` (3 subsequent siblings)
8 siblings, 0 replies; 41+ messages in thread
From: Jesper Dangaard Brouer @ 2015-12-08 16:18 UTC (permalink / raw)
To: linux-mm
Cc: Christoph Lameter, Vladimir Davydov, Jesper Dangaard Brouer,
Joonsoo Kim, Linus Torvalds, Andrew Morton
Reviewers notice that the order in slab_post_alloc_hook() of
kmemcheck_slab_alloc() and kmemleak_alloc_recursive() gets
swapped compared to slab.c.
Also notice memset now occurs before calling kmemcheck_slab_alloc()
and kmemleak_alloc_recursive().
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
mm/slab.c | 22 ++++++----------------
1 file changed, 6 insertions(+), 16 deletions(-)
diff --git a/mm/slab.c b/mm/slab.c
index 17fd6268ad41..47e7bcab8c3b 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3172,16 +3172,11 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
out:
local_irq_restore(save_flags);
ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
- kmemleak_alloc_recursive(ptr, cachep->object_size, 1, cachep->flags,
- flags);
- if (likely(ptr)) {
- kmemcheck_slab_alloc(cachep, flags, ptr, cachep->object_size);
- if (unlikely(flags & __GFP_ZERO))
- memset(ptr, 0, cachep->object_size);
- }
+ if (unlikely(flags & __GFP_ZERO) && ptr)
+ memset(ptr, 0, cachep->object_size);
- memcg_kmem_put_cache(cachep);
+ slab_post_alloc_hook(cachep, flags, 1, &ptr);
return ptr;
}
@@ -3232,17 +3227,12 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
objp = __do_cache_alloc(cachep, flags);
local_irq_restore(save_flags);
objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
- kmemleak_alloc_recursive(objp, cachep->object_size, 1, cachep->flags,
- flags);
prefetchw(objp);
- if (likely(objp)) {
- kmemcheck_slab_alloc(cachep, flags, objp, cachep->object_size);
- if (unlikely(flags & __GFP_ZERO))
- memset(objp, 0, cachep->object_size);
- }
+ if (unlikely(flags & __GFP_ZERO) && objp)
+ memset(objp, 0, cachep->object_size);
- memcg_kmem_put_cache(cachep);
+ slab_post_alloc_hook(cachep, flags, 1, &objp);
return objp;
}
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 41+ messages in thread
* [RFC PATCH V2 6/9] slab: implement bulk alloc in SLAB allocator
2015-12-08 16:18 ` [RFC PATCH V2 0/9] slab: cleanup and bulk API for SLAB Jesper Dangaard Brouer
` (4 preceding siblings ...)
2015-12-08 16:18 ` [RFC PATCH V2 5/9] slab: use slab_post_alloc_hook in SLAB allocator Jesper Dangaard Brouer
@ 2015-12-08 16:18 ` Jesper Dangaard Brouer
2015-12-08 16:18 ` [RFC PATCH V2 7/9] slab: avoid running debug SLAB code with IRQs disabled for alloc_bulk Jesper Dangaard Brouer
` (2 subsequent siblings)
8 siblings, 0 replies; 41+ messages in thread
From: Jesper Dangaard Brouer @ 2015-12-08 16:18 UTC (permalink / raw)
To: linux-mm
Cc: Christoph Lameter, Vladimir Davydov, Jesper Dangaard Brouer,
Joonsoo Kim, Linus Torvalds, Andrew Morton
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
mm/slab.c | 37 +++++++++++++++++++++++++++++++++++--
1 file changed, 35 insertions(+), 2 deletions(-)
diff --git a/mm/slab.c b/mm/slab.c
index 47e7bcab8c3b..70be9235e083 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3392,9 +3392,42 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
EXPORT_SYMBOL(kmem_cache_free_bulk);
int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
- void **p)
+ void **p)
{
- return __kmem_cache_alloc_bulk(s, flags, size, p);
+ size_t i;
+
+ s = slab_pre_alloc_hook(s, flags);
+ if (!s)
+ return 0;
+
+ cache_alloc_debugcheck_before(s, flags);
+
+ local_irq_disable();
+ for (i = 0; i < size; i++) {
+ void *objp = __do_cache_alloc(s, flags);
+
+ /* this call could be done outside IRQ disabled section */
+ objp = cache_alloc_debugcheck_after(s, flags, objp, _RET_IP_);
+
+ if (unlikely(!objp))
+ goto error;
+ p[i] = objp;
+ }
+ local_irq_enable();
+
+ /* Clear memory outside IRQ disabled section */
+ if (unlikely(flags & __GFP_ZERO))
+ for (i = 0; i < size; i++)
+ memset(p[i], 0, s->object_size);
+
+ slab_post_alloc_hook(s, flags, size, p);
+ /* FIXME: Trace call missing. Christoph would like a bulk variant */
+ return size;
+error:
+ local_irq_enable();
+ slab_post_alloc_hook(s, flags, i, p);
+ __kmem_cache_free_bulk(s, i, p);
+ return 0;
}
EXPORT_SYMBOL(kmem_cache_alloc_bulk);
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 41+ messages in thread
* [RFC PATCH V2 7/9] slab: avoid running debug SLAB code with IRQs disabled for alloc_bulk
2015-12-08 16:18 ` [RFC PATCH V2 0/9] slab: cleanup and bulk API for SLAB Jesper Dangaard Brouer
` (5 preceding siblings ...)
2015-12-08 16:18 ` [RFC PATCH V2 6/9] slab: implement bulk alloc " Jesper Dangaard Brouer
@ 2015-12-08 16:18 ` Jesper Dangaard Brouer
2015-12-08 16:19 ` [RFC PATCH V2 8/9] slab: implement bulk free in SLAB allocator Jesper Dangaard Brouer
2015-12-08 16:19 ` [RFC PATCH V2 9/9] slab: annotate code to generate more compact asm code Jesper Dangaard Brouer
8 siblings, 0 replies; 41+ messages in thread
From: Jesper Dangaard Brouer @ 2015-12-08 16:18 UTC (permalink / raw)
To: linux-mm
Cc: Christoph Lameter, Vladimir Davydov, Jesper Dangaard Brouer,
Joonsoo Kim, Linus Torvalds, Andrew Morton
Move the call to cache_alloc_debugcheck_after() outside the IRQ
disabled section in kmem_cache_alloc_bulk().
When CONFIG_DEBUG_SLAB is disabled the compiler should remove
this code.
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
mm/slab.c | 16 +++++++++++++---
1 file changed, 13 insertions(+), 3 deletions(-)
diff --git a/mm/slab.c b/mm/slab.c
index 70be9235e083..35a4fb0970bd 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3391,6 +3391,16 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
}
EXPORT_SYMBOL(kmem_cache_free_bulk);
+static __always_inline void
+cache_alloc_debugcheck_after_bulk(struct kmem_cache *s, gfp_t flags,
+ size_t size, void **p, unsigned long caller)
+{
+ size_t i;
+
+ for (i = 0; i < size; i++)
+ p[i] = cache_alloc_debugcheck_after(s, flags, p[i], _RET_IP_);
+}
+
int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
void **p)
{
@@ -3406,15 +3416,14 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
for (i = 0; i < size; i++) {
void *objp = __do_cache_alloc(s, flags);
- /* this call could be done outside IRQ disabled section */
- objp = cache_alloc_debugcheck_after(s, flags, objp, _RET_IP_);
-
if (unlikely(!objp))
goto error;
p[i] = objp;
}
local_irq_enable();
+ cache_alloc_debugcheck_after_bulk(s, flags, size, p, _RET_IP_);
+
/* Clear memory outside IRQ disabled section */
if (unlikely(flags & __GFP_ZERO))
for (i = 0; i < size; i++)
@@ -3425,6 +3434,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
return size;
error:
local_irq_enable();
+ cache_alloc_debugcheck_after_bulk(s, flags, i, p, _RET_IP_);
slab_post_alloc_hook(s, flags, i, p);
__kmem_cache_free_bulk(s, i, p);
return 0;
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 41+ messages in thread
* [RFC PATCH V2 8/9] slab: implement bulk free in SLAB allocator
2015-12-08 16:18 ` [RFC PATCH V2 0/9] slab: cleanup and bulk API for SLAB Jesper Dangaard Brouer
` (6 preceding siblings ...)
2015-12-08 16:18 ` [RFC PATCH V2 7/9] slab: avoid running debug SLAB code with IRQs disabled for alloc_bulk Jesper Dangaard Brouer
@ 2015-12-08 16:19 ` Jesper Dangaard Brouer
2015-12-09 16:06 ` Christoph Lameter
2015-12-08 16:19 ` [RFC PATCH V2 9/9] slab: annotate code to generate more compact asm code Jesper Dangaard Brouer
8 siblings, 1 reply; 41+ messages in thread
From: Jesper Dangaard Brouer @ 2015-12-08 16:19 UTC (permalink / raw)
To: linux-mm
Cc: Christoph Lameter, Vladimir Davydov, Jesper Dangaard Brouer,
Joonsoo Kim, Linus Torvalds, Andrew Morton
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
mm/slab.c | 29 +++++++++++++++++++++++------
1 file changed, 23 insertions(+), 6 deletions(-)
diff --git a/mm/slab.c b/mm/slab.c
index 35a4fb0970bd..72c7958b4075 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3385,12 +3385,6 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
}
EXPORT_SYMBOL(kmem_cache_alloc);
-void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
-{
- __kmem_cache_free_bulk(s, size, p);
-}
-EXPORT_SYMBOL(kmem_cache_free_bulk);
-
static __always_inline void
cache_alloc_debugcheck_after_bulk(struct kmem_cache *s, gfp_t flags,
size_t size, void **p, unsigned long caller)
@@ -3584,6 +3578,29 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
}
EXPORT_SYMBOL(kmem_cache_free);
+void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)
+{
+ struct kmem_cache *s;
+ size_t i;
+
+ local_irq_disable();
+ for (i = 0; i < size; i++) {
+ void *objp = p[i];
+
+ s = cache_from_obj(orig_s, objp);
+
+ debug_check_no_locks_freed(objp, s->object_size);
+ if (!(s->flags & SLAB_DEBUG_OBJECTS))
+ debug_check_no_obj_freed(objp, s->object_size);
+
+ __cache_free(s, objp, _RET_IP_);
+ }
+ local_irq_enable();
+
+ /* FIXME: add tracing */
+}
+EXPORT_SYMBOL(kmem_cache_free_bulk);
+
/**
* kfree - free previously allocated memory
* @objp: pointer returned by kmalloc.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 41+ messages in thread
* Re: [RFC PATCH V2 8/9] slab: implement bulk free in SLAB allocator
2015-12-08 16:19 ` [RFC PATCH V2 8/9] slab: implement bulk free in SLAB allocator Jesper Dangaard Brouer
@ 2015-12-09 16:06 ` Christoph Lameter
2015-12-09 18:53 ` Jesper Dangaard Brouer
2015-12-14 15:19 ` Jesper Dangaard Brouer
0 siblings, 2 replies; 41+ messages in thread
From: Christoph Lameter @ 2015-12-09 16:06 UTC (permalink / raw)
To: Jesper Dangaard Brouer
Cc: linux-mm, Vladimir Davydov, Joonsoo Kim, Linus Torvalds, Andrew Morton
On Tue, 8 Dec 2015, Jesper Dangaard Brouer wrote:
> +void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)
Drop orig_s as a parameter? This makes the function have less code and
makes it more universally useful for freeing large amount of objects.
> +{
> + struct kmem_cache *s;
> + size_t i;
> +
> + local_irq_disable();
> + for (i = 0; i < size; i++) {
> + void *objp = p[i];
> +
> + s = cache_from_obj(orig_s, objp);
And just use the cache the object belongs to.
s = virt_to_head_page(objp)->slab_cache;
> +
> + debug_check_no_locks_freed(objp, s->object_size);
> + if (!(s->flags & SLAB_DEBUG_OBJECTS))
> + debug_check_no_obj_freed(objp, s->object_size);
> +
> + __cache_free(s, objp, _RET_IP_);
> + }
> + local_irq_enable();
> +
> + /* FIXME: add tracing */
> +}
> +EXPORT_SYMBOL(kmem_cache_free_bulk);
Could we do the following API change patch before this series so that
kmem_cache_free_bulk is properly generalized?
From: Christoph Lameter <cl@linux.com>
Subject: slab bulk api: Remove the kmem_cache parameter from kmem_cache_bulk_free()
It is desirable and necessary to free objects from different kmem_caches.
It is required in order to support memcg object freeing across different
cgroups.
So drop the pointless parameter and allow freeing of arbitrary lists
of slab allocated objects.
This patch also does the proper compound page handling so that
arbitrary objects allocated via kmalloc() can be handled by
kmem_cache_bulk_free().
Signed-off-by: Christoph Lameter <cl@linux.com>
Index: linux/include/linux/slab.h
===================================================================
--- linux.orig/include/linux/slab.h
+++ linux/include/linux/slab.h
@@ -315,7 +315,7 @@ void kmem_cache_free(struct kmem_cache *
*
* Note that interrupts must be enabled when calling these functions.
*/
-void kmem_cache_free_bulk(struct kmem_cache *, size_t, void **);
+void kmem_cache_free_bulk(size_t, void **);
int kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **);
#ifdef CONFIG_NUMA
Index: linux/mm/slab.c
===================================================================
--- linux.orig/mm/slab.c
+++ linux/mm/slab.c
@@ -3413,9 +3413,9 @@ void *kmem_cache_alloc(struct kmem_cache
}
EXPORT_SYMBOL(kmem_cache_alloc);
-void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
+void kmem_cache_free_bulk(size_t size, void **p)
{
- __kmem_cache_free_bulk(s, size, p);
+ __kmem_cache_free_bulk(size, p);
}
EXPORT_SYMBOL(kmem_cache_free_bulk);
Index: linux/mm/slab.h
===================================================================
--- linux.orig/mm/slab.h
+++ linux/mm/slab.h
@@ -166,10 +166,10 @@ ssize_t slabinfo_write(struct file *file
/*
* Generic implementation of bulk operations
* These are useful for situations in which the allocator cannot
- * perform optimizations. In that case segments of the objecct listed
+ * perform optimizations. In that case segments of the object listed
* may be allocated or freed using these operations.
*/
-void __kmem_cache_free_bulk(struct kmem_cache *, size_t, void **);
+void __kmem_cache_free_bulk(size_t, void **);
int __kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **);
#ifdef CONFIG_MEMCG_KMEM
Index: linux/mm/slub.c
===================================================================
--- linux.orig/mm/slub.c
+++ linux/mm/slub.c
@@ -2887,23 +2887,30 @@ static int build_detached_freelist(struc
/* Note that interrupts must be enabled when calling this function. */
-void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)
+void kmem_cache_free_bulk(size_t size, void **p)
{
if (WARN_ON(!size))
return;
do {
struct detached_freelist df;
- struct kmem_cache *s;
+ struct page *page;
- /* Support for memcg */
- s = cache_from_obj(orig_s, p[size - 1]);
+ page = virt_to_head_page(p[size - 1]);
- size = build_detached_freelist(s, size, p, &df);
+ if (unlikely(!PageSlab(page))) {
+ BUG_ON(!PageCompound(page));
+ kfree_hook(p[size - 1]);
+ __free_kmem_pages(page, compound_order(page));
+ p[--size] = NULL;
+ continue;
+ }
+
+ size = build_detached_freelist(page->slab_cache, size, p, &df);
if (unlikely(!df.page))
continue;
- slab_free(s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
+ slab_free(page->slab_cache, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
} while (likely(size));
}
EXPORT_SYMBOL(kmem_cache_free_bulk);
@@ -2963,7 +2970,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca
error:
local_irq_enable();
slab_post_alloc_hook(s, flags, i, p);
- __kmem_cache_free_bulk(s, i, p);
+ __kmem_cache_free_bulk(i, p);
return 0;
}
EXPORT_SYMBOL(kmem_cache_alloc_bulk);
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [RFC PATCH V2 8/9] slab: implement bulk free in SLAB allocator
2015-12-09 16:06 ` Christoph Lameter
@ 2015-12-09 18:53 ` Jesper Dangaard Brouer
2015-12-09 19:41 ` Christoph Lameter
2015-12-14 15:19 ` Jesper Dangaard Brouer
1 sibling, 1 reply; 41+ messages in thread
From: Jesper Dangaard Brouer @ 2015-12-09 18:53 UTC (permalink / raw)
To: Christoph Lameter
Cc: linux-mm, Vladimir Davydov, Joonsoo Kim, Linus Torvalds,
Andrew Morton, brouer
On Wed, 9 Dec 2015 10:06:39 -0600 (CST)
Christoph Lameter <cl@linux.com> wrote:
> On Tue, 8 Dec 2015, Jesper Dangaard Brouer wrote:
>
> > +void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)
>
> Drop orig_s as a parameter? This makes the function have less code and
> makes it more universally useful for freeing large amount of objects.
I really like the idea of making it able to free kmalloc'ed objects.
But I hate to change the API again... (I do have a use-case in the
network stack where I could use this feature).
> Could we do the following API change patch before this series so that
> kmem_cache_free_bulk is properly generalized?
I'm traveling (to Sweden) Thursday (afternoon) and will be back late
Friday (and have to play Viking in the weekend), thus to be realistic
I'll start working on this Monday, so we can get some benchmark numbers
to guide this decision.
> From: Christoph Lameter <cl@linux.com>
> Subject: slab bulk api: Remove the kmem_cache parameter from kmem_cache_bulk_free()
>
> It is desirable and necessary to free objects from different kmem_caches.
> It is required in order to support memcg object freeing across different5
> cgroups.
>
> So drop the pointless parameter and allow freeing of arbitrary lists
> of slab allocated objects.
>
> This patch also does the proper compound page handling so that
> arbitrary objects allocated via kmalloc() can be handled by
> kmem_cache_bulk_free().
>
> Signed-off-by: Christoph Lameter <cl@linux.com>
>
> Index: linux/include/linux/slab.h
> ===================================================================
> --- linux.orig/include/linux/slab.h
> +++ linux/include/linux/slab.h
> @@ -315,7 +315,7 @@ void kmem_cache_free(struct kmem_cache *
> *
> * Note that interrupts must be enabled when calling these functions.
> */
> -void kmem_cache_free_bulk(struct kmem_cache *, size_t, void **);
> +void kmem_cache_free_bulk(size_t, void **);
> int kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **);
>
> #ifdef CONFIG_NUMA
> Index: linux/mm/slab.c
> ===================================================================
> --- linux.orig/mm/slab.c
> +++ linux/mm/slab.c
> @@ -3413,9 +3413,9 @@ void *kmem_cache_alloc(struct kmem_cache
> }
> EXPORT_SYMBOL(kmem_cache_alloc);
>
> -void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
> +void kmem_cache_free_bulk(size_t size, void **p)
> {
> - __kmem_cache_free_bulk(s, size, p);
> + __kmem_cache_free_bulk(size, p);
> }
> EXPORT_SYMBOL(kmem_cache_free_bulk);
>
> Index: linux/mm/slab.h
> ===================================================================
> --- linux.orig/mm/slab.h
> +++ linux/mm/slab.h
> @@ -166,10 +166,10 @@ ssize_t slabinfo_write(struct file *file
> /*
> * Generic implementation of bulk operations
> * These are useful for situations in which the allocator cannot
> - * perform optimizations. In that case segments of the objecct listed
> + * perform optimizations. In that case segments of the object listed
> * may be allocated or freed using these operations.
> */
> -void __kmem_cache_free_bulk(struct kmem_cache *, size_t, void **);
> +void __kmem_cache_free_bulk(size_t, void **);
> int __kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **);
>
> #ifdef CONFIG_MEMCG_KMEM
> Index: linux/mm/slub.c
> ===================================================================
> --- linux.orig/mm/slub.c
> +++ linux/mm/slub.c
> @@ -2887,23 +2887,30 @@ static int build_detached_freelist(struc
>
>
> /* Note that interrupts must be enabled when calling this function. */
> -void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)
> +void kmem_cache_free_bulk(size_t size, void **p)
> {
> if (WARN_ON(!size))
> return;
>
> do {
> struct detached_freelist df;
> - struct kmem_cache *s;
> + struct page *page;
>
> - /* Support for memcg */
> - s = cache_from_obj(orig_s, p[size - 1]);
> + page = virt_to_head_page(p[size - 1]);
Think we can drop this.
> - size = build_detached_freelist(s, size, p, &df);
> + if (unlikely(!PageSlab(page))) {
> + BUG_ON(!PageCompound(page));
> + kfree_hook(p[size - 1]);
> + __free_kmem_pages(page, compound_order(page));
> + p[--size] = NULL;
> + continue;
> + }
and move above into build_detached_freelist() and make it a little more
pretty code wise (avoiding the p[size -1] juggling).
> +
> + size = build_detached_freelist(page->slab_cache, size, p, &df);
also think we should be able to drop the kmem_cache param "page->slab_cache".
> if (unlikely(!df.page))
> continue;
>
> - slab_free(s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
> + slab_free(page->slab_cache, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
> } while (likely(size));
> }
> EXPORT_SYMBOL(kmem_cache_free_bulk);
> @@ -2963,7 +2970,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca
> error:
> local_irq_enable();
> slab_post_alloc_hook(s, flags, i, p);
> - __kmem_cache_free_bulk(s, i, p);
> + __kmem_cache_free_bulk(i, p);
> return 0;
> }
> EXPORT_SYMBOL(kmem_cache_alloc_bulk);
--
Best regards,
Jesper Dangaard Brouer
MSc.CS, Principal Kernel Engineer at Red Hat
Author of http://www.iptv-analyzer.org
LinkedIn: http://www.linkedin.com/in/brouer
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [RFC PATCH V2 8/9] slab: implement bulk free in SLAB allocator
2015-12-09 18:53 ` Jesper Dangaard Brouer
@ 2015-12-09 19:41 ` Christoph Lameter
2015-12-09 20:50 ` Jesper Dangaard Brouer
2015-12-10 15:10 ` Jesper Dangaard Brouer
0 siblings, 2 replies; 41+ messages in thread
From: Christoph Lameter @ 2015-12-09 19:41 UTC (permalink / raw)
To: Jesper Dangaard Brouer
Cc: linux-mm, Vladimir Davydov, Joonsoo Kim, Linus Torvalds, Andrew Morton
On Wed, 9 Dec 2015, Jesper Dangaard Brouer wrote:
> I really like the idea of making it able to free kmalloc'ed objects.
> But I hate to change the API again... (I do have a use-case in the
> network stack where I could use this feature).
Now is the time to fix the API since it's not that much in use yet, if at
all.
> I'm traveling (to Sweden) Thursday (afternoon) and will be back late
> Friday (and have to play Viking in the weekend), thus to be realistic
> I'll start working on this Monday, so we can get some benchmark numbers
> to guide this decision.
Ok great.
> > - struct kmem_cache *s;
> > + struct page *page;
> >
> > - /* Support for memcg */
> > - s = cache_from_obj(orig_s, p[size - 1]);
> > + page = virt_to_head_page(p[size - 1]);
>
> Think we can drop this.
Well, then you won't be able to check for a compound page. And you do not
want this check in build detached freelist.
>
> > - size = build_detached_freelist(s, size, p, &df);
> > + if (unlikely(!PageSlab(page))) {
> > + BUG_ON(!PageCompound(page));
> > + kfree_hook(p[size - 1]);
> > + __free_kmem_pages(page, compound_order(page));
> > + p[--size] = NULL;
> > + continue;
> > + }
>
> and move above into build_detached_freelist() and make it a little more
> pretty code wise (avoiding the p[size -1] juggling).
If we do this check here then we wont be needing it in
build_detached_freelist.
> > +
> > + size = build_detached_freelist(page->slab_cache, size, p, &df);
>
> also think we should be able to drop the kmem_cache param "page->slab_cache".
Yep.
>
>
> > if (unlikely(!df.page))
> > continue;
> >
> > - slab_free(s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
> > + slab_free(page->slab_cache, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
Then we need df.slab_cache or something.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [RFC PATCH V2 8/9] slab: implement bulk free in SLAB allocator
2015-12-09 19:41 ` Christoph Lameter
@ 2015-12-09 20:50 ` Jesper Dangaard Brouer
2015-12-10 15:15 ` Christoph Lameter
2015-12-10 15:10 ` Jesper Dangaard Brouer
1 sibling, 1 reply; 41+ messages in thread
From: Jesper Dangaard Brouer @ 2015-12-09 20:50 UTC (permalink / raw)
To: Christoph Lameter
Cc: linux-mm, Vladimir Davydov, Joonsoo Kim, Linus Torvalds,
Andrew Morton, brouer, David Miller
On Wed, 9 Dec 2015 13:41:07 -0600 (CST) Christoph Lameter <cl@linux.com> wrote:
> On Wed, 9 Dec 2015, Jesper Dangaard Brouer wrote:
>
> > I really like the idea of making it able to free kmalloc'ed objects.
> > But I hate to change the API again... (I do have a use-case in the
> > network stack where I could use this feature).
>
> Now is the time to fix the API since its not that much in use yet if
> at all.
True. I was just so close submitting the network use-case to DaveM.
Guess, that will have to wait if we choose this API change (and
I'll have to wait another 3 month before the trees are in sync again).
> > I'm traveling (to Sweden) Thursday (afternoon) and will be back late
> > Friday (and have to play Viking in the weekend), thus to be realistic
> > I'll start working on this Monday, so we can get some benchmark numbers
> > to guide this decision.
>
> Ok great.
I'm actually very eager to see if this works out :-)
> > > - struct kmem_cache *s;
> > > + struct page *page;
> > >
> > > - /* Support for memcg */
> > > - s = cache_from_obj(orig_s, p[size - 1]);
> > > + page = virt_to_head_page(p[size - 1]);
> >
> > Think we can drop this.
>
> Well then you wont be able to check for a compound page. And you do not
> want this check in build detached freelist.
>
> >
> > > - size = build_detached_freelist(s, size, p, &df);
> > > + if (unlikely(!PageSlab(page))) {
> > > + BUG_ON(!PageCompound(page));
> > > + kfree_hook(p[size - 1]);
> > > + __free_kmem_pages(page, compound_order(page));
> > > + p[--size] = NULL;
> > > + continue;
> > > + }
> >
> > and move above into build_detached_freelist() and make it a little more
> > pretty code wise (avoiding the p[size -1] juggling).
>
> If we do this check here then we wont be needing it in
> build_detached_freelist.
I'll try see what looks best coding style wise...
> > > +
> > > + size = build_detached_freelist(page->slab_cache, size, p, &df);
> >
> > also think we should be able to drop the kmem_cache param "page->slab_cache".
>
> Yep.
>
> >
> >
> > > if (unlikely(!df.page))
> > > continue;
> > >
> > > - slab_free(s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
> > > + slab_free(page->slab_cache, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
>
> Then we need df.slab_cache or something.
What about df.page->slab_cache (?)
--
Best regards,
Jesper Dangaard Brouer
MSc.CS, Principal Kernel Engineer at Red Hat
Author of http://www.iptv-analyzer.org
LinkedIn: http://www.linkedin.com/in/brouer
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [RFC PATCH V2 8/9] slab: implement bulk free in SLAB allocator
2015-12-09 20:50 ` Jesper Dangaard Brouer
@ 2015-12-10 15:15 ` Christoph Lameter
0 siblings, 0 replies; 41+ messages in thread
From: Christoph Lameter @ 2015-12-10 15:15 UTC (permalink / raw)
To: Jesper Dangaard Brouer
Cc: linux-mm, Vladimir Davydov, Joonsoo Kim, Linus Torvalds,
Andrew Morton, David Miller
On Wed, 9 Dec 2015, Jesper Dangaard Brouer wrote:
> True. I was just so close submitting the network use-case to DaveM.
> Guess, that will have to wait if we choose this API change (and
> I'll have to wait another 3 month before the trees are in sync again).
why? Andrew can push that to next pretty soon and DaveM could carry that
patch too.
> > Then we need df.slab_cache or something.
>
> What about df.page->slab_cache (?)
Fine come up with a patch.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [RFC PATCH V2 8/9] slab: implement bulk free in SLAB allocator
2015-12-09 19:41 ` Christoph Lameter
2015-12-09 20:50 ` Jesper Dangaard Brouer
@ 2015-12-10 15:10 ` Jesper Dangaard Brouer
2015-12-10 15:18 ` Christoph Lameter
1 sibling, 1 reply; 41+ messages in thread
From: Jesper Dangaard Brouer @ 2015-12-10 15:10 UTC (permalink / raw)
To: Christoph Lameter
Cc: linux-mm, Vladimir Davydov, Joonsoo Kim, Linus Torvalds,
Andrew Morton, brouer
On Wed, 9 Dec 2015 13:41:07 -0600 (CST)
Christoph Lameter <cl@linux.com> wrote:
> On Wed, 9 Dec 2015, Jesper Dangaard Brouer wrote:
>
> > I really like the idea of making it able to free kmalloc'ed objects.
> > But I hate to change the API again... (I do have a use-case in the
> > network stack where I could use this feature).
>
> Now is the time to fix the API since its not that much in use yet if at
> all.
Let's start the naming thread/flame (while waiting for my flight ;-))
If we drop the "kmem_cache *s" parameter from kmem_cache_free_bulk(),
and also make it handle kmalloc'ed objects. Why should we name it
"kmem_cache_free_bulk"? ... what about naming it kfree_bulk() ???
Or should we keep the name to have a symmetric API
kmem_cache_{alloc,free}_bulk() call convention?
I'm undecided...
--
Best regards,
Jesper Dangaard Brouer
MSc.CS, Principal Kernel Engineer at Red Hat
Author of http://www.iptv-analyzer.org
LinkedIn: http://www.linkedin.com/in/brouer
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [RFC PATCH V2 8/9] slab: implement bulk free in SLAB allocator
2015-12-10 15:10 ` Jesper Dangaard Brouer
@ 2015-12-10 15:18 ` Christoph Lameter
2015-12-10 15:26 ` Vladimir Davydov
0 siblings, 1 reply; 41+ messages in thread
From: Christoph Lameter @ 2015-12-10 15:18 UTC (permalink / raw)
To: Jesper Dangaard Brouer
Cc: linux-mm, Vladimir Davydov, Joonsoo Kim, Linus Torvalds, Andrew Morton
On Thu, 10 Dec 2015, Jesper Dangaard Brouer wrote:
> If we drop the "kmem_cache *s" parameter from kmem_cache_free_bulk(),
> and also make it handle kmalloc'ed objects. Why should we name it
> "kmem_cache_free_bulk"? ... what about naming it kfree_bulk() ???
Yes makes sense.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [RFC PATCH V2 8/9] slab: implement bulk free in SLAB allocator
2015-12-10 15:18 ` Christoph Lameter
@ 2015-12-10 15:26 ` Vladimir Davydov
2015-12-10 17:24 ` Christoph Lameter
0 siblings, 1 reply; 41+ messages in thread
From: Vladimir Davydov @ 2015-12-10 15:26 UTC (permalink / raw)
To: Christoph Lameter
Cc: Jesper Dangaard Brouer, linux-mm, Joonsoo Kim, Linus Torvalds,
Andrew Morton
On Thu, Dec 10, 2015 at 09:18:26AM -0600, Christoph Lameter wrote:
> On Thu, 10 Dec 2015, Jesper Dangaard Brouer wrote:
>
> > If we drop the "kmem_cache *s" parameter from kmem_cache_free_bulk(),
> > and also make it handle kmalloc'ed objects. Why should we name it
> > "kmem_cache_free_bulk"? ... what about naming it kfree_bulk() ???
>
> Yes makes sense.
IMHO kmem_cache_alloc_bulk/kfree_bulk looks awkward, especially taking
into account the fact that we pair kmem_cache_alloc/kmem_cache_free and
kmalloc/kfree, but never kmem_cache_alloc/kfree.
So I'd vote for kmem_cache_free_bulk taking a kmem_cache as an argument,
but I'm not a potential user of this API, so please don't count my vote
:-)
Thanks,
Vladimir
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [RFC PATCH V2 8/9] slab: implement bulk free in SLAB allocator
2015-12-10 15:26 ` Vladimir Davydov
@ 2015-12-10 17:24 ` Christoph Lameter
0 siblings, 0 replies; 41+ messages in thread
From: Christoph Lameter @ 2015-12-10 17:24 UTC (permalink / raw)
To: Vladimir Davydov
Cc: Jesper Dangaard Brouer, linux-mm, Joonsoo Kim, Linus Torvalds,
Andrew Morton
On Thu, 10 Dec 2015, Vladimir Davydov wrote:
> IMHO kmem_cache_alloc_bulk/kfree_bulk looks awkward, especially taking
> into account the fact that we pair kmem_cache_alloc/kmem_cache_free and
> kmalloc/kfree, but never kmem_cache_alloc/kfree.
>
> So I'd vote for kmem_cache_free_bulk taking a kmem_cache as an argument,
> but I'm not a potential user of this API, so please don't count my vote
> :-)
One way to have it less awkward is to keep naming it kmem_cache_free_bulk
but omit the kmem_cache parameter like what I did initially.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [RFC PATCH V2 8/9] slab: implement bulk free in SLAB allocator
2015-12-09 16:06 ` Christoph Lameter
2015-12-09 18:53 ` Jesper Dangaard Brouer
@ 2015-12-14 15:19 ` Jesper Dangaard Brouer
2015-12-15 12:02 ` Jesper Dangaard Brouer
1 sibling, 1 reply; 41+ messages in thread
From: Jesper Dangaard Brouer @ 2015-12-14 15:19 UTC (permalink / raw)
To: Christoph Lameter
Cc: linux-mm, Vladimir Davydov, Joonsoo Kim, Linus Torvalds,
Andrew Morton, brouer
On Wed, 9 Dec 2015 10:06:39 -0600 (CST) Christoph Lameter <cl@linux.com> wrote:
> From: Christoph Lameter <cl@linux.com>
> Subject: slab bulk api: Remove the kmem_cache parameter from kmem_cache_bulk_free()
>
> It is desirable and necessary to free objects from different kmem_caches.
> It is required in order to support memcg object freeing across different5
> cgroups.
>
> So drop the pointless parameter and allow freeing of arbitrary lists
> of slab allocated objects.
>
> This patch also does the proper compound page handling so that
> arbitrary objects allocated via kmalloc() can be handled by
> kmem_cache_bulk_free().
>
> Signed-off-by: Christoph Lameter <cl@linux.com>
I've modified this patch, to instead introduce a kfree_bulk() and keep
the old behavior of kmem_cache_free_bulk(). This allow us to easier
compare the two impl. approaches.
[...]
> Index: linux/mm/slub.c
> ===================================================================
> --- linux.orig/mm/slub.c
> +++ linux/mm/slub.c
> @@ -2887,23 +2887,30 @@ static int build_detached_freelist(struc
>
>
> /* Note that interrupts must be enabled when calling this function. */
> -void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)
> +void kmem_cache_free_bulk(size_t size, void **p)
Renamed to kfree_bulk(size_t size, void **p)
> {
> if (WARN_ON(!size))
> return;
>
> do {
> struct detached_freelist df;
> - struct kmem_cache *s;
> + struct page *page;
>
> - /* Support for memcg */
> - s = cache_from_obj(orig_s, p[size - 1]);
> + page = virt_to_head_page(p[size - 1]);
>
> - size = build_detached_freelist(s, size, p, &df);
> + if (unlikely(!PageSlab(page))) {
> + BUG_ON(!PageCompound(page));
> + kfree_hook(p[size - 1]);
> + __free_kmem_pages(page, compound_order(page));
> + p[--size] = NULL;
> + continue;
> + }
> +
> + size = build_detached_freelist(page->slab_cache, size, p, &df);
> if (unlikely(!df.page))
> continue;
>
> - slab_free(s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
> + slab_free(page->slab_cache, df.page, df.freelist,
> + df.tail, df.cnt, _RET_IP_);
> } while (likely(size));
> }
> EXPORT_SYMBOL(kmem_cache_free_bulk);
This specific implementation was too slow, mostly because we call
virt_to_head_page() both in this function and inside build_detached_freelist().
After integrating this directly into build_detached_freelist() I'm
getting more comparative results.
Results with disabled CONFIG_MEMCG_KMEM, and SLUB (and slab_nomerge for
more accurate results between runs):
bulk- fallback - kmem_cache_free_bulk - kfree_bulk
1 - 58 cycles 14.735 ns - 55 cycles 13.880 ns - 59 cycles 14.843 ns
2 - 53 cycles 13.298 ns - 32 cycles 8.037 ns - 34 cycles 8.592 ns
3 - 51 cycles 12.837 ns - 25 cycles 6.442 ns - 27 cycles 6.794 ns
4 - 50 cycles 12.514 ns - 23 cycles 5.952 ns - 23 cycles 5.958 ns
8 - 48 cycles 12.097 ns - 20 cycles 5.160 ns - 22 cycles 5.505 ns
16 - 47 cycles 11.888 ns - 19 cycles 4.900 ns - 19 cycles 4.969 ns
30 - 47 cycles 11.793 ns - 18 cycles 4.688 ns - 18 cycles 4.682 ns
32 - 47 cycles 11.926 ns - 18 cycles 4.674 ns - 18 cycles 4.702 ns
34 - 95 cycles 23.823 ns - 24 cycles 6.068 ns - 24 cycles 6.058 ns
48 - 81 cycles 20.258 ns - 21 cycles 5.360 ns - 21 cycles 5.338 ns
64 - 73 cycles 18.414 ns - 20 cycles 5.160 ns - 20 cycles 5.140 ns
128 - 90 cycles 22.563 ns - 27 cycles 6.765 ns - 27 cycles 6.801 ns
158 - 99 cycles 24.831 ns - 30 cycles 7.625 ns - 30 cycles 7.720 ns
250 - 104 cycles 26.173 ns - 37 cycles 9.271 ns - 37 cycles 9.371 ns
As can be seen, the old kmem_cache_free_bulk() is faster than the new
kfree_bulk() (which omits the kmem_cache pointer and need to derive it
from the page->slab_cache). The base (bulk=1) extra cost is 4 cycles,
which then gets amortized as build_detached_freelist() combines objects
belonging to same page.
This is likely because the compiler, with disabled CONFIG_MEMCG_KMEM=n,
can optimize and avoid doing the lookup of the kmem_cache structure.
I'll start doing testing with CONFIG_MEMCG_KMEM enabled...
--
Best regards,
Jesper Dangaard Brouer
MSc.CS, Principal Kernel Engineer at Red Hat
Author of http://www.iptv-analyzer.org
LinkedIn: http://www.linkedin.com/in/brouer
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [RFC PATCH V2 8/9] slab: implement bulk free in SLAB allocator
2015-12-14 15:19 ` Jesper Dangaard Brouer
@ 2015-12-15 12:02 ` Jesper Dangaard Brouer
0 siblings, 0 replies; 41+ messages in thread
From: Jesper Dangaard Brouer @ 2015-12-15 12:02 UTC (permalink / raw)
To: Christoph Lameter
Cc: linux-mm, Vladimir Davydov, Joonsoo Kim, Linus Torvalds,
Andrew Morton, brouer
On Mon, 14 Dec 2015 16:19:58 +0100 Jesper Dangaard Brouer <brouer@redhat.com> wrote:
> On Wed, 9 Dec 2015 10:06:39 -0600 (CST) Christoph Lameter <cl@linux.com> wrote:
>
> > From: Christoph Lameter <cl@linux.com>
> > Subject: slab bulk api: Remove the kmem_cache parameter from kmem_cache_bulk_free()
> >
> > It is desirable and necessary to free objects from different kmem_caches.
> > It is required in order to support memcg object freeing across different5
> > cgroups.
> >
> > So drop the pointless parameter and allow freeing of arbitrary lists
> > of slab allocated objects.
> >
> > This patch also does the proper compound page handling so that
> > arbitrary objects allocated via kmalloc() can be handled by
> > kmem_cache_bulk_free().
> >
> > Signed-off-by: Christoph Lameter <cl@linux.com>
>
> I've modified this patch, to instead introduce a kfree_bulk() and keep
> the old behavior of kmem_cache_free_bulk(). This allow us to easier
> compare the two impl. approaches.
>
> [...]
> > Index: linux/mm/slub.c
> > ===================================================================
> > --- linux.orig/mm/slub.c
> > +++ linux/mm/slub.c
> > @@ -2887,23 +2887,30 @@ static int build_detached_freelist(struc
> >
> >
> > /* Note that interrupts must be enabled when calling this function. */
> > -void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)
> > +void kmem_cache_free_bulk(size_t size, void **p)
>
> Renamed to kfree_bulk(size_t size, void **p)
>
> > {
> > if (WARN_ON(!size))
> > return;
> >
> > do {
> > struct detached_freelist df;
> > - struct kmem_cache *s;
> > + struct page *page;
> >
> > - /* Support for memcg */
> > - s = cache_from_obj(orig_s, p[size - 1]);
> > + page = virt_to_head_page(p[size - 1]);
> >
> > - size = build_detached_freelist(s, size, p, &df);
> > + if (unlikely(!PageSlab(page))) {
> > + BUG_ON(!PageCompound(page));
> > + kfree_hook(p[size - 1]);
> > + __free_kmem_pages(page, compound_order(page));
> > + p[--size] = NULL;
> > + continue;
> > + }
> > +
> > + size = build_detached_freelist(page->slab_cache, size, p, &df);
> > if (unlikely(!df.page))
> > continue;
> >
> > - slab_free(s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
> > + slab_free(page->slab_cache, df.page, df.freelist,
> > + df.tail, df.cnt, _RET_IP_);
> > } while (likely(size));
> > }
> > EXPORT_SYMBOL(kmem_cache_free_bulk);
>
> This specific implementation was too slow, mostly because we call
> virt_to_head_page() both in this function and inside build_detached_freelist().
>
> After integrating this directly into build_detached_freelist() I'm
> getting more comparative results.
>
> Results with disabled CONFIG_MEMCG_KMEM, and SLUB (and slab_nomerge for
> more accurate results between runs):
>
> bulk- fallback - kmem_cache_free_bulk - kfree_bulk
> 1 - 58 cycles 14.735 ns - 55 cycles 13.880 ns - 59 cycles 14.843 ns
> 2 - 53 cycles 13.298 ns - 32 cycles 8.037 ns - 34 cycles 8.592 ns
> 3 - 51 cycles 12.837 ns - 25 cycles 6.442 ns - 27 cycles 6.794 ns
> 4 - 50 cycles 12.514 ns - 23 cycles 5.952 ns - 23 cycles 5.958 ns
> 8 - 48 cycles 12.097 ns - 20 cycles 5.160 ns - 22 cycles 5.505 ns
> 16 - 47 cycles 11.888 ns - 19 cycles 4.900 ns - 19 cycles 4.969 ns
> 30 - 47 cycles 11.793 ns - 18 cycles 4.688 ns - 18 cycles 4.682 ns
> 32 - 47 cycles 11.926 ns - 18 cycles 4.674 ns - 18 cycles 4.702 ns
> 34 - 95 cycles 23.823 ns - 24 cycles 6.068 ns - 24 cycles 6.058 ns
> 48 - 81 cycles 20.258 ns - 21 cycles 5.360 ns - 21 cycles 5.338 ns
> 64 - 73 cycles 18.414 ns - 20 cycles 5.160 ns - 20 cycles 5.140 ns
> 128 - 90 cycles 22.563 ns - 27 cycles 6.765 ns - 27 cycles 6.801 ns
> 158 - 99 cycles 24.831 ns - 30 cycles 7.625 ns - 30 cycles 7.720 ns
> 250 - 104 cycles 26.173 ns - 37 cycles 9.271 ns - 37 cycles 9.371 ns
>
> As can been seen the old kmem_cache_free_bulk() is faster than the new
> kfree_bulk() (which omits the kmem_cache pointer and need to derive it
> from the page->slab_cache). The base (bulk=1) extra cost is 4 cycles,
> which then gets amortized as build_detached_freelist() combines objects
> belonging to same page.
>
> This is likely because the compiler, with disabled CONFIG_MEMCG_KMEM=n,
> can optimize and avoid doing the lookup of the kmem_cache structure.
>
> I'll start doing testing with CONFIG_MEMCG_KMEM enabled...
Enabling CONFIG_MEMCG_KMEM didn't change the performance, likely
because memcg_kmem_enabled() uses a static_key_false() construct.
Implemented kfree_bulk() as an inline function just calling
kmem_cache_free_bulk(NULL, size, p) run with CONFIG_MEMCG_KMEM=y (but
no "user" of memcg):
bulk- fallback - kmem_cache_free_bulk - kmem_cache_free_bulk(s=NULL)
1 - 60 cycles 15.232 ns - 52 cycles 13.124 ns - 58 cycles 14.562 ns
2 - 54 cycles 13.678 ns - 31 cycles 7.875 ns - 34 cycles 8.509 ns
3 - 52 cycles 13.112 ns - 25 cycles 6.406 ns - 28 cycles 7.045 ns
4 - 51 cycles 12.833 ns - 24 cycles 6.036 ns - 24 cycles 6.076 ns
8 - 49 cycles 12.367 ns - 21 cycles 5.404 ns - 22 cycles 5.522 ns
16 - 48 cycles 12.144 ns - 19 cycles 4.934 ns - 19 cycles 4.967 ns
30 - 50 cycles 12.542 ns - 18 cycles 4.685 ns - 18 cycles 4.687 ns
32 - 48 cycles 12.234 ns - 18 cycles 4.661 ns - 18 cycles 4.662 ns
34 - 96 cycles 24.204 ns - 24 cycles 6.068 ns - 24 cycles 6.067 ns
48 - 82 cycles 20.553 ns - 21 cycles 5.410 ns - 21 cycles 5.433 ns
64 - 74 cycles 18.743 ns - 20 cycles 5.171 ns - 20 cycles 5.179 ns
128 - 91 cycles 22.791 ns - 26 cycles 6.601 ns - 26 cycles 6.600 ns
158 - 100 cycles 25.191 ns - 30 cycles 7.569 ns - 30 cycles 7.617 ns
250 - 106 cycles 26.535 ns - 37 cycles 9.426 ns - 37 cycles 9.427 ns
As can be seen, it is still more costly to get the kmem_cache pointer
via the object->slab_cache, even when compiled with CONFIG_MEMCG_KMEM.
But what happen when enabling a "user" of kmemcg, which modify the
static_key_false() in function call memcg_kmem_enabled().
First notice normal kmem fastpath cost increased: 70 cycles(tsc) 17.640 ns
bulk- fallback - kmem_cache_free_bulk - kmem_cache_free_bulk(s=NULL)
1 - 85 cycles 21.351 ns - 76 cycles 19.086 ns - 74 cycles 18.701 ns
2 - 79 cycles 19.907 ns - 43 cycles 10.848 ns - 41 cycles 10.467 ns
3 - 77 cycles 19.310 ns - 32 cycles 8.132 ns - 31 cycles 7.880 ns
4 - 76 cycles 19.017 ns - 28 cycles 7.195 ns - 28 cycles 7.050 ns
8 - 77 cycles 19.443 ns - 23 cycles 5.838 ns - 23 cycles 5.782 ns
16 - 75 cycles 18.815 ns - 20 cycles 5.174 ns - 20 cycles 5.127 ns
30 - 74 cycles 18.719 ns - 19 cycles 4.870 ns - 19 cycles 4.816 ns
32 - 75 cycles 18.917 ns - 19 cycles 4.850 ns - 19 cycles 4.800 ns
34 - 119 cycles 29.878 ns - 25 cycles 6.312 ns - 25 cycles 6.282 ns
48 - 106 cycles 26.536 ns - 21 cycles 5.471 ns - 21 cycles 5.448 ns
64 - 98 cycles 24.635 ns - 20 cycles 5.239 ns - 20 cycles 5.224 ns
128 - 114 cycles 28.586 ns - 27 cycles 6.768 ns - 26 cycles 6.736 ns
158 - 124 cycles 31.173 ns - 30 cycles 7.668 ns - 30 cycles 7.611 ns
250 - 129 cycles 32.336 ns - 37 cycles 9.460 ns - 37 cycles 9.468 ns
With kmemcg runtime-enabled, I was expecting to see a bigger win for
the case of kmem_cache_free_bulk(s=NULL).
Implemented a function kfree_bulk_export(), which inlines build_detached_freelist()
and which avoids including the code path for memcg. But result is almost the same:
bulk- fallback - kmem_cache_free_bulk - kfree_bulk_export
1 - 84 cycles 21.221 ns - 77 cycles 19.324 ns - 74 cycles 18.515 ns
2 - 79 cycles 19.963 ns - 44 cycles 11.120 ns - 41 cycles 10.329 ns
3 - 77 cycles 19.471 ns - 33 cycles 8.291 ns - 31 cycles 7.771 ns
4 - 76 cycles 19.191 ns - 28 cycles 7.100 ns - 27 cycles 6.765 ns
8 - 78 cycles 19.525 ns - 23 cycles 5.781 ns - 22 cycles 5.737 ns
16 - 75 cycles 18.922 ns - 20 cycles 5.152 ns - 20 cycles 5.092 ns
30 - 75 cycles 18.812 ns - 19 cycles 4.864 ns - 19 cycles 4.833 ns
32 - 75 cycles 18.807 ns - 19 cycles 4.852 ns - 23 cycles 5.896 ns
34 - 119 cycles 29.989 ns - 25 cycles 6.258 ns - 25 cycles 6.411 ns
48 - 106 cycles 26.677 ns - 28 cycles 7.182 ns - 22 cycles 5.651 ns
64 - 98 cycles 24.716 ns - 20 cycles 5.245 ns - 21 cycles 5.425 ns
128 - 115 cycles 28.905 ns - 26 cycles 6.748 ns - 27 cycles 6.787 ns
158 - 124 cycles 31.039 ns - 30 cycles 7.604 ns - 30 cycles 7.629 ns
250 - 129 cycles 32.372 ns - 37 cycles 9.475 ns - 37 cycles 9.325 ns
I took a closer look with perf, and it looks like the extra cost from
enabling kmemcg originates from the alloc code path. Specifically the
function call get_mem_cgroup_from_mm(9.95%) is the costly part, plus
__memcg_kmem_get_cache(4.84%) and __memcg_kmem_put_cache(1.35%)
(Added current code as a patch below signature)
- -
Best regards,
Jesper Dangaard Brouer
MSc.CS, Principal Kernel Engineer at Red Hat
Author of http://www.iptv-analyzer.org
LinkedIn: http://www.linkedin.com/in/brouer
[PATCH] New API kfree_bulk()
---
include/linux/slab.h | 14 +++++++++++++
mm/slab_common.c | 8 ++++++--
mm/slub.c | 52 +++++++++++++++++++++++++++++++++++++++++++++-----
3 files changed, 67 insertions(+), 7 deletions(-)
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 2037a861e367..9dd353215c2b 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -317,6 +317,20 @@ void kmem_cache_free(struct kmem_cache *, void *);
*/
void kmem_cache_free_bulk(struct kmem_cache *, size_t, void **);
int kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **);
+void kfree_bulk_export(size_t, void **);
+
+static __always_inline void kfree_bulk(size_t size, void **p)
+{
+ /* Reusing call to kmem_cache_free_bulk() allow kfree_bulk to
+ * use same code icache
+ */
+ kmem_cache_free_bulk(NULL, size, p);
+ /*
+ * Inlining in kfree_bulk_export() generate smaller code size
+ * than more generic kmem_cache_free_bulk().
+ */
+ // kfree_bulk_export(size, p);
+}
#ifdef CONFIG_NUMA
void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment;
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 3c6a86b4ec25..963c25589949 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -108,8 +108,12 @@ void __kmem_cache_free_bulk(struct kmem_cache *s, size_t nr, void **p)
{
size_t i;
- for (i = 0; i < nr; i++)
- kmem_cache_free(s, p[i]);
+ for (i = 0; i < nr; i++) {
+ if (s)
+ kmem_cache_free(s, p[i]);
+ else
+ kfree(p[i]);
+ }
}
int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
diff --git a/mm/slub.c b/mm/slub.c
index 4e4dfc4cdd0c..1675f42f17b5 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2833,29 +2833,46 @@ struct detached_freelist {
* synchronization primitive. Look ahead in the array is limited due
* to performance reasons.
*/
-static int build_detached_freelist(struct kmem_cache **s, size_t size,
- void **p, struct detached_freelist *df)
+static inline
+int build_detached_freelist(struct kmem_cache **s, size_t size,
+ void **p, struct detached_freelist *df)
{
size_t first_skipped_index = 0;
int lookahead = 3;
void *object;
+ struct page *page;
/* Always re-init detached_freelist */
df->page = NULL;
do {
object = p[--size];
+ /* Do we need !ZERO_OR_NULL_PTR(object) here? (for kfree) */
} while (!object && size);
if (!object)
return 0;
- /* Support for memcg, compiler can optimize this out */
- *s = cache_from_obj(*s, object);
+ page = virt_to_head_page(object);
+ if (!*s) {
+ /* Handle kalloc'ed objects */
+ if (unlikely(!PageSlab(page))) {
+ BUG_ON(!PageCompound(page));
+ kfree_hook(object);
+ __free_kmem_pages(page, compound_order(page));
+ p[size] = NULL; /* mark object processed */
+ return size;
+ }
+ /* Derive kmem_cache from object */
+ *s = page->slab_cache;
+ } else {
+ /* Support for memcg, compiler can optimize this out */
+ *s = cache_from_obj(*s, object);
+ }
/* Start new detached freelist */
+ df->page = page;
set_freepointer(*s, object, NULL);
- df->page = virt_to_head_page(object);
df->tail = object;
df->freelist = object;
p[size] = NULL; /* mark object processed */
@@ -2906,6 +2923,31 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
}
EXPORT_SYMBOL(kmem_cache_free_bulk);
+/* Note that interrupts must be enabled when calling this function.
+ * This function can also handle kmem_cache allocated object arrays
+ */
+// Will likely remove this function
+void kfree_bulk_export(size_t size, void **p)
+{
+ if (WARN_ON(!size))
+ return;
+
+ do {
+ struct detached_freelist df;
+ /* NULL here removes code in inlined build_detached_freelist */
+ struct kmem_cache *s = NULL;
+
+ size = build_detached_freelist(&s, size, p, &df);
+ if (unlikely(!df.page))
+ continue;
+
+ slab_free(s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
+// slab_free(df.page->slab_cache, df.page, df.freelist, df.tail,
+// df.cnt, _RET_IP_);
+ } while (likely(size));
+}
+EXPORT_SYMBOL(kfree_bulk_export);
+
/* Note that interrupts must be enabled when calling this function. */
int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
void **p)
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>
^ permalink raw reply related [flat|nested] 41+ messages in thread
* [RFC PATCH V2 9/9] slab: annotate code to generate more compact asm code
2015-12-08 16:18 ` [RFC PATCH V2 0/9] slab: cleanup and bulk API for SLAB Jesper Dangaard Brouer
` (7 preceding siblings ...)
2015-12-08 16:19 ` [RFC PATCH V2 8/9] slab: implement bulk free in SLAB allocator Jesper Dangaard Brouer
@ 2015-12-08 16:19 ` Jesper Dangaard Brouer
8 siblings, 0 replies; 41+ messages in thread
From: Jesper Dangaard Brouer @ 2015-12-08 16:19 UTC (permalink / raw)
To: linux-mm
Cc: Christoph Lameter, Vladimir Davydov, Jesper Dangaard Brouer,
Joonsoo Kim, Linus Torvalds, Andrew Morton
Premature optimizations for CONFIG_NUMA case...
---
mm/slab.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/mm/slab.c b/mm/slab.c
index 72c7958b4075..6d79fc5668c4 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3185,7 +3185,7 @@ __do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
{
void *objp;
- if (current->mempolicy || cpuset_do_slab_mem_spread()) {
+ if (unlikely(current->mempolicy || cpuset_do_slab_mem_spread())) {
objp = alternate_node_alloc(cache, flags);
if (objp)
goto out;
@@ -3196,7 +3196,7 @@ __do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
* We may just have run out of memory on the local node.
* ____cache_alloc_node() knows how to locate memory on other nodes
*/
- if (!objp)
+ if (unlikely(!objp))
objp = ____cache_alloc_node(cache, flags, numa_mem_id());
out:
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>
^ permalink raw reply related [flat|nested] 41+ messages in thread