From: Eric Dumazet <eric.dumazet@gmail.com>
To: Christoph Lameter <cl@linux.com>
Cc: Mel Gorman <mgorman@suse.de>, Pekka Enberg <penberg@kernel.org>,
	Konstantin Khlebnikov <khlebnikov@openvz.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	Matt Mackall <mpm@selenic.com>
Subject: Re: [PATCH] mm-slab: allocate kmem_cache with __GFP_REPEAT
Date: Wed, 20 Jul 2011 19:04:23 +0200	[thread overview]
Message-ID: <1311181463.2338.72.camel@edumazet-HP-Compaq-6005-Pro-SFF-PC> (raw)
In-Reply-To: <1311179465.2338.62.camel@edumazet-HP-Compaq-6005-Pro-SFF-PC>

On Wednesday, 20 July 2011 at 18:31 +0200, Eric Dumazet wrote:

> In fact the resulting code is smaller, because most fields now sit
> within a 127-byte offset (x86 assembly can use shorter instructions)
> 
> Before patch :
> # size mm/slab.o
>    text	   data	    bss	    dec	    hex	filename
>   22605	 361665	     32	 384302	  5dd2e	mm/slab.o
> 
> After patch :
> # size mm/slab.o
>    text	   data	    bss	    dec	    hex	filename
>   22347	 328929	  32800	 384076	  5dc4c	mm/slab.o
> 
> > The per node pointers are lower priority in terms of performance than the
> > per cpu pointers. I'd rather have the per node pointers requiring an
> > additional lookup. Less impact on hot code paths.
> > 
> 
> Sure. I'll post a V2 to have CPU array before NODE array.
> 

Here it is. My dev machine still runs fine after booting with this patch.
No performance impact seen so far.

Thanks

[PATCH v2] slab: shrink sizeof(struct kmem_cache)

Reduce high-order allocations for some setups: with NR_CPUS=4096, each
struct kmem_cache currently needs 64KB.

We now allocate exactly the needed size, using nr_cpu_ids and nr_node_ids.
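
For reference, a minimal sketch of the sizing trick (made-up names, assuming
the usual definitions from mm/slab.c; the real layout is in the diff below):
the per-cpu pointer slots end the structure, and the per-node pointers are
carved out of the same allocation right behind the nr_cpu_ids slots actually
used.

/* Simplified sketch, not the actual slab structures. */
struct cache_sketch {
        unsigned int batchcount;                /* hot scalar fields first ...        */
        unsigned int limit;
        struct kmem_list3 **nodelists;          /* -> &array[nr_cpu_ids], same alloc  */
        struct array_cache *array[NR_CPUS];     /* ... big per-cpu array last         */
        /* do not add fields after array[] */
};

/* Size actually allocated for a dynamically created cache. */
static size_t cache_sketch_size(void)
{
        return offsetof(struct cache_sketch, array[nr_cpu_ids]) +
               nr_node_ids * sizeof(struct kmem_list3 *);
}

Only the statically defined cache_cache keeps the full NR_CPUS-sized array,
which is why the declaration below still says [NR_CPUS].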

This also makes the code a bit smaller on x86_64, since more field offsets
now fall below the 127-byte limit for short instruction encodings:

Before patch :
# size mm/slab.o
   text    data     bss     dec     hex filename
  22605  361665      32  384302   5dd2e mm/slab.o

After patch :
# size mm/slab.o
   text    data     bss     dec     hex filename
  22349  353473    8224  384046   5dc2e mm/slab.o
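
The text shrink comes from x86 instruction encoding: a memory operand whose
displacement fits in a signed byte costs 1 byte instead of 4. A toy example
(made-up structs, nothing slab-specific):

struct hot_first {
        void *first;
        unsigned long count;    /* offset 8: fits a 1-byte displacement  */
        void *pad[512];         /* big array placed last                 */
};

struct hot_last {
        void *pad[512];         /* 4096 bytes on 64-bit                  */
        void *first;
        unsigned long count;    /* offset 4104: needs a 4-byte displacement */
};

/* gcc emits something like "mov 0x8(%rdi),%rax" (4 bytes) here ...      */
unsigned long get_first(struct hot_first *p) { return p->count; }

/* ... but "mov 0x1008(%rdi),%rax" (7 bytes) here.                        */
unsigned long get_last(struct hot_last *p) { return p->count; }

Moving the hot kmem_cache fields ahead of the per-cpu array keeps their
offsets small, hence the smaller text.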

Reported-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Pekka Enberg <penberg@kernel.org>
CC: Christoph Lameter <cl@linux.com>
CC: Andrew Morton <akpm@linux-foundation.org>
---
v2: move the CPU array before the NODE array.

 include/linux/slab_def.h |   26 +++++++++++++-------------
 mm/slab.c                |   10 ++++++----
 2 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 83203ae..6e51c4d 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -50,21 +50,19 @@
  */
 
 struct kmem_cache {
-/* 1) per-cpu data, touched during every alloc/free */
-	struct array_cache *array[NR_CPUS];
-/* 2) Cache tunables. Protected by cache_chain_mutex */
+/* 1) Cache tunables. Protected by cache_chain_mutex */
 	unsigned int batchcount;
 	unsigned int limit;
 	unsigned int shared;
 
 	unsigned int buffer_size;
 	u32 reciprocal_buffer_size;
-/* 3) touched by every alloc & free from the backend */
+/* 2) touched by every alloc & free from the backend */
 
 	unsigned int flags;		/* constant flags */
 	unsigned int num;		/* # of objs per slab */
 
-/* 4) cache_grow/shrink */
+/* 3) cache_grow/shrink */
 	/* order of pgs per slab (2^n) */
 	unsigned int gfporder;
 
@@ -80,11 +78,11 @@ struct kmem_cache {
 	/* constructor func */
 	void (*ctor)(void *obj);
 
-/* 5) cache creation/removal */
+/* 4) cache creation/removal */
 	const char *name;
 	struct list_head next;
 
-/* 6) statistics */
+/* 5) statistics */
 #ifdef CONFIG_DEBUG_SLAB
 	unsigned long num_active;
 	unsigned long num_allocations;
@@ -111,16 +109,18 @@ struct kmem_cache {
 	int obj_size;
 #endif /* CONFIG_DEBUG_SLAB */
 
+/* 6) per-cpu/per-node data, touched during every alloc/free */
 	/*
-	 * We put nodelists[] at the end of kmem_cache, because we want to size
-	 * this array to nr_node_ids slots instead of MAX_NUMNODES
+	 * We put array[] at the end of kmem_cache, because we want to size
+	 * this array to nr_cpu_ids slots instead of NR_CPUS
 	 * (see kmem_cache_init())
-	 * We still use [MAX_NUMNODES] and not [1] or [0] because cache_cache
-	 * is statically defined, so we reserve the max number of nodes.
+	 * We still use [NR_CPUS] and not [1] or [0] because cache_cache
+	 * is statically defined, so we reserve the max number of cpus.
 	 */
-	struct kmem_list3 *nodelists[MAX_NUMNODES];
+	struct kmem_list3 **nodelists;
+	struct array_cache *array[NR_CPUS];
 	/*
-	 * Do not add fields after nodelists[]
+	 * Do not add fields after array[]
 	 */
 };
 
diff --git a/mm/slab.c b/mm/slab.c
index d96e223..30181fb 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -574,7 +574,9 @@ static struct arraycache_init initarray_generic =
     { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
 
 /* internal cache of cache description objs */
+static struct kmem_list3 *cache_cache_nodelists[MAX_NUMNODES];
 static struct kmem_cache cache_cache = {
+	.nodelists = cache_cache_nodelists,
 	.batchcount = 1,
 	.limit = BOOT_CPUCACHE_ENTRIES,
 	.shared = 1,
@@ -1492,11 +1494,10 @@ void __init kmem_cache_init(void)
 	cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node];
 
 	/*
-	 * struct kmem_cache size depends on nr_node_ids, which
-	 * can be less than MAX_NUMNODES.
+	 * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids
 	 */
-	cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) +
-				 nr_node_ids * sizeof(struct kmem_list3 *);
+	cache_cache.buffer_size = offsetof(struct kmem_cache, array[nr_cpu_ids]) +
+				  nr_node_ids * sizeof(struct kmem_list3 *);
 #if DEBUG
 	cache_cache.obj_size = cache_cache.buffer_size;
 #endif
@@ -2308,6 +2309,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 	if (!cachep)
 		goto oops;
 
+	cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids];
 #if DEBUG
 	cachep->obj_size = size;
 


