* [patch 1/9] mempool - Add page allocator
       [not found] <20060125161321.647368000@localhost.localdomain>
@ 2006-01-25 19:39 ` Matthew Dobson
  2006-01-25 19:39 ` [patch 2/9] mempool - Use common mempool " Matthew Dobson
                   ` (8 subsequent siblings)
  9 siblings, 0 replies; 44+ messages in thread
From: Matthew Dobson @ 2006-01-25 19:39 UTC (permalink / raw)
  To: linux-kernel; +Cc: sri, andrea, pavel, linux-mm

plain text document attachment (critical_mempools)
Add another allocator to the common mempool code: a simple page allocator.

This will be used by the next patch in the series to replace duplicate
mempool-backed page allocators in 2 places in the kernel.  It is also very
likely that there will be more users in the future.
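
For illustration only (a hypothetical caller; MY_POOL_PAGES and the GFP
flags are placeholders, not part of this patch), a user of the new
allocator pair would look something like:

	/* pool_data smuggles the page allocation order in as an integer */
	mempool_t *pool = mempool_create(MY_POOL_PAGES, mempool_alloc_pages,
					 mempool_free_pages, (void *)0);
	struct page *page = mempool_alloc(pool, GFP_NOIO);
	...
	mempool_free(page, pool);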

Signed-off-by: Matthew Dobson <colpatch@us.ibm.com>

 include/linux/mempool.h |    6 ++++++
 mm/mempool.c            |   17 +++++++++++++++++
 2 files changed, 23 insertions(+)

Index: linux-2.6.16-rc1+critical_mempools/mm/mempool.c
===================================================================
--- linux-2.6.16-rc1+critical_mempools.orig/mm/mempool.c
+++ linux-2.6.16-rc1+critical_mempools/mm/mempool.c
@@ -289,3 +289,20 @@ void mempool_free_slab(void *element, vo
 	kmem_cache_free(mem, element);
 }
 EXPORT_SYMBOL(mempool_free_slab);
+
+/*
+ * A simple mempool-backed page allocator
+ */
+void *mempool_alloc_pages(gfp_t gfp_mask, void *pool_data)
+{
+	int order = (int)pool_data;
+	return alloc_pages(gfp_mask, order);
+}
+EXPORT_SYMBOL(mempool_alloc_pages);
+
+void mempool_free_pages(void *element, void *pool_data)
+{
+	int order = (int)pool_data;
+	__free_pages(element, order);
+}
+EXPORT_SYMBOL(mempool_free_pages);
Index: linux-2.6.16-rc1+critical_mempools/include/linux/mempool.h
===================================================================
--- linux-2.6.16-rc1+critical_mempools.orig/include/linux/mempool.h
+++ linux-2.6.16-rc1+critical_mempools/include/linux/mempool.h
@@ -38,4 +38,10 @@ extern void mempool_free(void *element, 
 void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data);
 void mempool_free_slab(void *element, void *pool_data);
 
+/*
+ * A mempool_alloc_t and mempool_free_t for a simple page allocator
+ */
+void *mempool_alloc_pages(gfp_t gfp_mask, void *pool_data);
+void mempool_free_pages(void *element, void *pool_data);
+
 #endif /* _LINUX_MEMPOOL_H */

--

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <dont@kvack.org>


* [patch 2/9] mempool - Use common mempool page allocator
       [not found] <20060125161321.647368000@localhost.localdomain>
  2006-01-25 19:39 ` [patch 1/9] mempool - Add page allocator Matthew Dobson
@ 2006-01-25 19:39 ` Matthew Dobson
  2006-01-25 19:40 ` [patch 4/9] mempool - Update mempool page allocator user Matthew Dobson
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 44+ messages in thread
From: Matthew Dobson @ 2006-01-25 19:39 UTC (permalink / raw)
  To: linux-kernel; +Cc: sri, andrea, pavel, linux-mm

plain text document attachment (critical_mempools)
Convert two mempool users that currently use their own mempool-backed page
allocators to use the generic mempool page allocator.

Also included are 2 trivial whitespace fixes.
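
Both call sites now pass the page order through pool_data, and the highmem
ISA pool keeps only a one-line wrapper to OR in GFP_DMA.  For example, the
converted dm-crypt site (from the diff below):

	cc->page_pool = mempool_create(MIN_POOL_PAGES, mempool_alloc_pages,
				       mempool_free_pages, 0);	/* order 0 */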

Signed-off-by: Matthew Dobson <colpatch@us.ibm.com>

 drivers/md/dm-crypt.c |   17 ++---------------
 mm/highmem.c          |   23 ++++++++---------------
 2 files changed, 10 insertions(+), 30 deletions(-)

Index: linux-2.6.16-rc1+critical_mempools/drivers/md/dm-crypt.c
===================================================================
--- linux-2.6.16-rc1+critical_mempools.orig/drivers/md/dm-crypt.c
+++ linux-2.6.16-rc1+critical_mempools/drivers/md/dm-crypt.c
@@ -93,19 +93,6 @@ struct crypt_config {
 
 static kmem_cache_t *_crypt_io_pool;
 
-/*
- * Mempool alloc and free functions for the page
- */
-static void *mempool_alloc_page(gfp_t gfp_mask, void *data)
-{
-	return alloc_page(gfp_mask);
-}
-
-static void mempool_free_page(void *page, void *data)
-{
-	__free_page(page);
-}
-
 
 /*
  * Different IV generation algorithms:
@@ -637,8 +624,8 @@ static int crypt_ctr(struct dm_target *t
 		goto bad3;
 	}
 
-	cc->page_pool = mempool_create(MIN_POOL_PAGES, mempool_alloc_page,
-				       mempool_free_page, NULL);
+	cc->page_pool = mempool_create(MIN_POOL_PAGES, mempool_alloc_pages,
+				       mempool_free_pages, 0);
 	if (!cc->page_pool) {
 		ti->error = PFX "Cannot allocate page mempool";
 		goto bad4;
Index: linux-2.6.16-rc1+critical_mempools/mm/highmem.c
===================================================================
--- linux-2.6.16-rc1+critical_mempools.orig/mm/highmem.c
+++ linux-2.6.16-rc1+critical_mempools/mm/highmem.c
@@ -30,15 +30,11 @@
 
 static mempool_t *page_pool, *isa_page_pool;
 
-static void *page_pool_alloc_isa(gfp_t gfp_mask, void *data)
+static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data)
 {
-	return alloc_page(gfp_mask | GFP_DMA);
+	return mempool_alloc_pages(gfp_mask | GFP_DMA, data);
 }
 
-static void page_pool_free(void *page, void *data)
-{
-	__free_page(page);
-}
 
 /*
  * Virtual_count is not a pure "count".
@@ -50,11 +46,6 @@ static void page_pool_free(void *page, v
  */
 #ifdef CONFIG_HIGHMEM
 
-static void *page_pool_alloc(gfp_t gfp_mask, void *data)
-{
-	return alloc_page(gfp_mask);
-}
-
 static int pkmap_count[LAST_PKMAP];
 static unsigned int last_pkmap_nr;
 static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock);
@@ -228,7 +219,8 @@ static __init int init_emergency_pool(vo
 	if (!i.totalhigh)
 		return 0;
 
-	page_pool = mempool_create(POOL_SIZE, page_pool_alloc, page_pool_free, NULL);
+	page_pool = mempool_create(POOL_SIZE, mempool_alloc_pages,
+				   mempool_free_pages, 0);
 	if (!page_pool)
 		BUG();
 	printk("highmem bounce pool size: %d pages\n", POOL_SIZE);
@@ -271,7 +263,8 @@ int init_emergency_isa_pool(void)
 	if (isa_page_pool)
 		return 0;
 
-	isa_page_pool = mempool_create(ISA_POOL_SIZE, page_pool_alloc_isa, page_pool_free, NULL);
+	isa_page_pool = mempool_create(ISA_POOL_SIZE, mempool_alloc_pages_isa,
+				       mempool_free_pages, 0);
 	if (!isa_page_pool)
 		BUG();
 
@@ -336,7 +329,7 @@ static void bounce_end_io(struct bio *bi
 	bio_put(bio);
 }
 
-static int bounce_end_io_write(struct bio *bio, unsigned int bytes_done,int err)
+static int bounce_end_io_write(struct bio *bio, unsigned int bytes_done, int err)
 {
 	if (bio->bi_size)
 		return 1;
@@ -383,7 +376,7 @@ static int bounce_end_io_read_isa(struct
 }
 
 static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig,
-			mempool_t *pool)
+			       mempool_t *pool)
 {
 	struct page *page;
 	struct bio *bio = NULL;

--



* [patch 4/9] mempool - Update mempool page allocator user
       [not found] <20060125161321.647368000@localhost.localdomain>
  2006-01-25 19:39 ` [patch 1/9] mempool - Add page allocator Matthew Dobson
  2006-01-25 19:39 ` [patch 2/9] mempool - Use common mempool " Matthew Dobson
@ 2006-01-25 19:40 ` Matthew Dobson
  2006-01-25 19:40 ` [patch 5/9] mempool - Update kmalloc mempool users Matthew Dobson
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 44+ messages in thread
From: Matthew Dobson @ 2006-01-25 19:40 UTC (permalink / raw)
  To: linux-kernel; +Cc: sri, andrea, pavel, linux-mm

plain text document attachment (critical_mempools)
Fixup existing mempool users to use the new mempool API, part 1.

Fix the sole "indirect" user of mempool_alloc_pages to be aware of its new
'node_id' argument.

Signed-off-by: Matthew Dobson <colpatch@us.ibm.com>

 highmem.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

Index: linux-2.6.16-rc1+critical_mempools/mm/highmem.c
===================================================================
--- linux-2.6.16-rc1+critical_mempools.orig/mm/highmem.c
+++ linux-2.6.16-rc1+critical_mempools/mm/highmem.c
@@ -30,9 +30,9 @@
 
 static mempool_t *page_pool, *isa_page_pool;
 
-static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data)
+static void *mempool_alloc_pages_isa(gfp_t gfp_mask, int node_id, void *data)
 {
-	return mempool_alloc_pages(gfp_mask | GFP_DMA, data);
+	return mempool_alloc_pages(gfp_mask | GFP_DMA, node_id, data);
 }
 


--



* [patch 5/9] mempool - Update kmalloc mempool users
       [not found] <20060125161321.647368000@localhost.localdomain>
                   ` (2 preceding siblings ...)
  2006-01-25 19:40 ` [patch 4/9] mempool - Update mempool page allocator user Matthew Dobson
@ 2006-01-25 19:40 ` Matthew Dobson
  2006-01-25 19:40 ` [patch 6/9] mempool - Update kzalloc " Matthew Dobson
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 44+ messages in thread
From: Matthew Dobson @ 2006-01-25 19:40 UTC (permalink / raw)
  To: linux-kernel; +Cc: sri, andrea, pavel, linux-mm

plain text document attachment (critical_mempools)
Fixup existing mempool users to use the new mempool API, part 2.

This patch fixes up all the trivial mempool users, which are currently just
wrappers around kmalloc().  Change these functions to call kmalloc_node()
with their shiny new 'node_id' argument.

Possible further work:  Many of these functions could be replaced by a single
common mempool_kmalloc() function, with the allocation size stored in
pool_data.
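
A minimal sketch of that idea (not part of this patch; the names are
hypothetical, and the pool creator would stash the element size in
pool_data, as the zfcp and lpfc pools already do):

	static void *mempool_kmalloc(gfp_t gfp_mask, int nid, void *pool_data)
	{
		/* element size was stored in pool_data at pool creation */
		return kmalloc_node((size_t)pool_data, gfp_mask, nid);
	}

	static void mempool_kfree(void *element, void *pool_data)
	{
		kfree(element);
	}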

Signed-off-by: Matthew Dobson <colpatch@us.ibm.com>

 drivers/block/pktcdvd.c      |    8 ++++----
 drivers/md/bitmap.c          |    6 +++---
 drivers/md/dm-io.c           |    4 ++--
 drivers/md/dm-raid1.c        |    4 ++--
 drivers/s390/scsi/zfcp_aux.c |    4 ++--
 drivers/scsi/lpfc/lpfc_mem.c |    4 ++--
 fs/bio.c                     |   12 ++++++------
 7 files changed, 21 insertions(+), 21 deletions(-)

Index: linux-2.6.16-rc1+critical_mempools/drivers/block/pktcdvd.c
===================================================================
--- linux-2.6.16-rc1+critical_mempools.orig/drivers/block/pktcdvd.c
+++ linux-2.6.16-rc1+critical_mempools/drivers/block/pktcdvd.c
@@ -229,9 +229,9 @@ static int pkt_grow_pktlist(struct pktcd
 	return 1;
 }
 
-static void *pkt_rb_alloc(gfp_t gfp_mask, void *data)
+static void *pkt_rb_alloc(gfp_t gfp_mask, int nid, void *data)
 {
-	return kmalloc(sizeof(struct pkt_rb_node), gfp_mask);
+	return kmalloc_node(sizeof(struct pkt_rb_node), gfp_mask, nid);
 }
 
 static void pkt_rb_free(void *ptr, void *data)
@@ -2085,9 +2085,9 @@ static int pkt_close(struct inode *inode
 }
 

-static void *psd_pool_alloc(gfp_t gfp_mask, void *data)
+static void *psd_pool_alloc(gfp_t gfp_mask, int nid, void *data)
 {
-	return kmalloc(sizeof(struct packet_stacked_data), gfp_mask);
+	return kmalloc_node(sizeof(struct packet_stacked_data), gfp_mask, nid);
 }
 
 static void psd_pool_free(void *ptr, void *data)
Index: linux-2.6.16-rc1+critical_mempools/drivers/scsi/lpfc/lpfc_mem.c
===================================================================
--- linux-2.6.16-rc1+critical_mempools.orig/drivers/scsi/lpfc/lpfc_mem.c
+++ linux-2.6.16-rc1+critical_mempools/drivers/scsi/lpfc/lpfc_mem.c
@@ -39,9 +39,9 @@
 #define LPFC_MEM_POOL_SIZE      64      /* max elem in non-DMA safety pool */
 
 static void *
-lpfc_pool_kmalloc(gfp_t gfp_flags, void *data)
+lpfc_pool_kmalloc(gfp_t gfp_flags, int nid, void *data)
 {
-	return kmalloc((unsigned long)data, gfp_flags);
+	return kmalloc_node((unsigned long)data, gfp_flags, nid);
 }
 
 static void
Index: linux-2.6.16-rc1+critical_mempools/drivers/md/dm-raid1.c
===================================================================
--- linux-2.6.16-rc1+critical_mempools.orig/drivers/md/dm-raid1.c
+++ linux-2.6.16-rc1+critical_mempools/drivers/md/dm-raid1.c
@@ -122,9 +122,9 @@ static inline sector_t region_to_sector(
 /* FIXME move this */
 static void queue_bio(struct mirror_set *ms, struct bio *bio, int rw);
 
-static void *region_alloc(gfp_t gfp_mask, void *pool_data)
+static void *region_alloc(gfp_t gfp_mask, int nid, void *pool_data)
 {
-	return kmalloc(sizeof(struct region), gfp_mask);
+	return kmalloc_node(sizeof(struct region), gfp_mask, nid);
 }
 
 static void region_free(void *element, void *pool_data)
Index: linux-2.6.16-rc1+critical_mempools/drivers/s390/scsi/zfcp_aux.c
===================================================================
--- linux-2.6.16-rc1+critical_mempools.orig/drivers/s390/scsi/zfcp_aux.c
+++ linux-2.6.16-rc1+critical_mempools/drivers/s390/scsi/zfcp_aux.c
@@ -832,9 +832,9 @@ zfcp_unit_dequeue(struct zfcp_unit *unit
 }
 
 static void *
-zfcp_mempool_alloc(gfp_t gfp_mask, void *size)
+zfcp_mempool_alloc(gfp_t gfp_mask, int nid, void *size)
 {
-	return kmalloc((size_t) size, gfp_mask);
+	return kmalloc_node((size_t) size, gfp_mask, nid);
 }
 
 static void
Index: linux-2.6.16-rc1+critical_mempools/drivers/md/dm-io.c
===================================================================
--- linux-2.6.16-rc1+critical_mempools.orig/drivers/md/dm-io.c
+++ linux-2.6.16-rc1+critical_mempools/drivers/md/dm-io.c
@@ -32,9 +32,9 @@ struct io {
 static unsigned _num_ios;
 static mempool_t *_io_pool;
 
-static void *alloc_io(gfp_t gfp_mask, void *pool_data)
+static void *alloc_io(gfp_t gfp_mask, int nid, void *pool_data)
 {
-	return kmalloc(sizeof(struct io), gfp_mask);
+	return kmalloc_node(sizeof(struct io), gfp_mask, nid);
 }
 
 static void free_io(void *element, void *pool_data)
Index: linux-2.6.16-rc1+critical_mempools/drivers/md/bitmap.c
===================================================================
--- linux-2.6.16-rc1+critical_mempools.orig/drivers/md/bitmap.c
+++ linux-2.6.16-rc1+critical_mempools/drivers/md/bitmap.c
@@ -90,9 +90,9 @@ int bitmap_active(struct bitmap *bitmap)
 
 #define WRITE_POOL_SIZE 256
 /* mempool for queueing pending writes on the bitmap file */
-static void *write_pool_alloc(gfp_t gfp_flags, void *data)
+static void *write_pool_alloc(gfp_t gfp_flags, int nid, void *data)
 {
-	return kmalloc(sizeof(struct page_list), gfp_flags);
+	return kmalloc_node(sizeof(struct page_list), gfp_flags, nid);
 }
 
 static void write_pool_free(void *ptr, void *data)
@@ -1565,7 +1565,7 @@ int bitmap_create(mddev_t *mddev)
 	INIT_LIST_HEAD(&bitmap->complete_pages);
 	init_waitqueue_head(&bitmap->write_wait);
 	bitmap->write_pool = mempool_create(WRITE_POOL_SIZE, write_pool_alloc,
-				write_pool_free, NULL);
+					    write_pool_free, NULL);
 	err = -ENOMEM;
 	if (!bitmap->write_pool)
 		goto error;
Index: linux-2.6.16-rc1+critical_mempools/fs/bio.c
===================================================================
--- linux-2.6.16-rc1+critical_mempools.orig/fs/bio.c
+++ linux-2.6.16-rc1+critical_mempools/fs/bio.c
@@ -1122,9 +1122,9 @@ struct bio_pair *bio_split(struct bio *b
 	return bp;
 }
 
-static void *bio_pair_alloc(gfp_t gfp_flags, void *data)
+static void *bio_pair_alloc(gfp_t gfp_flags, int nid, void *data)
 {
-	return kmalloc(sizeof(struct bio_pair), gfp_flags);
+	return kmalloc_node(sizeof(struct bio_pair), gfp_flags, nid);
 }
 
 static void bio_pair_free(void *bp, void *data)
@@ -1149,7 +1149,7 @@ static int biovec_create_pools(struct bi
 			pool_entries >>= 1;
 
 		*bvp = mempool_create(pool_entries, mempool_alloc_slab,
-					mempool_free_slab, bp->slab);
+				      mempool_free_slab, bp->slab);
 		if (!*bvp)
 			return -ENOMEM;
 	}
@@ -1188,7 +1188,7 @@ struct bio_set *bioset_create(int bio_po
 
 	memset(bs, 0, sizeof(*bs));
 	bs->bio_pool = mempool_create(bio_pool_size, mempool_alloc_slab,
-			mempool_free_slab, bio_slab);
+				      mempool_free_slab, bio_slab);
 
 	if (!bs->bio_pool)
 		goto bad;
@@ -1252,8 +1252,8 @@ static int __init init_bio(void)
 	if (!fs_bio_set)
 		panic("bio: can't allocate bios\n");
 
-	bio_split_pool = mempool_create(BIO_SPLIT_ENTRIES,
-				bio_pair_alloc, bio_pair_free, NULL);
+	bio_split_pool = mempool_create(BIO_SPLIT_ENTRIES, bio_pair_alloc,
+					    bio_pair_free, NULL);
 	if (!bio_split_pool)
 		panic("bio: can't create split pool\n");
 

--



* [patch 6/9] mempool - Update kzalloc mempool users
       [not found] <20060125161321.647368000@localhost.localdomain>
                   ` (3 preceding siblings ...)
  2006-01-25 19:40 ` [patch 5/9] mempool - Update kmalloc mempool users Matthew Dobson
@ 2006-01-25 19:40 ` Matthew Dobson
  2006-01-26  7:30   ` Pekka Enberg
  2006-01-25 19:40 ` [patch 8/9] slab - Add *_mempool slab variants Matthew Dobson
                   ` (4 subsequent siblings)
  9 siblings, 1 reply; 44+ messages in thread
From: Matthew Dobson @ 2006-01-25 19:40 UTC (permalink / raw)
  To: linux-kernel; +Cc: sri, andrea, pavel, linux-mm

plain text document attachment (critical_mempools)
Fixup existing mempool users to use the new mempool API, part 3.

This patch fixes up mempool users that are basically just wrappers around
kzalloc().  To do this we create a new function, kzalloc_node(), and change
all the old mempool allocators that were calling kzalloc() to call
kzalloc_node() instead.

A couple of trivial whitespace fixes are included.
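
For illustration (a hypothetical caller; 'nid' is whatever node the caller
cares about), a NUMA-aware zeroed allocation now reads:

	struct foo *f = kzalloc_node(sizeof(*f), GFP_KERNEL, nid);

and passing nid == -1 preserves the old "any node" kzalloc() behaviour.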

Signed-off-by: Matthew Dobson <colpatch@us.ibm.com>

 drivers/md/multipath.c |    6 ++----
 drivers/md/raid1.c     |    9 ++++-----
 drivers/md/raid10.c    |    8 ++++----
 include/linux/slab.h   |    2 ++
 mm/util.c              |   13 ++++++++++---
 5 files changed, 22 insertions(+), 16 deletions(-)

Index: linux-2.6.16-rc1+critical_mempools/mm/util.c
===================================================================
--- linux-2.6.16-rc1+critical_mempools.orig/mm/util.c
+++ linux-2.6.16-rc1+critical_mempools/mm/util.c
@@ -3,17 +3,24 @@
 #include <linux/module.h>
 
 /**
- * kzalloc - allocate memory. The memory is set to zero.
+ * kzalloc_node - allocate memory. The memory is set to zero.
  * @size: how many bytes of memory are required.
  * @flags: the type of memory to allocate.
+ * @node_id: where (which node) to allocate the memory
  */
-void *kzalloc(size_t size, gfp_t flags)
+void *kzalloc_node(size_t size, gfp_t flags, int node_id)
 {
-	void *ret = kmalloc(size, flags);
+	void *ret = kmalloc_node(size, flags, node_id);
 	if (ret)
 		memset(ret, 0, size);
 	return ret;
 }
+EXPORT_SYMBOL(kzalloc_node);
+
+void *kzalloc(size_t size, gfp_t flags)
+{
+	return kzalloc_node(size, flags, -1);
+}
 EXPORT_SYMBOL(kzalloc);
 
 /*
Index: linux-2.6.16-rc1+critical_mempools/include/linux/slab.h
===================================================================
--- linux-2.6.16-rc1+critical_mempools.orig/include/linux/slab.h
+++ linux-2.6.16-rc1+critical_mempools/include/linux/slab.h
@@ -101,6 +101,7 @@ found:
 	return __kmalloc(size, flags);
 }
 
+extern void *kzalloc_node(size_t, gfp_t, int);
 extern void *kzalloc(size_t, gfp_t);
 
 /**
@@ -151,6 +152,7 @@ void *kmem_cache_alloc(struct kmem_cache
 void kmem_cache_free(struct kmem_cache *c, void *b);
 const char *kmem_cache_name(struct kmem_cache *);
 void *kmalloc(size_t size, gfp_t flags);
+void *kzalloc_node(size_t size, gfp_t flags, int node_id);
 void *kzalloc(size_t size, gfp_t flags);
 void kfree(const void *m);
 unsigned int ksize(const void *m);
Index: linux-2.6.16-rc1+critical_mempools/drivers/md/multipath.c
===================================================================
--- linux-2.6.16-rc1+critical_mempools.orig/drivers/md/multipath.c
+++ linux-2.6.16-rc1+critical_mempools/drivers/md/multipath.c
@@ -35,11 +35,9 @@
 #define	NR_RESERVED_BUFS	32
 

-static void *mp_pool_alloc(gfp_t gfp_flags, void *data)
+static void *mp_pool_alloc(gfp_t gfp_flags, int node_id, void *data)
 {
-	struct multipath_bh *mpb;
-	mpb = kzalloc(sizeof(*mpb), gfp_flags);
-	return mpb;
+	return kzalloc_node(sizeof(struct multipath_bh), gfp_flags, node_id);
 }
 
 static void mp_pool_free(void *mpb, void *data)
Index: linux-2.6.16-rc1+critical_mempools/drivers/md/raid10.c
===================================================================
--- linux-2.6.16-rc1+critical_mempools.orig/drivers/md/raid10.c
+++ linux-2.6.16-rc1+critical_mempools/drivers/md/raid10.c
@@ -52,14 +52,14 @@ static void unplug_slaves(mddev_t *mddev
 static void allow_barrier(conf_t *conf);
 static void lower_barrier(conf_t *conf);
 
-static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
+static void *r10bio_pool_alloc(gfp_t gfp_flags, int node_id, void *data)
 {
 	conf_t *conf = data;
 	r10bio_t *r10_bio;
 	int size = offsetof(struct r10bio_s, devs[conf->copies]);
 
 	/* allocate a r10bio with room for raid_disks entries in the bios array */
-	r10_bio = kzalloc(size, gfp_flags);
+	r10_bio = kzalloc_node(size, gfp_flags, node_id);
 	if (!r10_bio)
 		unplug_slaves(conf->mddev);
 
@@ -93,7 +93,7 @@ static void * r10buf_pool_alloc(gfp_t gf
 	int i, j;
 	int nalloc;
 
-	r10_bio = r10bio_pool_alloc(gfp_flags, conf);
+	r10_bio = r10bio_pool_alloc(gfp_flags, -1, conf);
 	if (!r10_bio) {
 		unplug_slaves(conf->mddev);
 		return NULL;
@@ -1949,7 +1949,7 @@ static int run(mddev_t *mddev)
 	conf->stride = stride << conf->chunk_shift;
 
 	conf->r10bio_pool = mempool_create(NR_RAID10_BIOS, r10bio_pool_alloc,
-						r10bio_pool_free, conf);
+					   r10bio_pool_free, conf);
 	if (!conf->r10bio_pool) {
 		printk(KERN_ERR "raid10: couldn't allocate memory for %s\n",
 			mdname(mddev));
Index: linux-2.6.16-rc1+critical_mempools/drivers/md/raid1.c
===================================================================
--- linux-2.6.16-rc1+critical_mempools.orig/drivers/md/raid1.c
+++ linux-2.6.16-rc1+critical_mempools/drivers/md/raid1.c
@@ -53,14 +53,14 @@ static void unplug_slaves(mddev_t *mddev
 static void allow_barrier(conf_t *conf);
 static void lower_barrier(conf_t *conf);
 
-static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data)
+static void *r1bio_pool_alloc(gfp_t gfp_flags, int node_id, void *data)
 {
 	struct pool_info *pi = data;
 	r1bio_t *r1_bio;
 	int size = offsetof(r1bio_t, bios[pi->raid_disks]);
 
 	/* allocate a r1bio with room for raid_disks entries in the bios array */
-	r1_bio = kzalloc(size, gfp_flags);
+	r1_bio = kzalloc_node(size, gfp_flags, node_id);
 	if (!r1_bio)
 		unplug_slaves(pi->mddev);
 
@@ -86,7 +86,7 @@ static void * r1buf_pool_alloc(gfp_t gfp
 	struct bio *bio;
 	int i, j;
 
-	r1_bio = r1bio_pool_alloc(gfp_flags, pi);
+	r1_bio = r1bio_pool_alloc(gfp_flags, -1, pi);
 	if (!r1_bio) {
 		unplug_slaves(pi->mddev);
 		return NULL;
@@ -1811,8 +1811,7 @@ static int run(mddev_t *mddev)
 	conf->poolinfo->mddev = mddev;
 	conf->poolinfo->raid_disks = mddev->raid_disks;
 	conf->r1bio_pool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc,
-					  r1bio_pool_free,
-					  conf->poolinfo);
+					  r1bio_pool_free, conf->poolinfo);
 	if (!conf->r1bio_pool)
 		goto out_no_mem;
 

--



* [patch 8/9] slab - Add *_mempool slab variants
       [not found] <20060125161321.647368000@localhost.localdomain>
                   ` (4 preceding siblings ...)
  2006-01-25 19:40 ` [patch 6/9] mempool - Update kzalloc " Matthew Dobson
@ 2006-01-25 19:40 ` Matthew Dobson
  2006-01-26  7:41   ` Pekka Enberg
  2006-01-25 19:40 ` [patch 9/9] slab - Implement single mempool backing for slab allocator Matthew Dobson
                   ` (3 subsequent siblings)
  9 siblings, 1 reply; 44+ messages in thread
From: Matthew Dobson @ 2006-01-25 19:40 UTC (permalink / raw)
  To: linux-kernel; +Cc: sri, andrea, pavel, linux-mm

plain text document attachment (critical_mempools)
Support for using a single mempool as a critical pool for all slab allocations.

This patch adds *_mempool variants to the existing slab allocator API functions:
kmalloc_mempool(), kmalloc_node_mempool(), kmem_cache_alloc_mempool() &
kmem_cache_alloc_node_mempool().  These functions behave the same as their
non-mempool cousins, but they take an additional mempool_t argument.

This patch does not actually USE the mempool_t argument; it simply adds the
new functions.  The next patch in the series will add code to use the
mempools.  This patch should have no externally visible effect on existing
users of the slab allocator.
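
For example (illustrative; my_cachep and my_pool are placeholders), after
this patch a subsystem can write:

	/* behaves exactly like kmem_cache_alloc()/kmalloc() for now; the
	 * pool argument is only wired up by the next patch */
	obj = kmem_cache_alloc_mempool(my_cachep, GFP_KERNEL, my_pool);
	buf = kmalloc_mempool(128, GFP_KERNEL, my_pool);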

Signed-off-by: Matthew Dobson <colpatch@us.ibm.com>

 include/linux/slab.h |   38 +++++++++++++++++++++++++++++---------
 mm/slab.c            |   39 +++++++++++++++++++++++++++++++--------
 2 files changed, 60 insertions(+), 17 deletions(-)

Index: linux-2.6.16-rc1+critical_mempools/include/linux/slab.h
===================================================================
--- linux-2.6.16-rc1+critical_mempools.orig/include/linux/slab.h
+++ linux-2.6.16-rc1+critical_mempools/include/linux/slab.h
@@ -15,6 +15,7 @@ typedef struct kmem_cache kmem_cache_t;
 #include	<linux/gfp.h>
 #include	<linux/init.h>
 #include	<linux/types.h>
+#include	<linux/mempool.h>
 #include	<asm/page.h>		/* kmalloc_sizes.h needs PAGE_SIZE */
 #include	<asm/cache.h>		/* kmalloc_sizes.h needs L1_CACHE_BYTES */
 
@@ -63,6 +64,7 @@ extern kmem_cache_t *kmem_cache_create(c
 				       void (*)(void *, kmem_cache_t *, unsigned long));
 extern int kmem_cache_destroy(kmem_cache_t *);
 extern int kmem_cache_shrink(kmem_cache_t *);
+extern void *kmem_cache_alloc_mempool(kmem_cache_t *, gfp_t, mempool_t *);
 extern void *kmem_cache_alloc(kmem_cache_t *, gfp_t);
 extern void kmem_cache_free(kmem_cache_t *, void *);
 extern unsigned int kmem_cache_size(kmem_cache_t *);
@@ -76,9 +78,9 @@ struct cache_sizes {
 	kmem_cache_t	*cs_dmacachep;
 };
 extern struct cache_sizes malloc_sizes[];
-extern void *__kmalloc(size_t, gfp_t);
+extern void *__kmalloc(size_t, gfp_t, mempool_t *);
 
-static inline void *kmalloc(size_t size, gfp_t flags)
+static inline void *kmalloc_mempool(size_t size, gfp_t flags, mempool_t *pool)
 {
 	if (__builtin_constant_p(size)) {
 		int i = 0;
@@ -94,11 +96,16 @@ static inline void *kmalloc(size_t size,
 			__you_cannot_kmalloc_that_much();
 		}
 found:
-		return kmem_cache_alloc((flags & GFP_DMA) ?
+		return kmem_cache_alloc_mempool((flags & GFP_DMA) ?
 			malloc_sizes[i].cs_dmacachep :
-			malloc_sizes[i].cs_cachep, flags);
+			malloc_sizes[i].cs_cachep, flags, pool);
 	}
-	return __kmalloc(size, flags);
+	return __kmalloc(size, flags, pool);
+}
+
+static inline void *kmalloc(size_t size, gfp_t flags)
+{
+	return kmalloc_mempool(size, flags, NULL);
 }
 
 extern void *kzalloc_node(size_t, gfp_t, int);
@@ -121,14 +128,27 @@ extern void kfree(const void *);
 extern unsigned int ksize(const void *);
 
 #ifdef CONFIG_NUMA
-extern void *kmem_cache_alloc_node(kmem_cache_t *, gfp_t flags, int node);
-extern void *kmalloc_node(size_t size, gfp_t flags, int node);
+extern void *kmem_cache_alloc_node_mempool(kmem_cache_t *, gfp_t, int, mempool_t *);
+extern void *kmem_cache_alloc_node(kmem_cache_t *, gfp_t, int);
+extern void *kmalloc_node_mempool(size_t, gfp_t, int, mempool_t *);
+extern void *kmalloc_node(size_t, gfp_t, int);
 #else
-static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int node)
+static inline void *kmem_cache_alloc_node_mempool(kmem_cache_t *cachep, gfp_t flags,
+						  int node_id, mempool_t *pool)
+{
+	return kmem_cache_alloc_mempool(cachep, flags, pool);
+}
+static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags,
+					  int node_id)
 {
 	return kmem_cache_alloc(cachep, flags);
 }
-static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
+static inline void *kmalloc_node_mempool(size_t size, gfp_t flags, int node_id,
+					 mempool_t *pool)
+{
+	return kmalloc_mempool(size, flags, pool);
+}
+static inline void *kmalloc_node(size_t size, gfp_t flags, int node_id)
 {
 	return kmalloc(size, flags);
 }
Index: linux-2.6.16-rc1+critical_mempools/mm/slab.c
===================================================================
--- linux-2.6.16-rc1+critical_mempools.orig/mm/slab.c
+++ linux-2.6.16-rc1+critical_mempools/mm/slab.c
@@ -2837,17 +2837,25 @@ static inline void __cache_free(kmem_cac
 }
 
 /**
- * kmem_cache_alloc - Allocate an object
+ * kmem_cache_alloc_mempool - Allocate an object
  * @cachep: The cache to allocate from.
  * @flags: See kmalloc().
+ * @pool: mempool to allocate pages from if we need to refill a slab
  *
  * Allocate an object from this cache.  The flags are only relevant
  * if the cache has no available objects.
  */
-void *kmem_cache_alloc(kmem_cache_t *cachep, gfp_t flags)
+void *kmem_cache_alloc_mempool(kmem_cache_t *cachep, gfp_t flags,
+			       mempool_t *pool)
 {
 	return __cache_alloc(cachep, flags);
 }
+EXPORT_SYMBOL(kmem_cache_alloc_mempool);
+
+void *kmem_cache_alloc(kmem_cache_t *cachep, gfp_t flags)
+{
+	return kmem_cache_alloc_mempool(cachep, flags, NULL);
+}
 EXPORT_SYMBOL(kmem_cache_alloc);
 
 /**
@@ -2894,18 +2902,20 @@ int fastcall kmem_ptr_validate(kmem_cach
 
 #ifdef CONFIG_NUMA
 /**
- * kmem_cache_alloc_node - Allocate an object on the specified node
+ * kmem_cache_alloc_node_mempool - Allocate an object on the specified node
  * @cachep: The cache to allocate from.
  * @flags: See kmalloc().
  * @nodeid: node number of the target node.
+ * @pool: mempool to allocate pages from if we need to refill a slab
  *
- * Identical to kmem_cache_alloc, except that this function is slow
+ * Identical to kmem_cache_alloc_mempool, except that this function is slow
  * and can sleep. And it will allocate memory on the given node, which
  * can improve the performance for cpu bound structures.
  * New and improved: it will now make sure that the object gets
  * put on the correct node list so that there is no false sharing.
  */
-void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int nodeid)
+void *kmem_cache_alloc_node_mempool(kmem_cache_t *cachep, gfp_t flags,
+				    int nodeid, mempool_t *pool)
 {
 	unsigned long save_flags;
 	void *ptr;
@@ -2934,16 +2944,29 @@ void *kmem_cache_alloc_node(kmem_cache_t
 
 	return ptr;
 }
+EXPORT_SYMBOL(kmem_cache_alloc_node_mempool);
+
+void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int nodeid)
+{
+	return kmem_cache_alloc_node_mempool(cachep, flags, nodeid, NULL);
+}
 EXPORT_SYMBOL(kmem_cache_alloc_node);
 
-void *kmalloc_node(size_t size, gfp_t flags, int node)
+void *kmalloc_node_mempool(size_t size, gfp_t flags, int nodeid,
+			   mempool_t *pool)
 {
 	kmem_cache_t *cachep;
 
 	cachep = kmem_find_general_cachep(size, flags);
 	if (unlikely(cachep == NULL))
 		return NULL;
-	return kmem_cache_alloc_node(cachep, flags, node);
+	return kmem_cache_alloc_node_mempool(cachep, flags, nodeid, pool);
+}
+EXPORT_SYMBOL(kmalloc_node_mempool);
+
+void *kmalloc_node(size_t size, gfp_t flags, int nodeid)
+{
+	return kmalloc_node_mempool(size, flags, nodeid, NULL);
 }
 EXPORT_SYMBOL(kmalloc_node);
 #endif
@@ -2969,7 +2992,7 @@ EXPORT_SYMBOL(kmalloc_node);
  * platforms.  For example, on i386, it means that the memory must come
  * from the first 16MB.
  */
-void *__kmalloc(size_t size, gfp_t flags)
+void *__kmalloc(size_t size, gfp_t flags, mempool_t *pool)
 {
 	kmem_cache_t *cachep;
 

--



* [patch 9/9] slab - Implement single mempool backing for slab allocator
       [not found] <20060125161321.647368000@localhost.localdomain>
                   ` (5 preceding siblings ...)
  2006-01-25 19:40 ` [patch 8/9] slab - Add *_mempool slab variants Matthew Dobson
@ 2006-01-25 19:40 ` Matthew Dobson
  2006-01-26  8:11   ` Pekka Enberg
  2006-01-25 23:51 ` [patch 1/9] mempool - Add page allocator Matthew Dobson
                   ` (2 subsequent siblings)
  9 siblings, 1 reply; 44+ messages in thread
From: Matthew Dobson @ 2006-01-25 19:40 UTC (permalink / raw)
  To: linux-kernel; +Cc: sri, andrea, pavel, linux-mm

plain text document attachment (critical_mempools)
Support for using a single mempool as a critical pool for all slab allocations.

This patch completes the actual implementation of this functionality.  What we
do is take the mempool_t pointer, which is now passed into the slab allocator
by all the externally callable functions (thanks to the last patch), and pass
it all the way down through the slab allocator code.  If the slab allocator
needs to allocate memory to satisfy a slab request, which only happens in
kmem_getpages(), it will allocate that memory via the mempool's allocator,
rather than calling alloc_pages_node() directly.  This allows us to use a
single mempool to back ALL slab allocations for a single subsystem, rather than
having to back each & every kmem_cache_alloc/kmalloc allocation that subsystem
makes with its own mempool.
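
Putting the series together, a subsystem might set up its critical pool
like this (an illustrative sketch; the names and sizes are placeholders).
Note that kmem_getpages() below only takes the mempool path when the
pool's order, stored in pool_data, matches the cache's gfporder:

	/* one order-0 emergency pool backing every allocation we make */
	pool = mempool_create(NR_EMERGENCY_PAGES, mempool_alloc_pages,
			      mempool_free_pages, (void *)0);

	obj = kmem_cache_alloc_mempool(my_cachep, GFP_NOIO, pool);
	buf = kmalloc_mempool(64, GFP_NOIO, pool);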

Signed-off-by: Matthew Dobson <colpatch@us.ibm.com>

 slab.c |   60 +++++++++++++++++++++++++++++++++++++++---------------------
 1 files changed, 39 insertions(+), 21 deletions(-)

Index: linux-2.6.16-rc1+critical_mempools/mm/slab.c
===================================================================
--- linux-2.6.16-rc1+critical_mempools.orig/mm/slab.c
+++ linux-2.6.16-rc1+critical_mempools/mm/slab.c
@@ -1209,15 +1209,26 @@ __initcall(cpucache_init);
  * If we requested dmaable memory, we will get it. Even if we
  * did not request dmaable memory, we might get it, but that
  * would be relatively rare and ignorable.
+ *
+ * For now, we only support order-0 allocations with mempools.
  */
-static void *kmem_getpages(kmem_cache_t *cachep, gfp_t flags, int nodeid)
+static void *kmem_getpages(kmem_cache_t *cachep, gfp_t flags, int nodeid,
+			   mempool_t *pool)
 {
 	struct page *page;
 	void *addr;
 	int i;
 
 	flags |= cachep->gfpflags;
-	page = alloc_pages_node(nodeid, flags, cachep->gfporder);
+	/*
+	 * If this allocation request isn't backed by a memory pool, or if that
+	 * memory pool's gfporder is not the same as the cache's gfporder, fall
+	 * back to alloc_pages_node().
+	 */
+	if (!pool || cachep->gfporder != (int)pool->pool_data)
+		page = alloc_pages_node(nodeid, flags, cachep->gfporder);
+	else
+		page = mempool_alloc_node(pool, flags, nodeid);
 	if (!page)
 		return NULL;
 	addr = page_address(page);
@@ -2084,13 +2095,15 @@ EXPORT_SYMBOL(kmem_cache_destroy);
 
 /* Get the memory for a slab management obj. */
 static struct slab *alloc_slabmgmt(kmem_cache_t *cachep, void *objp,
-				   int colour_off, gfp_t local_flags)
+				   int colour_off, gfp_t local_flags,
+				   mempool_t *pool)
 {
 	struct slab *slabp;
 
 	if (OFF_SLAB(cachep)) {
 		/* Slab management obj is off-slab. */
-		slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags);
+		slabp = kmem_cache_alloc_mempool(cachep->slabp_cache,
+						 local_flags, pool);
 		if (!slabp)
 			return NULL;
 	} else {
@@ -2188,7 +2201,8 @@ static void set_slab_attr(kmem_cache_t *
  * Grow (by 1) the number of slabs within a cache.  This is called by
  * kmem_cache_alloc() when there are no active objs left in a cache.
  */
-static int cache_grow(kmem_cache_t *cachep, gfp_t flags, int nodeid)
+static int cache_grow(kmem_cache_t *cachep, gfp_t flags, int nodeid,
+		      mempool_t *pool)
 {
 	struct slab *slabp;
 	void *objp;
@@ -2242,11 +2256,11 @@ static int cache_grow(kmem_cache_t *cach
 	/* Get mem for the objs.
 	 * Attempt to allocate a physical page from 'nodeid',
 	 */
-	if (!(objp = kmem_getpages(cachep, flags, nodeid)))
+	if (!(objp = kmem_getpages(cachep, flags, nodeid, pool)))
 		goto failed;
 
 	/* Get slab management. */
-	if (!(slabp = alloc_slabmgmt(cachep, objp, offset, local_flags)))
+	if (!(slabp = alloc_slabmgmt(cachep, objp, offset, local_flags, pool)))
 		goto opps1;
 
 	slabp->nodeid = nodeid;
@@ -2406,7 +2420,8 @@ static void check_slabp(kmem_cache_t *ca
 #define check_slabp(x,y) do { } while(0)
 #endif
 
-static void *cache_alloc_refill(kmem_cache_t *cachep, gfp_t flags)
+static void *cache_alloc_refill(kmem_cache_t *cachep, gfp_t flags,
+				mempool_t *pool)
 {
 	int batchcount;
 	struct kmem_list3 *l3;
@@ -2492,7 +2507,7 @@ static void *cache_alloc_refill(kmem_cac
 
 	if (unlikely(!ac->avail)) {
 		int x;
-		x = cache_grow(cachep, flags, numa_node_id());
+		x = cache_grow(cachep, flags, numa_node_id(), pool);
 
 		// cache_grow can reenable interrupts, then ac could change.
 		ac = ac_data(cachep);
@@ -2565,7 +2580,8 @@ static void *cache_alloc_debugcheck_afte
 #define cache_alloc_debugcheck_after(a,b,objp,d) (objp)
 #endif
 
-static inline void *____cache_alloc(kmem_cache_t *cachep, gfp_t flags)
+static inline void *____cache_alloc(kmem_cache_t *cachep, gfp_t flags,
+				    mempool_t *pool)
 {
 	void *objp;
 	struct array_cache *ac;
@@ -2578,12 +2594,13 @@ static inline void *____cache_alloc(kmem
 		objp = ac->entry[--ac->avail];
 	} else {
 		STATS_INC_ALLOCMISS(cachep);
-		objp = cache_alloc_refill(cachep, flags);
+		objp = cache_alloc_refill(cachep, flags, pool);
 	}
 	return objp;
 }
 
-static inline void *__cache_alloc(kmem_cache_t *cachep, gfp_t flags)
+static inline void *__cache_alloc(kmem_cache_t *cachep, gfp_t flags,
+				  mempool_t *pool)
 {
 	unsigned long save_flags;
 	void *objp;
@@ -2591,7 +2608,7 @@ static inline void *__cache_alloc(kmem_c
 	cache_alloc_debugcheck_before(cachep, flags);
 
 	local_irq_save(save_flags);
-	objp = ____cache_alloc(cachep, flags);
+	objp = ____cache_alloc(cachep, flags, pool);
 	local_irq_restore(save_flags);
 	objp = cache_alloc_debugcheck_after(cachep, flags, objp,
 					    __builtin_return_address(0));
@@ -2603,7 +2620,8 @@ static inline void *__cache_alloc(kmem_c
 /*
  * A interface to enable slab creation on nodeid
  */
-static void *__cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int nodeid)
+static void *__cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int nodeid,
+				mempool_t *pool)
 {
 	struct list_head *entry;
 	struct slab *slabp;
@@ -2659,7 +2677,7 @@ static void *__cache_alloc_node(kmem_cac
 
       must_grow:
 	spin_unlock(&l3->list_lock);
-	x = cache_grow(cachep, flags, nodeid);
+	x = cache_grow(cachep, flags, nodeid, pool);
 
 	if (!x)
 		return NULL;
@@ -2848,7 +2866,7 @@ static inline void __cache_free(kmem_cac
 void *kmem_cache_alloc_mempool(kmem_cache_t *cachep, gfp_t flags,
 			       mempool_t *pool)
 {
-	return __cache_alloc(cachep, flags);
+	return __cache_alloc(cachep, flags, pool);
 }
 EXPORT_SYMBOL(kmem_cache_alloc_mempool);
 
@@ -2921,22 +2939,22 @@ void *kmem_cache_alloc_node_mempool(kmem
 	void *ptr;
 
 	if (nodeid == -1)
-		return __cache_alloc(cachep, flags);
+		return __cache_alloc(cachep, flags, pool);
 
 	if (unlikely(!cachep->nodelists[nodeid])) {
 		/* Fall back to __cache_alloc if we run into trouble */
 		printk(KERN_WARNING
 		       "slab: not allocating in inactive node %d for cache %s\n",
 		       nodeid, cachep->name);
-		return __cache_alloc(cachep, flags);
+		return __cache_alloc(cachep, flags, pool);
 	}
 
 	cache_alloc_debugcheck_before(cachep, flags);
 	local_irq_save(save_flags);
 	if (nodeid == numa_node_id())
-		ptr = ____cache_alloc(cachep, flags);
+		ptr = ____cache_alloc(cachep, flags, pool);
 	else
-		ptr = __cache_alloc_node(cachep, flags, nodeid);
+		ptr = __cache_alloc_node(cachep, flags, nodeid, pool);
 	local_irq_restore(save_flags);
 	ptr =
 	    cache_alloc_debugcheck_after(cachep, flags, ptr,
@@ -3004,7 +3022,7 @@ void *__kmalloc(size_t size, gfp_t flags
 	cachep = __find_general_cachep(size, flags);
 	if (unlikely(cachep == NULL))
 		return NULL;
-	return __cache_alloc(cachep, flags);
+	return __cache_alloc(cachep, flags, pool);
 }
 EXPORT_SYMBOL(__kmalloc);
 

--



* [patch 1/9] mempool - Add page allocator
       [not found] <20060125161321.647368000@localhost.localdomain>
                   ` (6 preceding siblings ...)
  2006-01-25 19:40 ` [patch 9/9] slab - Implement single mempool backing for slab allocator Matthew Dobson
@ 2006-01-25 23:51 ` Matthew Dobson
  2006-01-25 23:51 ` [patch 3/9] mempool - Make mempools NUMA aware Matthew Dobson
  2006-01-25 23:51 ` [patch 7/9] mempool - Update other mempool users Matthew Dobson
  9 siblings, 0 replies; 44+ messages in thread
From: Matthew Dobson @ 2006-01-25 23:51 UTC (permalink / raw)
  To: linux-kernel; +Cc: sri, andrea, pavel, linux-mm

plain text document attachment (critical_mempools)
Add another allocator to the common mempool code: a simple page allocator.

This will be used by the next patch in the series to replace duplicate
mempool-backed page allocators in 2 places in the kernel.  It is also very
likely that there will be more users in the future.

Signed-off-by: Matthew Dobson <colpatch@us.ibm.com>

 include/linux/mempool.h |    6 ++++++
 mm/mempool.c            |   17 +++++++++++++++++
 2 files changed, 23 insertions(+)

Index: linux-2.6.16-rc1+critical_mempools/mm/mempool.c
===================================================================
--- linux-2.6.16-rc1+critical_mempools.orig/mm/mempool.c
+++ linux-2.6.16-rc1+critical_mempools/mm/mempool.c
@@ -289,3 +289,20 @@ void mempool_free_slab(void *element, vo
 	kmem_cache_free(mem, element);
 }
 EXPORT_SYMBOL(mempool_free_slab);
+
+/*
+ * A simple mempool-backed page allocator
+ */
+void *mempool_alloc_pages(gfp_t gfp_mask, void *pool_data)
+{
+	int order = (int)pool_data;
+	return alloc_pages(gfp_mask, order);
+}
+EXPORT_SYMBOL(mempool_alloc_pages);
+
+void mempool_free_pages(void *element, void *pool_data)
+{
+	int order = (int)pool_data;
+	__free_pages(element, order);
+}
+EXPORT_SYMBOL(mempool_free_pages);
Index: linux-2.6.16-rc1+critical_mempools/include/linux/mempool.h
===================================================================
--- linux-2.6.16-rc1+critical_mempools.orig/include/linux/mempool.h
+++ linux-2.6.16-rc1+critical_mempools/include/linux/mempool.h
@@ -38,4 +38,10 @@ extern void mempool_free(void *element, 
 void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data);
 void mempool_free_slab(void *element, void *pool_data);
 
+/*
+ * A mempool_alloc_t and mempool_free_t for a simple page allocator
+ */
+void *mempool_alloc_pages(gfp_t gfp_mask, void *pool_data);
+void mempool_free_pages(void *element, void *pool_data);
+
 #endif /* _LINUX_MEMPOOL_H */

--



* [patch 3/9] mempool - Make mempools NUMA aware
       [not found] <20060125161321.647368000@localhost.localdomain>
                   ` (7 preceding siblings ...)
  2006-01-25 23:51 ` [patch 1/9] mempool - Add page allocator Matthew Dobson
@ 2006-01-25 23:51 ` Matthew Dobson
  2006-01-26 17:54   ` Christoph Lameter
  2006-01-27  0:23   ` [patch 3/9] mempool - Make mempools NUMA aware Benjamin LaHaise
  2006-01-25 23:51 ` [patch 7/9] mempool - Update other mempool users Matthew Dobson
  9 siblings, 2 replies; 44+ messages in thread
From: Matthew Dobson @ 2006-01-25 23:51 UTC (permalink / raw)
  To: linux-kernel; +Cc: sri, andrea, pavel, linux-mm

plain text document attachment (critical_mempools)
Add NUMA-awareness to the mempool code.  This involves several changes:

1) Update mempool_alloc_t to include a node_id argument (an illustrative
   allocator written against the new signature appears after this list).
2) Change mempool_create_node() to pass its node_id argument on to the updated
   pool->alloc() function in an attempt to allocate the memory pool elements on
   the requested node.
3) Change mempool_create() to be a static inline calling mempool_create_node().
4) Add mempool_resize_node() to the mempool API.  This function does the same
   thing as the old mempool_resize() function, but attempts to allocate both
   the internal storage array and the individual memory pool elements on the
   specified node, just like mempool_create_node() does.
5) Change mempool_resize() to be a static inline calling mempool_resize_node().
6) Add mempool_alloc_node() to the mempool API.  This function allows callers
   to request memory from a particular node.  This request is only guaranteed
   to be fulfilled with memory from the specified node if the mempool was
   originally created with mempool_create_node().  If not, this is only a hint
   to the allocator, and the request may actually be fulfilled by memory from
   another node, because we don't know from which node the memory pool elements
   were allocated.
7) Change mempool_alloc() to be a static inline calling mempool_alloc_node().
8) Update the two "builtin" mempool allocators, mempool_alloc_slab &
   mempool_alloc_pages, to use the new mempool_alloc_t by adding a nid argument
   and changing them to call kmem_cache_alloc_node() & alloc_pages_node(),
   respectively.
9) Cleanup some minor whitespace and comments.
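
As promised in point 1, an illustrative allocator pair written against the
new mempool_alloc_t (struct foo and the other names are placeholders;
dm-io's alloc_io() in patch 5 follows the same pattern):

	static void *foo_alloc(gfp_t gfp_mask, int node_id, void *pool_data)
	{
		return kmalloc_node(sizeof(struct foo), gfp_mask, node_id);
	}

	static void foo_free(void *element, void *pool_data)
	{
		kfree(element);
	}

	pool = mempool_create_node(MIN_NR, foo_alloc, foo_free, NULL, nid);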

Signed-off-by: Matthew Dobson <colpatch@us.ibm.com>

 include/linux/mempool.h |   31 ++++++++++++++++++-------
 mm/mempool.c            |   58 ++++++++++++++++++++++++++----------------------
 2 files changed, 54 insertions(+), 35 deletions(-)

Index: linux-2.6.16-rc1+critical_mempools/mm/mempool.c
===================================================================
--- linux-2.6.16-rc1+critical_mempools.orig/mm/mempool.c
+++ linux-2.6.16-rc1+critical_mempools/mm/mempool.c
@@ -38,12 +38,14 @@ static void free_pool(mempool_t *pool)
 }
 
 /**
- * mempool_create - create a memory pool
+ * mempool_create_node - create a memory pool
  * @min_nr:    the minimum number of elements guaranteed to be
  *             allocated for this pool.
  * @alloc_fn:  user-defined element-allocation function.
  * @free_fn:   user-defined element-freeing function.
  * @pool_data: optional private data available to the user-defined functions.
+ * @node_id:   node to allocate this memory pool's control structure, storage
+ *             array and all of its reserved elements on.
  *
  * this function creates and allocates a guaranteed size, preallocated
  * memory pool. The pool can be used from the mempool_alloc and mempool_free
@@ -51,15 +53,9 @@ static void free_pool(mempool_t *pool)
  * functions might sleep - as long as the mempool_alloc function is not called
  * from IRQ contexts.
  */
-mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
-				mempool_free_t *free_fn, void *pool_data)
-{
-	return  mempool_create_node(min_nr,alloc_fn,free_fn, pool_data,-1);
-}
-EXPORT_SYMBOL(mempool_create);
-
 mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn,
-			mempool_free_t *free_fn, void *pool_data, int node_id)
+			       mempool_free_t *free_fn, void *pool_data,
+			       int node_id)
 {
 	mempool_t *pool;
 	pool = kmalloc_node(sizeof(*pool), GFP_KERNEL, node_id);
@@ -85,7 +81,7 @@ mempool_t *mempool_create_node(int min_n
 	while (pool->curr_nr < pool->min_nr) {
 		void *element;
 
-		element = pool->alloc(GFP_KERNEL, pool->pool_data);
+		element = pool->alloc(GFP_KERNEL, node_id, pool->pool_data);
 		if (unlikely(!element)) {
 			free_pool(pool);
 			return NULL;
@@ -97,12 +93,14 @@ mempool_t *mempool_create_node(int min_n
 EXPORT_SYMBOL(mempool_create_node);
 
 /**
- * mempool_resize - resize an existing memory pool
+ * mempool_resize_node - resize an existing memory pool
  * @pool:       pointer to the memory pool which was allocated via
  *              mempool_create().
  * @new_min_nr: the new minimum number of elements guaranteed to be
  *              allocated for this pool.
  * @gfp_mask:   the usual allocation bitmask.
+ * @node_id:    node to allocate this memory pool's new storage array
+ *              and all of its reserved elements on.
  *
  * This function shrinks/grows the pool. In the case of growing,
  * it cannot be guaranteed that the pool will be grown to the new
@@ -112,7 +110,8 @@ EXPORT_SYMBOL(mempool_create_node);
  * while this function is running. mempool_alloc() & mempool_free()
  * might be called (eg. from IRQ contexts) while this function executes.
  */
-int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask)
+int mempool_resize_node(mempool_t *pool, int new_min_nr, gfp_t gfp_mask,
+			int node_id)
 {
 	void *element;
 	void **new_elements;
@@ -134,7 +133,8 @@ int mempool_resize(mempool_t *pool, int 
 	spin_unlock_irqrestore(&pool->lock, flags);
 
 	/* Grow the pool */
-	new_elements = kmalloc(new_min_nr * sizeof(*new_elements), gfp_mask);
+	new_elements = kmalloc_node(new_min_nr * sizeof(*new_elements),
+				    gfp_mask, node_id);
 	if (!new_elements)
 		return -ENOMEM;
 
@@ -153,7 +153,7 @@ int mempool_resize(mempool_t *pool, int 
 
 	while (pool->curr_nr < pool->min_nr) {
 		spin_unlock_irqrestore(&pool->lock, flags);
-		element = pool->alloc(gfp_mask, pool->pool_data);
+		element = pool->alloc(gfp_mask, node_id, pool->pool_data);
 		if (!element)
 			goto out;
 		spin_lock_irqsave(&pool->lock, flags);
@@ -170,14 +170,14 @@ out_unlock:
 out:
 	return 0;
 }
-EXPORT_SYMBOL(mempool_resize);
+EXPORT_SYMBOL(mempool_resize_node);
 
 /**
  * mempool_destroy - deallocate a memory pool
  * @pool:      pointer to the memory pool which was allocated via
  *             mempool_create().
  *
- * this function only sleeps if the free_fn() function sleeps. The caller
+ * this function only sleeps if the ->free() function sleeps. The caller
  * has to guarantee that all elements have been returned to the pool (ie:
  * freed) prior to calling mempool_destroy().
  */
@@ -190,17 +190,22 @@ void mempool_destroy(mempool_t *pool)
 EXPORT_SYMBOL(mempool_destroy);
 
 /**
- * mempool_alloc - allocate an element from a specific memory pool
+ * mempool_alloc_node - allocate an element from a specific memory pool
  * @pool:      pointer to the memory pool which was allocated via
  *             mempool_create().
  * @gfp_mask:  the usual allocation bitmask.
+ * @node_id:   node to _attempt_ to allocate from.  This request is only
+ *             guaranteed to be fulfilled with memory from the specified node if
+ *             the mempool was originally created with mempool_create_node().
+ *             If not, this is only a hint to the allocator, and the request
+ *             may actually be fulfilled by memory from another node.
  *
- * this function only sleeps if the alloc_fn function sleeps or
+ * this function only sleeps if the ->alloc() function sleeps or
  * returns NULL. Note that due to preallocation, this function
  * *never* fails when called from process contexts. (it might
  * fail if called from an IRQ context.)
  */
-void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
+void *mempool_alloc_node(mempool_t *pool, gfp_t gfp_mask, int node_id)
 {
 	void *element;
 	unsigned long flags;
@@ -217,7 +222,7 @@ void * mempool_alloc(mempool_t *pool, gf
 
 repeat_alloc:
 
-	element = pool->alloc(gfp_temp, pool->pool_data);
+	element = pool->alloc(gfp_temp, node_id, pool->pool_data);
 	if (likely(element != NULL))
 		return element;
 
@@ -244,7 +249,7 @@ repeat_alloc:
 
 	goto repeat_alloc;
 }
-EXPORT_SYMBOL(mempool_alloc);
+EXPORT_SYMBOL(mempool_alloc_node);
 
 /**
  * mempool_free - return an element to the pool.
@@ -252,7 +257,7 @@ EXPORT_SYMBOL(mempool_alloc);
  * @pool:      pointer to the memory pool which was allocated via
  *             mempool_create().
  *
- * this function only sleeps if the free_fn() function sleeps.
+ * this function only sleeps if the ->free() function sleeps.
  */
 void mempool_free(void *element, mempool_t *pool)
 {
@@ -276,10 +281,10 @@ EXPORT_SYMBOL(mempool_free);
 /*
  * A commonly used alloc and free fn.
  */
-void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data)
+void *mempool_alloc_slab(gfp_t gfp_mask, int node_id, void *pool_data)
 {
 	kmem_cache_t *mem = (kmem_cache_t *) pool_data;
-	return kmem_cache_alloc(mem, gfp_mask);
+	return kmem_cache_alloc_node(mem, gfp_mask, node_id);
 }
 EXPORT_SYMBOL(mempool_alloc_slab);
 
@@ -293,10 +298,11 @@ EXPORT_SYMBOL(mempool_free_slab);
 /*
  * A simple mempool-backed page allocator
  */
-void *mempool_alloc_pages(gfp_t gfp_mask, void *pool_data)
+void *mempool_alloc_pages(gfp_t gfp_mask, int node_id, void *pool_data)
 {
 	int order = (int)pool_data;
-	return alloc_pages(gfp_mask, order);
+	return alloc_pages_node(node_id >= 0 ? node_id : numa_node_id(),
+				gfp_mask, order);
 }
 EXPORT_SYMBOL(mempool_alloc_pages);
 
Index: linux-2.6.16-rc1+critical_mempools/include/linux/mempool.h
===================================================================
--- linux-2.6.16-rc1+critical_mempools.orig/include/linux/mempool.h
+++ linux-2.6.16-rc1+critical_mempools/include/linux/mempool.h
@@ -6,7 +6,7 @@
 
 #include <linux/wait.h>
 
-typedef void * (mempool_alloc_t)(gfp_t gfp_mask, void *pool_data);
+typedef void * (mempool_alloc_t)(gfp_t gfp_mask, int node_id, void *pool_data);
 typedef void (mempool_free_t)(void *element, void *pool_data);
 
 typedef struct mempool_s {
@@ -21,27 +21,40 @@ typedef struct mempool_s {
 	wait_queue_head_t wait;
 } mempool_t;
 
-extern mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
-			mempool_free_t *free_fn, void *pool_data);
 extern mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn,
-			mempool_free_t *free_fn, void *pool_data, int nid);
-
-extern int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask);
+			mempool_free_t *free_fn, void *pool_data, int node_id);
+static inline mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
+			mempool_free_t *free_fn, void *pool_data)
+{
+	return mempool_create_node(min_nr, alloc_fn, free_fn, pool_data, -1);
+}
 extern void mempool_destroy(mempool_t *pool);
-extern void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask);
+
+extern int mempool_resize_node(mempool_t *pool, int new_min_nr, gfp_t gfp_mask,
+			       int node_id);
+static inline int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask)
+{
+	return mempool_resize_node(pool, new_min_nr, gfp_mask, -1);
+}
+
+extern void *mempool_alloc_node(mempool_t *pool, gfp_t gfp_mask, int node_id);
+static inline void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
+{
+	return mempool_alloc_node(pool, gfp_mask, -1);
+}
 extern void mempool_free(void *element, mempool_t *pool);
 
 /*
  * A mempool_alloc_t and mempool_free_t that get the memory from
  * a slab that is passed in through pool_data.
  */
-void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data);
+void *mempool_alloc_slab(gfp_t gfp_mask, int node_id, void *pool_data);
 void mempool_free_slab(void *element, void *pool_data);
 
 /*
  * A mempool_alloc_t and mempool_free_t for a simple page allocator
  */
-void *mempool_alloc_pages(gfp_t gfp_mask, void *pool_data);
+void *mempool_alloc_pages(gfp_t gfp_mask, int node_id, void *pool_data);
 void mempool_free_pages(void *element, void *pool_data);
 
 #endif /* _LINUX_MEMPOOL_H */

--



* [patch 7/9] mempool - Update other mempool users
       [not found] <20060125161321.647368000@localhost.localdomain>
                   ` (8 preceding siblings ...)
  2006-01-25 23:51 ` [patch 3/9] mempool - Make mempools NUMA aware Matthew Dobson
@ 2006-01-25 23:51 ` Matthew Dobson
  9 siblings, 0 replies; 44+ messages in thread
From: Matthew Dobson @ 2006-01-25 23:51 UTC (permalink / raw)
  To: linux-kernel; +Cc: sri, andrea, pavel, linux-mm

plain text document attachment (critical_mempools)
Fixup existing mempool users to use the new mempool API, part 4.

This patch papers over the three remaining mempool users that are non-trivial
to change over to the new API.  We simply add an UNUSED_NID argument to their
mempool_alloc functions to match the new API.  None of these mempools appear to
support NUMA in any way, so this should not cause any problems.

If anyone familiar with the code involved would like to send proper fixes,
that would be greatly appreciated.
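
A proper fix would presumably rename UNUSED_NID back to a real nid and
thread it down to the page allocations these functions make internally,
along the lines of (untested sketch, mirroring the idiom patch 3 uses in
mempool_alloc_pages()):

	-	page = alloc_page(gfp_flags);
	+	page = alloc_pages_node(nid >= 0 ? nid : numa_node_id(),
	+				gfp_flags, 0);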

Signed-off-by: Matthew Dobson <colpatch@us.ibm.com>

 md/raid1.c                  |    8 ++++----
 md/raid10.c                 |    7 ++++---
 scsi/scsi_transport_iscsi.c |    4 ++--
 3 files changed, 10 insertions(+), 9 deletions(-)

Index: linux-2.6.16-rc1+critical_mempools/drivers/md/raid10.c
===================================================================
--- linux-2.6.16-rc1+critical_mempools.orig/drivers/md/raid10.c
+++ linux-2.6.16-rc1+critical_mempools/drivers/md/raid10.c
@@ -84,7 +84,7 @@ static void r10bio_pool_free(void *r10_b
  * one for write (we recover only one drive per r10buf)
  *
  */
-static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
+static void *r10buf_pool_alloc(gfp_t gfp_flags, int UNUSED_NID, void *data)
 {
 	conf_t *conf = data;
 	struct page *page;
@@ -93,7 +93,7 @@ static void * r10buf_pool_alloc(gfp_t gf
 	int i, j;
 	int nalloc;
 
-	r10_bio = r10bio_pool_alloc(gfp_flags, -1, conf);
+	r10_bio = r10bio_pool_alloc(gfp_flags, UNUSED_NID, conf);
 	if (!r10_bio) {
 		unplug_slaves(conf->mddev);
 		return NULL;
@@ -1518,7 +1518,8 @@ static int init_resync(conf_t *conf)
 	buffs = RESYNC_WINDOW / RESYNC_BLOCK_SIZE;
 	if (conf->r10buf_pool)
 		BUG();
-	conf->r10buf_pool = mempool_create(buffs, r10buf_pool_alloc, r10buf_pool_free, conf);
+	conf->r10buf_pool = mempool_create(buffs, r10buf_pool_alloc,
+					   r10buf_pool_free, conf);
 	if (!conf->r10buf_pool)
 		return -ENOMEM;
 	conf->next_resync = 0;
Index: linux-2.6.16-rc1+critical_mempools/drivers/md/raid1.c
===================================================================
--- linux-2.6.16-rc1+critical_mempools.orig/drivers/md/raid1.c
+++ linux-2.6.16-rc1+critical_mempools/drivers/md/raid1.c
@@ -78,7 +78,7 @@ static void r1bio_pool_free(void *r1_bio
 #define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE)
 #define RESYNC_WINDOW (2048*1024)
 
-static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
+static void *r1buf_pool_alloc(gfp_t gfp_flags, int UNUSED_NID, void *data)
 {
 	struct pool_info *pi = data;
 	struct page *page;
@@ -86,7 +86,7 @@ static void * r1buf_pool_alloc(gfp_t gfp
 	struct bio *bio;
 	int i, j;
 
-	r1_bio = r1bio_pool_alloc(gfp_flags, -1, pi);
+	r1_bio = r1bio_pool_alloc(gfp_flags, UNUSED_NID, pi);
 	if (!r1_bio) {
 		unplug_slaves(pi->mddev);
 		return NULL;
@@ -1543,8 +1543,8 @@ static int init_resync(conf_t *conf)
 	buffs = RESYNC_WINDOW / RESYNC_BLOCK_SIZE;
 	if (conf->r1buf_pool)
 		BUG();
-	conf->r1buf_pool = mempool_create(buffs, r1buf_pool_alloc, r1buf_pool_free,
-					  conf->poolinfo);
+	conf->r1buf_pool = mempool_create(buffs, r1buf_pool_alloc,
+					  r1buf_pool_free, conf->poolinfo);
 	if (!conf->r1buf_pool)
 		return -ENOMEM;
 	conf->next_resync = 0;
Index: linux-2.6.16-rc1+critical_mempools/drivers/scsi/scsi_transport_iscsi.c
===================================================================
--- linux-2.6.16-rc1+critical_mempools.orig/drivers/scsi/scsi_transport_iscsi.c
+++ linux-2.6.16-rc1+critical_mempools/drivers/scsi/scsi_transport_iscsi.c
@@ -462,8 +462,8 @@ static inline struct list_head *skb_to_l
 	return (struct list_head *)&skb->cb;
 }
 
-static void*
-mempool_zone_alloc_skb(unsigned int gfp_mask, void *pool_data)
+static void *
+mempool_zone_alloc_skb(unsigned int gfp_mask, int UNUSED_NID, void *pool_data)
 {
 	struct mempool_zone *zone = pool_data;
 

--


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [patch 6/9] mempool - Update kzalloc mempool users
  2006-01-25 19:40 ` [patch 6/9] mempool - Update kzalloc " Matthew Dobson
@ 2006-01-26  7:30   ` Pekka Enberg
  2006-01-26 22:03     ` Matthew Dobson
  0 siblings, 1 reply; 44+ messages in thread
From: Pekka Enberg @ 2006-01-26  7:30 UTC (permalink / raw)
  To: colpatch; +Cc: linux-kernel, sri, andrea, pavel, linux-mm

Hi,

On 1/25/06, Matthew Dobson <colpatch@us.ibm.com> wrote:
> plain text document attachment (critical_mempools)
> Fixup existing mempool users to use the new mempool API, part 3.
>
> This fixes mempool users which are basically just wrappers around kzalloc().
> To do this we create a new function, kzalloc_node(), and change all the old
> mempool allocators which were calling kzalloc() to call kzalloc_node() instead.

The slab bits look good to me. You might have some rediffing to do
because -mm has quite a bit of slab patches in it.

Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>

                               Pekka

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [patch 8/9] slab - Add *_mempool slab variants
  2006-01-25 19:40 ` [patch 8/9] slab - Add *_mempool slab variants Matthew Dobson
@ 2006-01-26  7:41   ` Pekka Enberg
  2006-01-26 22:40     ` Matthew Dobson
  0 siblings, 1 reply; 44+ messages in thread
From: Pekka Enberg @ 2006-01-26  7:41 UTC (permalink / raw)
  To: colpatch; +Cc: linux-kernel, sri, andrea, pavel, linux-mm

Hi Matthew,

On 1/25/06, Matthew Dobson <colpatch@us.ibm.com> wrote:
> +extern void *__kmalloc(size_t, gfp_t, mempool_t *);

If you really need to do this, please note that you're adding an extra
parameter push for the nominal case where mempool is not required. The
compiler is unable to optimize it away. It's better that you create a
new entry point for the mempool case in mm/slab.c rather than
overloading __kmalloc() et al. See the following patch that does that
sort of thing:

http://www.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.16-rc1/2.6.16-rc1-mm3/broken-out/slab-fix-kzalloc-and-kstrdup-caller-report-for-config_debug_slab.patch

Now as for the rest of the patch, are you sure you want to reserve
whole pages for each critical allocation that cannot be satisfied by
the slab allocator? Wouldn't it be better to use something like the
slob allocator to allocate from the mempool pages? That way you
wouldn't have to make the slab allocator mempool aware at all, simply
make your kmalloc_mempool first try the slab allocator and if it
returns NULL, go for the critical pool. All this in preferably
separate file so you don't make mm/slab.c any more complex than it is
now.
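
Something along the lines of this rough sketch, perhaps (assuming the
pool's preallocated elements are at least 'size' bytes; this is not code
from any posted patch):

	/* Try the slab allocator first; go to the critical pool only
	 * when slab returns NULL.  Could live outside mm/slab.c. */
	void *kmalloc_mempool(size_t size, gfp_t flags, mempool_t *pool)
	{
		void *obj = kmalloc(size, flags);

		if (!obj)
			obj = mempool_alloc(pool, flags);
		return obj;
	}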

                                            Pekka

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [patch 9/9] slab - Implement single mempool backing for slab allocator
  2006-01-25 19:40 ` [patch 9/9] slab - Implement single mempool backing for slab allocator Matthew Dobson
@ 2006-01-26  8:11   ` Pekka Enberg
  2006-01-26 22:48     ` Matthew Dobson
  0 siblings, 1 reply; 44+ messages in thread
From: Pekka Enberg @ 2006-01-26  8:11 UTC (permalink / raw)
  To: colpatch; +Cc: linux-kernel, sri, andrea, pavel, linux-mm

Hi,

On 1/25/06, Matthew Dobson <colpatch@us.ibm.com> wrote:
> -static void *kmem_getpages(kmem_cache_t *cachep, gfp_t flags, int nodeid)
> +static void *kmem_getpages(kmem_cache_t *cachep, gfp_t flags, int nodeid,
> +                          mempool_t *pool)
>  {
>         struct page *page;
>         void *addr;
>         int i;
>
>         flags |= cachep->gfpflags;
> -       page = alloc_pages_node(nodeid, flags, cachep->gfporder);
> +       /*
> +        * If this allocation request isn't backed by a memory pool, or if that
> +        * memory pool's gfporder is not the same as the cache's gfporder, fall
> +        * back to alloc_pages_node().
> +        */
> +       if (!pool || cachep->gfporder != (int)pool->pool_data)
> +               page = alloc_pages_node(nodeid, flags, cachep->gfporder);
> +       else
> +               page = mempool_alloc_node(pool, flags, nodeid);

You're not returning any pages to the pool, so it will run out of
pages at some point, no? Also, there's no guarantee the slab allocator
will give back the critical page any time soon either because it will
use it for non-critical allocations as well as soon as it becomes part
of the object cache slab lists.

                                     Pekka

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [patch 3/9] mempool - Make mempools NUMA aware
  2006-01-25 23:51 ` [patch 3/9] mempool - Make mempools NUMA aware Matthew Dobson
@ 2006-01-26 17:54   ` Christoph Lameter
  2006-01-26 22:57     ` Matthew Dobson
  2006-01-27  0:23   ` [patch 3/9] mempool - Make mempools NUMA aware Benjamin LaHaise
  1 sibling, 1 reply; 44+ messages in thread
From: Christoph Lameter @ 2006-01-26 17:54 UTC (permalink / raw)
  To: Matthew Dobson; +Cc: linux-kernel, sri, andrea, pavel, linux-mm

On Wed, 25 Jan 2006, Matthew Dobson wrote:

> plain text document attachment (critical_mempools)
> Add NUMA-awareness to the mempool code.  This involves several changes:

I am not quite sure why you would need numa awareness in an emergency 
memory pool. Presumably the effectiveness of the accesses does not matter.
You only want to be sure that there is some memory available, right?

You do not need this.... 

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [patch 6/9] mempool - Update kzalloc mempool users
  2006-01-26  7:30   ` Pekka Enberg
@ 2006-01-26 22:03     ` Matthew Dobson
  0 siblings, 0 replies; 44+ messages in thread
From: Matthew Dobson @ 2006-01-26 22:03 UTC (permalink / raw)
  To: Pekka Enberg; +Cc: linux-kernel, sri, andrea, pavel, linux-mm

Pekka Enberg wrote:
> Hi,
> 
> On 1/25/06, Matthew Dobson <colpatch@us.ibm.com> wrote:
> 
>>plain text document attachment (critical_mempools)
>>Fixup existing mempool users to use the new mempool API, part 3.
>>
>>This fixes mempool users which are basically just wrappers around kzalloc().
>>To do this we create a new function, kzalloc_node(), and change all the old
>>mempool allocators which were calling kzalloc() to call kzalloc_node() instead.
> 
> 
> The slab bits look good to me. You might have some rediffing to do
> because -mm has quite a bit of slab patches in it.
> 
> Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
> 
>                                Pekka

Good to hear.  I expect there to be plenty of differences.  Some pieces of
this are ready to be pushed now, but most of it is still very much in
planning/design stage.  My hopes (which I probably should have made more
clear in the introductory email) are just to get feedback on the general
approach to the problem that I'm pursuing.

Thanks!

-Matt

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [patch 8/9] slab - Add *_mempool slab variants
  2006-01-26  7:41   ` Pekka Enberg
@ 2006-01-26 22:40     ` Matthew Dobson
  2006-01-27  7:09       ` Pekka J Enberg
  2006-01-27  7:10       ` Pekka J Enberg
  0 siblings, 2 replies; 44+ messages in thread
From: Matthew Dobson @ 2006-01-26 22:40 UTC (permalink / raw)
  To: Pekka Enberg; +Cc: linux-kernel, sri, andrea, pavel, linux-mm

Pekka Enberg wrote:
> Hi Matthew,
> 
> On 1/25/06, Matthew Dobson <colpatch@us.ibm.com> wrote:
> 
>>+extern void *__kmalloc(size_t, gfp_t, mempool_t *);
> 
> 
> If you really need to do this, please note that you're adding an extra
> parameter push for the nominal case where mempool is not required. The
> compiler is unable to optimize it away. It's better that you create a
> new entry point for the mempool case in mm/slab.c rather than
> overloading __kmalloc() et al. See the following patch that does that
> sort of thing:
> 
> http://www.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.16-rc1/2.6.16-rc1-mm3/broken-out/slab-fix-kzalloc-and-kstrdup-caller-report-for-config_debug_slab.patch

At some level non-mempool users are going to have to use the same functions
as mempool users.  As you can see in patch 9/9, all we really need is to
get the mempool_t pointer down to cache_grow() where the *actual* cache
growth happens.  Unfortunately, between the external callers (kmalloc(),
kmem_cache_alloc(), etc) and cache_grow() there are several functions.  I
will try to follow an approach similar to the patch you linked to above for
this patch (modifying the externally callable functions), but I don't think
the follow-on patch can change much.  The overhead of passing along a NULL
pointer should not be too onerous.
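
Concretely, the external entry points could stay thin wrappers, along these
lines (a sketch only; __kmalloc_mempool() is a hypothetical internal name):

	/* Non-mempool callers keep the old two-argument interface and
	 * simply pass a NULL pool down toward cache_grow(). */
	static inline void *kmalloc(size_t size, gfp_t flags)
	{
		return __kmalloc_mempool(size, flags, NULL);
	}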


> Now as for the rest of the patch, are you sure you want to reserve
> whole pages for each critical allocation that cannot be satisfied by
> the slab allocator? Wouldn't it be better to use something like the
> slob allocator to allocate from the mempool pages? That way you
> wouldn't have to make the slab allocator mempool aware at all, simply
> make your kmalloc_mempool first try the slab allocator and if it
> returns NULL, go for the critical pool. All this in preferably
> separate file so you don't make mm/slab.c any more complex than it is
> now.

I decided that using a whole page allocator would be the easiest way to
cover the most common uses of slab/kmalloc, but your idea is very
interesting.  My immediate concern would be trying to determine, at kfree()
time, what was allocated by the slab allocator and what was allocated by
the critical pool.  I will give this approach more thought, as the idea of
completely separating the critical pool and slab allocator is attractive.

Thanks!

-Matt

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [patch 9/9] slab - Implement single mempool backing for slab allocator
  2006-01-26  8:11   ` Pekka Enberg
@ 2006-01-26 22:48     ` Matthew Dobson
  2006-01-27  7:22       ` Pekka J Enberg
  0 siblings, 1 reply; 44+ messages in thread
From: Matthew Dobson @ 2006-01-26 22:48 UTC (permalink / raw)
  To: Pekka Enberg; +Cc: linux-kernel, sri, andrea, pavel, linux-mm

Pekka Enberg wrote:
> Hi,
> 
> On 1/25/06, Matthew Dobson <colpatch@us.ibm.com> wrote:
> 
>>-static void *kmem_getpages(kmem_cache_t *cachep, gfp_t flags, int nodeid)
>>+static void *kmem_getpages(kmem_cache_t *cachep, gfp_t flags, int nodeid,
>>+                          mempool_t *pool)
>> {
>>        struct page *page;
>>        void *addr;
>>        int i;
>>
>>        flags |= cachep->gfpflags;
>>-       page = alloc_pages_node(nodeid, flags, cachep->gfporder);
>>+       /*
>>+        * If this allocation request isn't backed by a memory pool, or if that
>>+        * memory pool's gfporder is not the same as the cache's gfporder, fall
>>+        * back to alloc_pages_node().
>>+        */
>>+       if (!pool || cachep->gfporder != (int)pool->pool_data)
>>+               page = alloc_pages_node(nodeid, flags, cachep->gfporder);
>>+       else
>>+               page = mempool_alloc_node(pool, flags, nodeid);
> 
> 
> You're not returning any pages to the pool, so it will run out of
> pages at some point, no? Also, there's no guarantee the slab allocator
> will give back the critical page any time soon either because it will
> use it for non-critical allocations as well as soon as it becomes part
> of the object cache slab lists.

As this is not a full implementation, just a partly fleshed-out RFC, the
kfree() hooks aren't there yet.  Essentially, the plan would be to add a
mempool_t pointer to struct slab, and if that pointer is non-NULL when
we're freeing an unused slab, return it to the mempool it came from.

As you mention, there is no guarantee as to how long the critical page will
be in use for.  One idea to tackle that problem would be to use the new
mempool_t pointer in struct slab to provide exclusivity of critical slab
pages, ie: if a non-critical slab request comes in and the only free page
in the slab is a 'critical' page (it came from a mempool) then attempt to
grow the slab or fail the non-critical request.  Both of these concepts
were (more or less) implemented in my other attempt at solving the critical
pool problem, so moving them to fit into this approach should not be difficult.
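
In rough outline, the free side might then look like this (a sketch under
the assumption of a new 'mempool' field in struct slab; the helper name is
made up):

	/* Hand an unused slab's pages back to the pool they came from,
	 * rather than to the page allocator. */
	static void slab_return_pages(kmem_cache_t *cachep,
				      struct slab *slabp, void *addr)
	{
		if (slabp->mempool)
			mempool_free(addr, slabp->mempool);
		else
			kmem_freepages(cachep, addr);
	}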

Thanks!

-Matt

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [patch 3/9] mempool - Make mempools NUMA aware
  2006-01-26 17:54   ` Christoph Lameter
@ 2006-01-26 22:57     ` Matthew Dobson
  2006-01-26 23:15       ` Christoph Lameter
  0 siblings, 1 reply; 44+ messages in thread
From: Matthew Dobson @ 2006-01-26 22:57 UTC (permalink / raw)
  To: Christoph Lameter; +Cc: linux-kernel, sri, andrea, pavel, linux-mm

Christoph Lameter wrote:
> On Wed, 25 Jan 2006, Matthew Dobson wrote:
> 
> 
>>plain text document attachment (critical_mempools)
>>Add NUMA-awareness to the mempool code.  This involves several changes:
> 
> 
> I am not quite sure why you would need numa awareness in an emergency 
> memory pool. Presumably the effectiveness of the accesses does not matter.
> You only want to be sure that there is some memory available, right?

Not all requests for memory from a specific node are performance
enhancements, some are for correctness.  With large machines, especially as
those large machines' workloads are more and more likely to be partitioned
with something like cpusets, you want to be able to specify where you want
your reserve pool to come from.  As it was not incredibly difficult to
offer this option, I added it.  I was unwilling to completely ignore
callers' NUMA requests, assuming that they are all purely performance
motivated.


> You do not need this.... 

I do not agree...


Thanks!

-Matt

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [patch 3/9] mempool - Make mempools NUMA aware
  2006-01-26 22:57     ` Matthew Dobson
@ 2006-01-26 23:15       ` Christoph Lameter
  2006-01-26 23:24         ` Matthew Dobson
  0 siblings, 1 reply; 44+ messages in thread
From: Christoph Lameter @ 2006-01-26 23:15 UTC (permalink / raw)
  To: Matthew Dobson; +Cc: linux-kernel, sri, andrea, pavel, linux-mm

On Thu, 26 Jan 2006, Matthew Dobson wrote:

> Not all requests for memory from a specific node are performance
> enhancements, some are for correctness.  With large machines, especially as

alloc_pages_node and friends do not guarantee allocation on that specific 
node. That argument for "correctness" is bogus.

> > You do not need this.... 
> I do not agree...

There is no way that you would need this patch.



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [patch 3/9] mempool - Make mempools NUMA aware
  2006-01-26 23:15       ` Christoph Lameter
@ 2006-01-26 23:24         ` Matthew Dobson
  2006-01-26 23:29           ` Christoph Lameter
  0 siblings, 1 reply; 44+ messages in thread
From: Matthew Dobson @ 2006-01-26 23:24 UTC (permalink / raw)
  To: Christoph Lameter; +Cc: linux-kernel, sri, andrea, pavel, linux-mm

Christoph Lameter wrote:
> On Thu, 26 Jan 2006, Matthew Dobson wrote:
> 
> 
>>Not all requests for memory from a specific node are performance
>>enhancements, some are for correctness.  With large machines, especially as
> 
> 
> alloc_pages_node and friends do not guarantee allocation on that specific 
> node. That argument for "correctness" is bogus.

alloc_pages_node() does not guarantee allocation on a specific node, but
calling __alloc_pages() with a specific nodelist would.
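
For instance (a hand-rolled illustration, not an existing helper; a real
zonelist would include all of the node's zone types):

	/* With no fallback zones listed, __alloc_pages() either
	 * satisfies the request from node 1 or fails outright. */
	struct zonelist zl = {
		.zones = { NODE_DATA(1)->node_zones + ZONE_NORMAL, NULL }
	};
	struct page *page = __alloc_pages(GFP_KERNEL, 0, &zl);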


>>>You do not need this.... 
>>
>>I do not agree...
> 
> 
> There is no way that you would need this patch.

My goal was to not change the behavior of the slab allocator when inserting
a mempool-backed allocator "under" it.  Without support for at least
*requesting* allocations from a specific node when allocating from a
mempool, this would change how the slab allocator works.  That would be
bad.  The slab allocator now does not guarantee that, for example, a
kmalloc_node() request is satisfied by memory from the requested node, but
it does at least TRY.  Without adding mempool_alloc_node() then I would
never be able to even TRY to satisfy a mempool-backed kmalloc_node()
request from the correct node.  I believe that would constitute an
unacceptable breakage from normal, documented behavior.  So, I *do* need
this patch.

-Matt

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [patch 3/9] mempool - Make mempools NUMA aware
  2006-01-26 23:24         ` Matthew Dobson
@ 2006-01-26 23:29           ` Christoph Lameter
  2006-01-27  0:15             ` Matthew Dobson
  0 siblings, 1 reply; 44+ messages in thread
From: Christoph Lameter @ 2006-01-26 23:29 UTC (permalink / raw)
  To: Matthew Dobson; +Cc: linux-kernel, sri, andrea, pavel, linux-mm

On Thu, 26 Jan 2006, Matthew Dobson wrote:

> alloc_pages_node() does not guarantee allocation on a specific node, but
> calling __alloc_pages() with a specific nodelist would.

True, but you have emergency *_node functions that do not take nodelists.

> > There is no way that you would need this patch.
> 
> My goal was to not change the behavior of the slab allocator when inserting
> a mempool-backed allocator "under" it.  Without support for at least
> *requesting* allocations from a specific node when allocating from a
> mempool, this would change how the slab allocator works.  That would be
> bad.  The slab allocator now does not guarantee that, for example, a
> kmalloc_node() request is satisfied by memory from the requested node, but
> it does at least TRY.  Without adding mempool_alloc_node() then I would
> never be able to even TRY to satisfy a mempool-backed kmalloc_node()
> request from the correct node.  I believe that would constitute an
> unacceptable breakage from normal, documented behavior.  So, I *do* need
> this patch.

If you get to the emergency lists then you are already in a tight memory 
situation. In that situation it does not make sense to worry about the 
node number the memory is coming from. kmalloc_node is just a kmalloc with 
an indication of a preference of where the memory should be coming from. 
The node locality only influences performance and not correctness.

There is no change to the way the slab allocator works. Just drop the 
*_node variants.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [patch 3/9] mempool - Make mempools NUMA aware
  2006-01-26 23:29           ` Christoph Lameter
@ 2006-01-27  0:15             ` Matthew Dobson
  2006-01-27  0:21               ` Christoph Lameter
  0 siblings, 1 reply; 44+ messages in thread
From: Matthew Dobson @ 2006-01-27  0:15 UTC (permalink / raw)
  To: Christoph Lameter; +Cc: linux-kernel, sri, andrea, pavel, linux-mm

Christoph Lameter wrote:
> On Thu, 26 Jan 2006, Matthew Dobson wrote:
> 
> 
>>alloc_pages_node() does not guarantee allocation on a specific node, but
>>calling __alloc_pages() with a specific nodelist would.
> 
> 
> True, but you have emergency *_node functions that do not take nodelists.

Agreed.


>>>There is no way that you would need this patch.
>>
>>My goal was to not change the behavior of the slab allocator when inserting
>>a mempool-backed allocator "under" it.  Without support for at least
>>*requesting* allocations from a specific node when allocating from a
>>mempool, this would change how the slab allocator works.  That would be
>>bad.  The slab allocator now does not guarantee that, for example, a
>>kmalloc_node() request is satisfied by memory from the requested node, but
>>it does at least TRY.  Without adding mempool_alloc_node() then I would
>>never be able to even TRY to satisfy a mempool-backed kmalloc_node()
>>request from the correct node.  I believe that would constitute an
>>unacceptable breakage from normal, documented behavior.  So, I *do* need
>>this patch.
> 
> 
> If you get to the emergency lists then you are already in a tight memory 
> situation. In that situation it does not make sense to worry about the 
> node number the memory is coming from. kmalloc_node is just a kmalloc with 
> an indication of a preference of where the memory should be coming from. 
> The node locality only influences performance and not correctness.
> 
> There is no change to the way the slab allocator works. Just drop the 
> *_node variants.

If you look more carefully at how the emergency mempools are used, I think
you'll better understand why I did this:

Look at patch 9/9, specifically the changes to kmem_getpages():

-	page = alloc_pages_node(nodeid, flags, cachep->gfporder);
+	/*
+	 * If this allocation request isn't backed by a memory pool, or if that
+	 * memory pool's gfporder is not the same as the cache's gfporder, fall
+	 * back to alloc_pages_node().
+	 */
+	if (!pool || cachep->gfporder != (int)pool->pool_data)
+		page = alloc_pages_node(nodeid, flags, cachep->gfporder);
+	else
+		page = mempool_alloc_node(pool, flags, nodeid);

Allocations backed by a mempool must always be allocated via
mempool_alloc() (or mempool_alloc_node() in this case).  What that means
is, without a mempool_alloc_node() function, NO mempool backed allocations
will be able to request a specific node, even when the system has PLENTY of
memory!  This, IMO, is unacceptable.  Adding more NUMA-awareness to the
mempool system allows us to keep the same slab behavior as before, as well
as leaving us free to ignore the node requests when memory is low.

-Matt

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [patch 3/9] mempool - Make mempools NUMA aware
  2006-01-27  0:15             ` Matthew Dobson
@ 2006-01-27  0:21               ` Christoph Lameter
  2006-01-27  0:34                 ` Matthew Dobson
  0 siblings, 1 reply; 44+ messages in thread
From: Christoph Lameter @ 2006-01-27  0:21 UTC (permalink / raw)
  To: Matthew Dobson; +Cc: linux-kernel, sri, andrea, pavel, linux-mm

On Thu, 26 Jan 2006, Matthew Dobson wrote:

> Allocations backed by a mempool must always be allocated via
> mempool_alloc() (or mempool_alloc_node() in this case).  What that means
> is, without a mempool_alloc_node() function, NO mempool backed allocations
> will be able to request a specific node, even when the system has PLENTY of
> memory!  This, IMO, is unacceptable.  Adding more NUMA-awareness to the
> mempool system allows us to keep the same slab behavior as before, as well
> as leaving us free to ignore the node requests when memory is low.

Ok. That makes sense. I thought the mempool_xxx functions were only for 
emergencies. But nevertheless you still duplicate all memory allocation 
functions. I already was a bit concerned when I added the _node stuff.

What may be better is to add some kind of "allocation policy" to an 
allocation. That allocation policy could require the allocation on a node, 
distribution over a series of nodes, require allocation on a particular 
node, or allow the use of emergency pools etc.

Maybe unify all the different page allocations to one call and do the 
same with the slab allocator.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [patch 3/9] mempool - Make mempools NUMA aware
  2006-01-25 23:51 ` [patch 3/9] mempool - Make mempools NUMA aware Matthew Dobson
  2006-01-26 17:54   ` Christoph Lameter
@ 2006-01-27  0:23   ` Benjamin LaHaise
  2006-01-27  0:35     ` Matthew Dobson
  1 sibling, 1 reply; 44+ messages in thread
From: Benjamin LaHaise @ 2006-01-27  0:23 UTC (permalink / raw)
  To: Matthew Dobson; +Cc: linux-kernel, sri, andrea, pavel, linux-mm

On Wed, Jan 25, 2006 at 03:51:33PM -0800, Matthew Dobson wrote:
> plain text document attachment (critical_mempools)
> Add NUMA-awareness to the mempool code.  This involves several changes:

This is horribly bloated.  Mempools should really just be a flag and 
reserve count on a slab, as then the code would not be in hot paths.

		-ben

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [patch 3/9] mempool - Make mempools NUMA aware
  2006-01-27  0:21               ` Christoph Lameter
@ 2006-01-27  0:34                 ` Matthew Dobson
  2006-01-27  0:39                   ` Christoph Lameter
  2006-01-27 10:51                   ` Paul Jackson
  0 siblings, 2 replies; 44+ messages in thread
From: Matthew Dobson @ 2006-01-27  0:34 UTC (permalink / raw)
  To: Christoph Lameter; +Cc: linux-kernel, sri, andrea, pavel, linux-mm

Christoph Lameter wrote:
> On Thu, 26 Jan 2006, Matthew Dobson wrote:
> 
> 
>>Allocations backed by a mempool must always be allocated via
>>mempool_alloc() (or mempool_alloc_node() in this case).  What that means
>>is, without a mempool_alloc_node() function, NO mempool backed allocations
>>will be able to request a specific node, even when the system has PLENTY of
>>memory!  This, IMO, is unacceptable.  Adding more NUMA-awareness to the
>>mempool system allows us to keep the same slab behavior as before, as well
>>as leaving us free to ignore the node requests when memory is low.
> 
> 
> Ok. That makes sense. I thought the mempool_xxx functions were only for 
> emergencies. But nevertheless you still duplicate all memory allocation 
> functions. I already was a bit concerned when I added the _node stuff.

I'm glad we're on the same page now. :)  And yes, adding four "duplicate"
*_mempool allocators was not my first choice, but I couldn't easily see a
better way.


> What may be better is to add some kind of "allocation policy" to an 
> allocation. That allocation policy could require the allocation on a node, 
> distribution over a series of nodes, require allocation on a particular 
> node, or allow the use of emergency pools etc.
> 
> Maybe unify all the different page allocations to one call and do the 
> same with the slab allocator.

Hmmm...  I kinda like that.  Some sort of

struct allocation_policy
{
	enum allocation_policy_type type;
	nodemask_t nodes;
	mempool_t *critical_pool;
};

that could be passed to __alloc_pages()?
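
Usage might then look roughly like this (a sketch; the extra policy
argument to __alloc_pages() and the AP_CRITICAL value are hypothetical):

	struct allocation_policy pol = {
		.type		= AP_CRITICAL,
		.nodes		= NODE_MASK_ALL,
		.critical_pool	= my_pool,
	};
	page = __alloc_pages(gfp_mask, order, zonelist, &pol);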

That seems a bit beyond the scope of what I'd hoped for this patch series,
but if an approach like this is believed to be generally useful, it's
something I'm more than willing to work on...

-Matt

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [patch 3/9] mempool - Make mempools NUMA aware
  2006-01-27  0:23   ` [patch 3/9] mempool - Make mempools NUMA aware Benjamin LaHaise
@ 2006-01-27  0:35     ` Matthew Dobson
  2006-01-27  3:23       ` Benjamin LaHaise
  0 siblings, 1 reply; 44+ messages in thread
From: Matthew Dobson @ 2006-01-27  0:35 UTC (permalink / raw)
  To: Benjamin LaHaise; +Cc: linux-kernel, sri, andrea, pavel, linux-mm

Benjamin LaHaise wrote:
> On Wed, Jan 25, 2006 at 03:51:33PM -0800, Matthew Dobson wrote:
> 
>>plain text document attachment (critical_mempools)
>>Add NUMA-awareness to the mempool code.  This involves several changes:
> 
> 
> This is horribly bloated.  Mempools should really just be a flag and 
> reserve count on a slab, as then the code would not be in hot paths.
> 
> 		-ben

Ummm...  ok?  But with only a simple flag, how do you know *which* mempool
you're trying to use?  What if you want to use a mempool for a non-slab
allocation?

-Matt

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [patch 3/9] mempool - Make mempools NUMA aware
  2006-01-27  0:34                 ` Matthew Dobson
@ 2006-01-27  0:39                   ` Christoph Lameter
  2006-01-27  0:44                     ` Matthew Dobson
  2006-01-27 10:51                   ` Paul Jackson
  1 sibling, 1 reply; 44+ messages in thread
From: Christoph Lameter @ 2006-01-27  0:39 UTC (permalink / raw)
  To: Matthew Dobson; +Cc: linux-kernel, sri, andrea, pavel, linux-mm

On Thu, 26 Jan 2006, Matthew Dobson wrote:

> That seems a bit beyond the scope of what I'd hoped for this patch series,
> but if an approach like this is believed to be generally useful, it's
> something I'm more than willing to work on...

We need this for other issues as well. f.e. to establish memory allocation 
policies for the page cache, tmpfs and various other needs. Look at 
mempolicy.h which defines a subset of what we need. Currently there is no 
way to specify a policy when invoking the page allocator or slab 
allocator. The policy is implicitly fetched from the current task structure 
which is not optimal.


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [patch 3/9] mempool - Make mempools NUMA aware
  2006-01-27  0:39                   ` Christoph Lameter
@ 2006-01-27  0:44                     ` Matthew Dobson
  2006-01-27  0:57                       ` Christoph Lameter
  0 siblings, 1 reply; 44+ messages in thread
From: Matthew Dobson @ 2006-01-27  0:44 UTC (permalink / raw)
  To: Christoph Lameter; +Cc: linux-kernel, sri, andrea, pavel, linux-mm

Christoph Lameter wrote:
> On Thu, 26 Jan 2006, Matthew Dobson wrote:
> 
> 
>>That seems a bit beyond the scope of what I'd hoped for this patch series,
>>but if an approach like this is believed to be generally useful, it's
>>something I'm more than willing to work on...
> 
> 
> We need this for other issues as well. f.e. to establish memory allocation 
> policies for the page cache, tmpfs and various other needs. Look at 
> mempolicy.h which defines a subset of what we need. Currently there is no 
> way to specify a policy when invoking the page allocator or slab 
> allocator. The policy is implicily fetched from the current task structure 
> which is not optimal.

I agree that the current, task-based policies are suboptimal.  Having to
allocate and fill in even a small structure for each allocation is going to
be a tough sell, though.  I suppose most allocations could get by with a
small handful of static generic "policy structures"...  This seems like it
will turn into a significant rework of all the kernel's allocation routines,
no small task.  Certainly not something that I'd even start without
response from some other major players in the VM area...  Anyone?


-Matt

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [patch 3/9] mempool - Make mempools NUMA aware
  2006-01-27  0:44                     ` Matthew Dobson
@ 2006-01-27  0:57                       ` Christoph Lameter
  2006-01-27  1:07                         ` Andi Kleen
  0 siblings, 1 reply; 44+ messages in thread
From: Christoph Lameter @ 2006-01-27  0:57 UTC (permalink / raw)
  To: Matthew Dobson; +Cc: linux-kernel, sri, andrea, pavel, linux-mm, pj, ak

On Thu, 26 Jan 2006, Matthew Dobson wrote:

> > We need this for other issues as well. f.e. to establish memory allocation 
> > policies for the page cache, tmpfs and various other needs. Look at 
> > mempolicy.h which defines a subset of what we need. Currently there is no 
> > way to specify a policy when invoking the page allocator or slab 
> > allocator. The policy is implicitly fetched from the current task structure 
> > which is not optimal.
> 
> I agree that the current, task-based policies are suboptimal.  Having to
> allocate and fill in even a small structure for each allocation is going to
> be a tough sell, though.  I suppose most allocations could get by with a
> small handful of static generic "policy structures"...  This seems like it
> will turn into a significant rework of all the kernel's allocation routines,
> no small task.  Certainly not something that I'd even start without
> response from some other major players in the VM area...  Anyone?

No you would have a set of policies and only pass a pointer to the 
policies to the allocator. I.e. have one emergency policy allocated 
somewhere in the IP stack and then pass that to the allocator.

I guess that Andi Kleen and Paul Jackson would likely be interested in 
such an endeavor.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [patch 3/9] mempool - Make mempools NUMA aware
  2006-01-27  0:57                       ` Christoph Lameter
@ 2006-01-27  1:07                         ` Andi Kleen
  0 siblings, 0 replies; 44+ messages in thread
From: Andi Kleen @ 2006-01-27  1:07 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Matthew Dobson, linux-kernel, sri, andrea, pavel, linux-mm, pj

On Friday 27 January 2006 01:57, Christoph Lameter wrote:
> On Thu, 26 Jan 2006, Matthew Dobson wrote:
> 
> > > We need this for other issues as well. f.e. to establish memory allocation 
> > > policies for the page cache, tmpfs and various other needs. Look at 
> > > mempolicy.h which defines a subset of what we need. Currently there is no 
> > > way to specify a policy when invoking the page allocator or slab 
> > > allocator. The policy is implicitly fetched from the current task structure 
> > > which is not optimal.
> > 
> > I agree that the current, task-based policies are suboptimal.  Having to
> > allocate and fill in even a small structure for each allocation is going to
> > be a tough sell, though.  I suppose most allocations could get by with a
> > small handful of static generic "policy structures"...  This seems like it
> > will turn into a significant rework of all the kernel's allocation routines,
> > no small task.  Certainly not something that I'd even start without
> > response from some other major players in the VM area...  Anyone?
> 
> No you would have a set of policies and only pass a pointer to the 
> policies to the allocator. I.e. have one emergency policy allocated 
> somewhere in the IP stack and then pass that to the allocator.

What would that be needed for? 

My goal for mempolicies was always to keep it as simple as possible
and keep the fast paths fast  and there has to be a very good reason to add any 
new complexity.

-Andi

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [patch 3/9] mempool - Make mempools NUMA aware
  2006-01-27  0:35     ` Matthew Dobson
@ 2006-01-27  3:23       ` Benjamin LaHaise
  2006-01-28  1:08         ` Matthew Dobson
  0 siblings, 1 reply; 44+ messages in thread
From: Benjamin LaHaise @ 2006-01-27  3:23 UTC (permalink / raw)
  To: Matthew Dobson; +Cc: linux-kernel, sri, andrea, pavel, linux-mm

On Thu, Jan 26, 2006 at 04:35:56PM -0800, Matthew Dobson wrote:
> Ummm...  ok?  But with only a simple flag, how do you know *which* mempool
> you're trying to use?  What if you want to use a mempool for a non-slab
> allocation?

Are there any?  A quick poke around has only found a couple of places 
that use kzalloc(), which is still quite effectively a slab allocation.  
There seems to be just one page user, the dm-crypt driver, which could 
be served by a reservation scheme.

		-ben
-- 
"Ladies and gentlemen, I'm sorry to interrupt, but the police are here 
and they've asked us to stop the party."  Don't Email: <dont@kvack.org>.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [patch 8/9] slab - Add *_mempool slab variants
  2006-01-26 22:40     ` Matthew Dobson
@ 2006-01-27  7:09       ` Pekka J Enberg
  2006-01-27  7:10       ` Pekka J Enberg
  1 sibling, 0 replies; 44+ messages in thread
From: Pekka J Enberg @ 2006-01-27  7:09 UTC (permalink / raw)
  To: Matthew Dobson; +Cc: linux-kernel, sri, andrea, pavel, linux-mm

On Thu, 26 Jan 2006, Matthew Dobson wrote:
> The overhead of passing along a NULL pointer should not be too onerous.

It is; the extra parameter passing will be propagated all over the kernel 
where kmalloc() is called. Increasing kernel text for no good reason is 
not acceptable.

			Pekka

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [patch 8/9] slab - Add *_mempool slab variants
  2006-01-26 22:40     ` Matthew Dobson
  2006-01-27  7:09       ` Pekka J Enberg
@ 2006-01-27  7:10       ` Pekka J Enberg
  1 sibling, 0 replies; 44+ messages in thread
From: Pekka J Enberg @ 2006-01-27  7:10 UTC (permalink / raw)
  To: Matthew Dobson; +Cc: linux-kernel, sri, andrea, pavel, linux-mm

On Thu, 26 Jan 2006, Matthew Dobson wrote:
> I decided that using a whole page allocator would be the easiest way to
> cover the most common uses of slab/kmalloc, but your idea is very
> interesting.  My immediate concern would be trying to determine, at kfree()
> time, what was allocated by the slab allocator and what was allocated by
> the critical pool.  I will give this approach more thought, as the idea of
> completely separating the critical pool and slab allocator is attractive.

I think you can use PageSlab for that.
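
That is, something like this minimal sketch (assuming the critical pool
hands out whole pages; the function name is made up):

	/* Dispatch at free time: slab pages carry PG_slab, pages
	 * handed out by the critical pool do not. */
	void free_maybe_critical(void *obj, mempool_t *pool)
	{
		if (PageSlab(virt_to_page(obj)))
			kfree(obj);
		else
			mempool_free(obj, pool);
	}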

			Pekka

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [patch 9/9] slab - Implement single mempool backing for slab allocator
  2006-01-26 22:48     ` Matthew Dobson
@ 2006-01-27  7:22       ` Pekka J Enberg
  0 siblings, 0 replies; 44+ messages in thread
From: Pekka J Enberg @ 2006-01-27  7:22 UTC (permalink / raw)
  To: Matthew Dobson; +Cc: linux-kernel, sri, andrea, pavel, linux-mm

On Thu, 26 Jan 2006, Matthew Dobson wrote:
> As you mention, there is no guarantee as to how long the critical page will
> be in use for.  One idea to tackle that problem would be to use the new
> mempool_t pointer in struct slab to provide exclusivity of critical slab
> pages, ie: if a non-critical slab request comes in and the only free page
> in the slab is a 'critical' page (it came from a mempool) then attempt to
> grow the slab or fail the non-critical request.  Both of these concepts
> were (more or less) implemented in my other attempt at solving the critical
> pool problem, so moving them to fit into this approach should not be difficult.

Increasing struct slab size doesn't sound too appealing. I don't think the 
slab allocator should know about mempools. Your critical allocations and 
regular slab allocations have very different needs. For regular 
allocations, we want to avoid page allocation, which is why the object 
caches hold on to slab pages, whereas for mempools, I think you almost 
always want to give back unused memory to the pool.

I think a better way to go is to improve the mempool implementation to 
allow non-fixed size allocations and then combine that with the slab 
allocator to get what you need.

				Pekka

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [patch 3/9] mempool - Make mempools NUMA aware
  2006-01-27  0:34                 ` Matthew Dobson
  2006-01-27  0:39                   ` Christoph Lameter
@ 2006-01-27 10:51                   ` Paul Jackson
  2006-01-28  1:00                     ` Matthew Dobson
  1 sibling, 1 reply; 44+ messages in thread
From: Paul Jackson @ 2006-01-27 10:51 UTC (permalink / raw)
  To: Matthew Dobson; +Cc: clameter, linux-kernel, sri, andrea, pavel, linux-mm

Matthew wrote:
> I'm glad we're on the same page now. :)  And yes, adding four "duplicate"
> *_mempool allocators was not my first choice, but I couldn't easily see a
> better way.

I hope the following comments aren't too far off target.

I too am inclined to prefer the __GFP_CRITICAL approach over this.
That or Andrea's suggestion, which, except for a free hook, was entirely
outside of the page_alloc.c code paths.  Or Alan's suggested revival
of the old code to drop non-critical network packets under duress.

I am tempted to think you've taken an approach that raised some
substantial looking issues:

 * how to tell the system when to use the emergency pool
 * this doesn't really solve the problem (network can still starve)
 * it wastes memory most of the time
 * it doesn't really improve on GFP_ATOMIC

and just added another substantial looking issue:

 * it entwines another thread of complexity and performance costs
   into the important memory allocation code path.

Progress in the wrong direction ;).

> With large machines, especially as
> those large machines' workloads are more and more likely to be partitioned
> with something like cpusets, you want to be able to specify where you want
> your reserve pool to come from.

Cpusets is about performance, not correctness.  Anytime I get cornered
in the cpuset code, I prefer violating the cpuset containment, over
serious system failure.

-- 
                  I won't rest till it's the best ...
                  Programmer, Linux Scalability
                  Paul Jackson <pj@sgi.com> 1.925.600.0401

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [patch 3/9] mempool - Make mempools NUMA aware
  2006-01-27 10:51                   ` Paul Jackson
@ 2006-01-28  1:00                     ` Matthew Dobson
  2006-01-28  5:08                       ` Paul Jackson
  2006-01-28  8:16                       ` Pavel Machek
  0 siblings, 2 replies; 44+ messages in thread
From: Matthew Dobson @ 2006-01-28  1:00 UTC (permalink / raw)
  To: Paul Jackson; +Cc: clameter, linux-kernel, sri, andrea, pavel, linux-mm

Paul Jackson wrote:
> Matthew wrote:
> 
>>I'm glad we're on the same page now. :)  And yes, adding four "duplicate"
>>*_mempool allocators was not my first choice, but I couldn't easily see a
>>better way.
> 
> 
> I hope the following comments aren't too far off target.
> 
> I too am inclined to prefer the __GFP_CRITICAL approach over this.

OK.  Chalk one more up for that solution...


> That or Andrea's suggestion, which except for a free hook, was entirely
> outside of the page_alloc.c code paths.

This is supposed to be an implementation of Andrea's suggestion.  There are
no hooks in ANY page_alloc.c code paths.  These patches touch mempool code
and some slab code, but not any page allocator code.


> Or Alan's suggested revival
> of the old code to drop non-critical network packets under duress.

Dropping non-critical packets is still in our plan, but I don't think that
is a FULL solution.  As we mentioned before on that topic, you can't tell
if a packet is critical until AFTER you receive it, by which point it has
already had an skbuff (hopefully) allocated for it.  If your network
traffic is coming in faster than you can receive, examine, and drop
non-critical packets, you're hosed.  I still think some sort of reserve pool
is necessary to give the networking stack a little breathing room when
under both memory pressure and network load.


> I am tempted to think you've taken an approach that raised some
> substantial looking issues:
> 
>  * how to tell the system when to use the emergency pool

We've dropped the whole "in_emergency" thing.  The system uses the
emergency pool when the normal pool (ie: the buddy allocator) is out of pages.

>  * this doesn't really solve the problem (network can still starve)

Only if the pool is not large enough.  One can argue that sizing the pool
appropriately is impossible (theoretical incoming traffic over a GigE card
or two for a minute or two is extremely large), but then I guess we
shouldn't even try to fix the problem...?

>  * it wastes memory most of the time

True.  Any "real" reserve system will suffer from that problem.  Ben
LaHaise suggested a reserve system that allows the reserve pages to be used
for trivially reclaimable allocations while not in active use.  An
interesting idea.  Regardless, the Linux VM sorta already wastes memory by
keeping min_free_kbytes around, no?

>  * it doesn't really improve on GFP_ATOMIC

I disagree.  It improves on GFP_ATOMIC by giving it a second chance.  If
you've got a GFP_ATOMIC allocation that is particularly critical, using a
mempool to back it means that you can keep going for a while when the rest
of the system OOMs/goes into SWAP hell/etc.

> and just added another substantial looking issue:
> 
>  * it entwines another thread of complexity and performance costs
>    into the important memory allocation code path.

I can't say that it doesn't add any complexity into an important memory
allocation path, but I don't think it is a significant amount of
complexity.  It is just a pointer check in kmem_getpages()...


>>With large machines, especially as
>>those large machines' workloads are more and more likely to be partitioned
>>with something like cpusets, you want to be able to specify where you want
>>your reserve pool to come from.
> 
> 
> Cpusets is about performance, not correctness.  Anytime I get cornered
> in the cpuset code, I prefer violating the cpuset containment, over
> serious system failure.

Fair enough.  But if we can keep the same baseline performance and add this
new feature, I'd like to do that.  Doing our best to allocate on a
particular node when requested to isn't too much to ask.

-Matt

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [patch 3/9] mempool - Make mempools NUMA aware
  2006-01-27  3:23       ` Benjamin LaHaise
@ 2006-01-28  1:08         ` Matthew Dobson
  0 siblings, 0 replies; 44+ messages in thread
From: Matthew Dobson @ 2006-01-28  1:08 UTC (permalink / raw)
  To: Benjamin LaHaise; +Cc: linux-kernel, sri, andrea, pavel, linux-mm

Benjamin LaHaise wrote:
> On Thu, Jan 26, 2006 at 04:35:56PM -0800, Matthew Dobson wrote:
> 
>>Ummm...  ok?  But with only a simple flag, how do you know *which* mempool
>>you're trying to use?  What if you want to use a mempool for a non-slab
>>allocation?
> 
> 
> Are there any?  A quick poke around has only found a couple of places 
> that use kzalloc(), which is still quite effectively a slab allocation.  
> There seems to be just one page user, the dm-crypt driver, which could 
> be served by a reservation scheme.

A couple.  If Andrew is willing to pick up the mempool patches I posted an
hour or so ago, there will be only 4 mempool users that aren't using a
common mempool allocator.  Regardless of whether that happens, there are
only a few users that aren't slab based:
   1) mm/highmem.c - page based allocator
   2) drivers/scsi/scsi_transport_iscsi.c - calls alloc_skb(), which does
      eventually end up making a slab allocation
   3) drivers/md/raid1.c & raid10.c - easily the biggest mempool_alloc
      functions in the kernel.  Non-trivial.
   4) drivers/md/dm-crypt.c - the driver you mentioned, also using a page
      allocator

So we could possibly get away with a reservation scheme, but a couple users
would be non-trivial to fixup.

-Matt

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [patch 3/9] mempool - Make mempools NUMA aware
  2006-01-28  1:00                     ` Matthew Dobson
@ 2006-01-28  5:08                       ` Paul Jackson
  2006-01-28  8:16                       ` Pavel Machek
  1 sibling, 0 replies; 44+ messages in thread
From: Paul Jackson @ 2006-01-28  5:08 UTC (permalink / raw)
  To: Matthew Dobson; +Cc: clameter, linux-kernel, sri, andrea, pavel, linux-mm

Matthew wrote:
> > I too am inclined to prefer the __GFP_CRITICAL approach over this.
> 
> OK.  Chalk one more up for that solution...

I don't think my vote should count for much.  See below.

> This is supposed to be an implementation of Andrea's suggestion.  There are
> no hooks in ANY page_alloc.c code paths.  These patches touch mempool code
> and some slab code, but not any page allocator code.

Yeah - you're right.  I misread your patch set.  Sorry
for wasting your time.

-- 
                  I won't rest till it's the best ...
                  Programmer, Linux Scalability
                  Paul Jackson <pj@sgi.com> 1.925.600.0401

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [patch 3/9] mempool - Make mempools NUMA aware
  2006-01-28  1:00                     ` Matthew Dobson
  2006-01-28  5:08                       ` Paul Jackson
@ 2006-01-28  8:16                       ` Pavel Machek
  2006-01-28 16:14                         ` Sridhar Samudrala
  1 sibling, 1 reply; 44+ messages in thread
From: Pavel Machek @ 2006-01-28  8:16 UTC (permalink / raw)
  To: Matthew Dobson
  Cc: Paul Jackson, clameter, linux-kernel, sri, andrea, linux-mm

Hi!

I'll probably regret getting into this discussion, but:

> > Or Alan's suggested revival
> > of the old code to drop non-critical network packets under duress.
> 
> Dropping non-critical packets is still in our plan, but I don't think that
> is a FULL solution.  As we mentioned before on that topic, you can't tell
> if a packet is critical until AFTER you receive it, by which point it has
> already had an skbuff (hopefully) allocated for it.  If your network
> traffic is coming in faster than you can receive, examine, and drop
> non-critical packets, you're hosed.

Why? You run out of atomic memory, start dropping the packets before
they even enter the kernel memory, and process backlog in the
meantime. Other hosts realize you are dropping packets and slow down,
or, if they are malicious, you just end up consistently dropping 70%
of packets. But that's okay.

> I still think some sort of reserve pool
> is necessary to give the networking stack a little breathing room when
> under both memory pressure and network load.

"Lets throw some memory there and hope it does some good?" Eek? What
about auditing/fixing the networking stack, instead?

> >  * this doesn't really solve the problem (network can still starve)
> 
> Only if the pool is not large enough.  One can argue that sizing the pool
> appropriately is impossible (theoretical incoming traffic over a GigE card
> or two for a minute or two is extremely large), but then I guess we
> shouldn't even try to fix the problem...?

And what problem are you trying to fix, anyway? Last time I asked I
got a reply about some strange clustering solution that absolutely has
to survive two minutes. And no, your patches do not even solve that,
because sizing the pool is impossible. 
								Pavel
-- 
Thanks, Sharp!

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [patch 3/9] mempool - Make mempools NUMA aware
  2006-01-28  8:16                       ` Pavel Machek
@ 2006-01-28 16:14                         ` Sridhar Samudrala
  2006-01-28 16:41                           ` Pavel Machek
  0 siblings, 1 reply; 44+ messages in thread
From: Sridhar Samudrala @ 2006-01-28 16:14 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Matthew Dobson, Paul Jackson, clameter, linux-kernel, andrea, linux-mm

Pavel Machek wrote:
> Hi!
>
> I'll probably regret getting into this discussion, but:
>
>   
>>> Or Alan's suggested revival
>>> of the old code to drop non-critical network packets under duress.
>>>       
>> Dropping non-critical packets is still in our plan, but I don't think that
>> is a FULL solution.  As we mentioned before on that topic, you can't tell
>> if a packet is critical until AFTER you receive it, by which point it has
>> already had an skbuff (hopefully) allocated for it.  If your network
>> traffic is coming in faster than you can receive, examine, and drop
>> non-critical packets, you're hosed.
>>     
>
> Why? You run out of atomic memory, start dropping the packets before
> they even enter the kernel memory, and process backlog in the
> meantime. Other hosts realize you are dropping packets and slow down,
> or, if they are malicious, you just end up consistently dropping 70%
> of packets. But that's okay.
>
>   
>> I still think some sort of reserve pool
>> is necessary to give the networking stack a little breathing room when
>> under both memory pressure and network load.
>>     
>
> "Lets throw some memory there and hope it does some good?" Eek? What
> about auditing/fixing the networking stack, instead?
>   
The other reason we need a separate critical pool is to satisfy critical
GFP_KERNEL allocations when we are in an emergency.  These are made on the
send side, where we cannot block/sleep.
>   
>>>  * this doesn't really solve the problem (network can still starve)
>>>       
>> Only if the pool is not large enough.  One can argue that sizing the pool
>> appropriately is impossible (theoretical incoming traffic over a GigE card
>> or two for a minute or two is extremely large), but then I guess we
>> shouldn't even try to fix the problem...?
>>     
>
> And what problem are you trying to fix, anyway? Last time I asked I
> got a reply about some strange clustering solution that absolutely has
> to survive two minutes. And no, your patches do not even solve that,
> because sizing the pool is impossible. 
>   
Yes, it is true that sizing the critical pool may be difficult if we use it
for all incoming allocations.  Maybe as an initial solution we could just
depend on dropping non-critical incoming packets and use the critical pool
only for outgoing allocations.  We could definitely size the pool if we use
it only for allocations for critical outgoing packets.

Thanks
Sridhar


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [patch 3/9] mempool - Make mempools NUMA aware
  2006-01-28 16:14                         ` Sridhar Samudrala
@ 2006-01-28 16:41                           ` Pavel Machek
  2006-01-28 16:53                             ` Sridhar Samudrala
  0 siblings, 1 reply; 44+ messages in thread
From: Pavel Machek @ 2006-01-28 16:41 UTC (permalink / raw)
  To: Sridhar Samudrala
  Cc: Matthew Dobson, Paul Jackson, clameter, linux-kernel, andrea, linux-mm

> >>I still think some sort of reserve pool
> >>is necessary to give the networking stack a little breathing room when
> >>under both memory pressure and network load.
> >>    
> >
> >"Lets throw some memory there and hope it does some good?" Eek? What
> >about auditing/fixing the networking stack, instead?
> >  
> The other reason we need a separate critical pool is to satisfy critical
> GFP_KERNEL allocations when we are in an emergency.  These are made on the
> send side, where we cannot block/sleep.

If sending routines can work with a constant amount of memory, why use
kmalloc at all?  Anyway, I thought we were talking about the receiving side
earlier in the thread.

Ouch and wait a moment. You claim that GFP_KERNEL allocations can't
block/sleep? Of course they can, that's why they are GFP_KERNEL and
not GFP_ATOMIC.
								Pavel
-- 
Thanks, Sharp!

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [patch 3/9] mempool - Make mempools NUMA aware
  2006-01-28 16:41                           ` Pavel Machek
@ 2006-01-28 16:53                             ` Sridhar Samudrala
  2006-01-28 22:59                               ` Pavel Machek
  0 siblings, 1 reply; 44+ messages in thread
From: Sridhar Samudrala @ 2006-01-28 16:53 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Matthew Dobson, Paul Jackson, clameter, linux-kernel, andrea, linux-mm

Pavel Machek wrote:
>>>> I still think some sort of reserve pool
>>>> is necessary to give the networking stack a little breathing room when
>>>> under both memory pressure and network load.
>>>>    
>>>>         
>>> "Lets throw some memory there and hope it does some good?" Eek? What
>>> about auditing/fixing the networking stack, instead?
>>>  
>>>       
>> The other reason we need a separate critical pool is to satisfy critical
>> GFP_KERNEL allocations when we are in an emergency.  These are made on the
>> send side, where we cannot block/sleep.
>>     
>
> If sending routines can work with a constant amount of memory, why use
> kmalloc at all?  Anyway, I thought we were talking about the receiving side
> earlier in the thread.
>
> Ouch and wait a moment. You claim that GFP_KERNEL allocations can't
> block/sleep? Of course they can, that's why they are GFP_KERNEL and
> not GFP_ATOMIC.
>   
I didn't mean that GFP_KERNEL allocations cannot block/sleep. When in
emergency, we want even the GFP_KERNEL allocations that are made by
critical sockets not to block/sleep. So my original critical sockets
patches change the gfp flag passed to these allocation requests to
GFP_KERNEL|GFP_CRITICAL.
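
Roughly like this (a sketch of the idea only: GFP_CRITICAL and
SOCK_CRITICAL are flags introduced by those patches, not mainline
flags, and the helper name is illustrative):

#include <net/sock.h>

static inline gfp_t sk_critical_gfp(struct sock *sk, gfp_t gfp)
{
	/* Critical sockets get access to the emergency reserve, and in
	 * emergency their nominally-GFP_KERNEL allocations do not block. */
	if (sock_flag(sk, SOCK_CRITICAL))
		gfp |= GFP_CRITICAL;
	return gfp;
}

An allocation site would then call
kmalloc(size, sk_critical_gfp(sk, GFP_KERNEL)) instead of
kmalloc(size, GFP_KERNEL).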

Thanks
Sridhar


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [patch 3/9] mempool - Make mempools NUMA aware
  2006-01-28 16:53                             ` Sridhar Samudrala
@ 2006-01-28 22:59                               ` Pavel Machek
  2006-01-28 23:10                                 ` Let the flames begin... [was Re: [patch 3/9] mempool - Make mempools NUMA aware] Pavel Machek
  0 siblings, 1 reply; 44+ messages in thread
From: Pavel Machek @ 2006-01-28 22:59 UTC (permalink / raw)
  To: Sridhar Samudrala
  Cc: Matthew Dobson, Paul Jackson, clameter, linux-kernel, andrea, linux-mm

Hi!

> >If the sending routines can work with a constant amount of memory,
> >why use kmalloc at all? Anyway, I thought we were talking about the
> >receiving side earlier in the thread.
> >
> >Ouch and wait a moment. You claim that GFP_KERNEL allocations can't
> >block/sleep? Of course they can, that's why they are GFP_KERNEL and
> >not GFP_ATOMIC.
> >  
> I didn't mean that GFP_KERNEL allocations cannot block/sleep. When in
> emergency, we want even the GFP_KERNEL allocations that are made by
> critical sockets not to block/sleep. So my original critical sockets
> patches change the gfp flag passed to these allocation requests to
> GFP_KERNEL|GFP_CRITICAL.

Could we get a description of what you are really trying to achieve?
I don't know what a "critical socket" is, when you are "in emergency",
etc. When I am in an emergency, I just dial 112...

[Having enough memory on the send side will not mean you'll be able to
send data at the TCP layer.]

You seem to have some rather strange needs, which are maybe best served
by s/GFP_KERNEL/GFP_ATOMIC/ in the network layer; but we can't / don't
want to do that in the vanilla kernel -- your case is too specialized
for that. (Ouch, and it does not work anyway without rewriting the
network stack...)
								Pavel
-- 
Thanks, Sharp!

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Let the flames begin... [was Re: [patch 3/9] mempool - Make mempools NUMA aware]
  2006-01-28 22:59                               ` Pavel Machek
@ 2006-01-28 23:10                                 ` Pavel Machek
  0 siblings, 0 replies; 44+ messages in thread
From: Pavel Machek @ 2006-01-28 23:10 UTC (permalink / raw)
  To: Sridhar Samudrala
  Cc: Matthew Dobson, Paul Jackson, clameter, linux-kernel, andrea, linux-mm

On So 28-01-06 23:59:07, Pavel Machek wrote:
> Hi!
> 
> > >If the sending routines can work with a constant amount of memory,
> > >why use kmalloc at all? Anyway, I thought we were talking about the
> > >receiving side earlier in the thread.
> > >
> > >Ouch and wait a moment. You claim that GFP_KERNEL allocations can't
> > >block/sleep? Of course they can, that's why they are GFP_KERNEL and
> > >not GFP_ATOMIC.
> > >  
> > I didn't mean that GFP_KERNEL allocations cannot block/sleep. When in
> > emergency, we want even the GFP_KERNEL allocations that are made by
> > critical sockets not to block/sleep. So my original critical sockets
> > patches change the gfp flag passed to these allocation requests to
> > GFP_KERNEL|GFP_CRITICAL.

(I'd say Al Viro mode, but Al could take that personally)

IOW: you keep pushing a complex and known-broken solution for a problem
that does not exist.

(Now you should be angry enough; please explain why I'm wrong in
easy-to-understand terms, so that even I will understand that we need
critical sockets for kernels in emergency.)
								Pavel
-- 
Thanks, Sharp!

^ permalink raw reply	[flat|nested] 44+ messages in thread

end of thread, other threads:[~2006-01-29  7:41 UTC | newest]

Thread overview: 44+ messages
     [not found] <20060125161321.647368000@localhost.localdomain>
2006-01-25 19:39 ` [patch 1/9] mempool - Add page allocator Matthew Dobson
2006-01-25 19:39 ` [patch 2/9] mempool - Use common mempool " Matthew Dobson
2006-01-25 19:40 ` [patch 4/9] mempool - Update mempool page allocator user Matthew Dobson
2006-01-25 19:40 ` [patch 5/9] mempool - Update kmalloc mempool users Matthew Dobson
2006-01-25 19:40 ` [patch 6/9] mempool - Update kzalloc " Matthew Dobson
2006-01-26  7:30   ` Pekka Enberg
2006-01-26 22:03     ` Matthew Dobson
2006-01-25 19:40 ` [patch 8/9] slab - Add *_mempool slab variants Matthew Dobson
2006-01-26  7:41   ` Pekka Enberg
2006-01-26 22:40     ` Matthew Dobson
2006-01-27  7:09       ` Pekka J Enberg
2006-01-27  7:10       ` Pekka J Enberg
2006-01-25 19:40 ` [patch 9/9] slab - Implement single mempool backing for slab allocator Matthew Dobson
2006-01-26  8:11   ` Pekka Enberg
2006-01-26 22:48     ` Matthew Dobson
2006-01-27  7:22       ` Pekka J Enberg
2006-01-25 23:51 ` [patch 1/9] mempool - Add page allocator Matthew Dobson
2006-01-25 23:51 ` [patch 3/9] mempool - Make mempools NUMA aware Matthew Dobson
2006-01-26 17:54   ` Christoph Lameter
2006-01-26 22:57     ` Matthew Dobson
2006-01-26 23:15       ` Christoph Lameter
2006-01-26 23:24         ` Matthew Dobson
2006-01-26 23:29           ` Christoph Lameter
2006-01-27  0:15             ` Matthew Dobson
2006-01-27  0:21               ` Christoph Lameter
2006-01-27  0:34                 ` Matthew Dobson
2006-01-27  0:39                   ` Christoph Lameter
2006-01-27  0:44                     ` Matthew Dobson
2006-01-27  0:57                       ` Christoph Lameter
2006-01-27  1:07                         ` Andi Kleen
2006-01-27 10:51                   ` Paul Jackson
2006-01-28  1:00                     ` Matthew Dobson
2006-01-28  5:08                       ` Paul Jackson
2006-01-28  8:16                       ` Pavel Machek
2006-01-28 16:14                         ` Sridhar Samudrala
2006-01-28 16:41                           ` Pavel Machek
2006-01-28 16:53                             ` Sridhar Samudrala
2006-01-28 22:59                               ` Pavel Machek
2006-01-28 23:10                                 ` Let the flames begin... [was Re: [patch 3/9] mempool - Make mempools NUMA aware] Pavel Machek
2006-01-27  0:23   ` [patch 3/9] mempool - Make mempools NUMA aware Benjamin LaHaise
2006-01-27  0:35     ` Matthew Dobson
2006-01-27  3:23       ` Benjamin LaHaise
2006-01-28  1:08         ` Matthew Dobson
2006-01-25 23:51 ` [patch 7/9] mempool - Update other mempool users Matthew Dobson
