From: Christoph Lameter <cl@linux.com>
To: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: linux-mm@kvack.org
Subject: [RFC V2 SLEB 14/14] SLEB: Allocate off node objects from remote shared caches
Date: Fri, 21 May 2010 16:15:06 -0500	[thread overview]
Message-ID: <20100521211545.336946412@quilx.com> (raw)
In-Reply-To: 20100521211452.659982351@quilx.com

[-- Attachment #1: sled_off_node_from_shared --]
[-- Type: text/plain, Size: 7316 bytes --]

This is in a draft state.

Leave the cpu queue alone for off-node accesses and allocate directly from
the remote node's shared cache instead.
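
In rough pseudo-C, the intended allocation path looks like the sketch below.
It reuses names introduced by this series (node_local(), off_node_alloc(),
the cpu queue c->object[]) and is only an illustration of the control flow,
not compilable code:

	object = NULL;
	if (node_local(node)) {
		/* Local or unspecified node: use the per cpu queue as before */
		if (c->objects)
			object = c->object[--c->objects];
	} else {
		/*
		 * Off node request: bypass the cpu queue and take an object
		 * from the target node's shared cache under n->shared_lock,
		 * refilling that cache from partial or newly allocated slabs
		 * when it runs empty.
		 */
		object = off_node_alloc(s, node, gfpflags);
	}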

Signed-off-by: Christoph Lameter <cl@linux-foundation.org>

---
 include/linux/slub_def.h |    1 
 mm/slub.c                |  184 ++++++++++++++++++++++++++++++++++++-----------
 2 files changed, 142 insertions(+), 43 deletions(-)

Index: linux-2.6/include/linux/slub_def.h
===================================================================
--- linux-2.6.orig/include/linux/slub_def.h	2010-05-21 15:30:47.000000000 -0500
+++ linux-2.6/include/linux/slub_def.h	2010-05-21 15:34:45.000000000 -0500
@@ -42,7 +42,6 @@ struct kmem_cache_cpu {
 	unsigned stat[NR_SLUB_STAT_ITEMS];
 #endif
 	int objects;		/* Number of objects available */
-	int node;		/* The node of the page (or -1 for debug) */
 	void *object[BOOT_QUEUE_SIZE];		/* List of objects */
 };
 
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c	2010-05-21 15:30:47.000000000 -0500
+++ linux-2.6/mm/slub.c	2010-05-21 15:37:04.000000000 -0500
@@ -1616,19 +1616,6 @@ static void resize_cpu_queue(struct kmem
 	}
 }
 
-/*
- * Check if the objects in a per cpu structure fit numa
- * locality expectations.
- */
-static inline int node_match(struct kmem_cache_cpu *c, int node)
-{
-#ifdef CONFIG_NUMA
-	if (node != -1 && c->node != node)
-		return 0;
-#endif
-	return 1;
-}
-
 static unsigned long count_partial(struct kmem_cache_node *n,
 					int (*get_count)(struct page *))
 {
@@ -1718,9 +1705,9 @@ void retrieve_objects(struct kmem_cache 
 	}
 }
 
+#ifdef CONFIG_NUMA
 static inline int find_numa_node(struct kmem_cache *s, int selected_node)
 {
-#ifdef CONFIG_NUMA
 	if (s->flags & SLAB_MEM_SPREAD &&
 			!in_interrupt() &&
 			selected_node == SLAB_NODE_UNSPECIFIED) {
@@ -1731,10 +1718,113 @@ static inline int find_numa_node(struct 
 		if (current->mempolicy)
 			return slab_node(current->mempolicy);
 	}
-#endif
 	return selected_node;
 }
 
+/*
+ * Try to allocate a partial slab from a specific node.
+ */
+static struct page *__get_partial_node(struct kmem_cache_node *n)
+{
+	struct page *page;
+
+	if (!n->nr_partial)
+		return NULL;
+
+	list_for_each_entry(page, &n->partial, lru)
+		if (lock_and_freeze_slab(n, page))
+			goto out;
+	page = NULL;
+out:
+	return page;
+}
+
+
+void *off_node_alloc(struct kmem_cache *s, int node, gfp_t gfpflags)
+{
+	void *object = NULL;
+	struct kmem_cache_node *n = get_node(s, node);
+
+	spin_lock(&n->shared_lock);
+
+	while (!object) {
+		/* Direct allocation from remote shared cache */
+		if (n->objects) {
+#if 0
+			/* Taking a hot object remotely  */
+			object = n->object[--n->objects];
+#else
+			/* Take a cold object from the remote shared cache */
+			object = n->object[0];
+			n->objects--;
+			memcpy(n->object, n->object + 1, n->objects * sizeof(void *));
+#endif
+			break;
+		}
+
+		while (n->objects < s->batch) {
+			struct page *new;
+			int d;
+
+			/* Should be getting cold remote page !! This is hot */
+			new = __get_partial_node(n);
+			if (unlikely(!new)) {
+
+				spin_unlock(&n->shared_lock);
+
+				if (gfpflags & __GFP_WAIT)
+					local_irq_enable();
+
+				new = new_slab(s, gfpflags, node);
+
+				if (gfpflags & __GFP_WAIT)
+					local_irq_disable();
+
+				spin_lock(&n->shared_lock);
+
+ 				if (!new)
+					goto out;
+
+				stat(s, ALLOC_SLAB);
+				slab_lock(new);
+			} else
+				stat(s, ALLOC_FROM_PARTIAL);
+
+			d = min(s->batch - n->objects, available(new));
+			retrieve_objects(s, new, n->object + n->objects, d);
+			n->objects += d;
+
+			if (!all_objects_used(new))
+
+				add_partial(get_node(s, page_to_nid(new)), new, 1);
+
+			else
+				add_full(s, get_node(s, page_to_nid(new)), new);
+
+			slab_unlock(new);
+		}
+	}
+out:
+	spin_unlock(&n->shared_lock);
+	return object;
+}
+
+/*
+ * Check if an allocation for the given node can be satisfied
+ * locally (node unspecified or equal to the current node).
+ */
+static inline int node_local(int node)
+{
+	if (node != -1 && numa_node_id() != node)
+		return 0;
+	return 1;
+}
+
+#else
+static inline int find_numa_node(struct kmem_cache *s, int selected_node) { return selected_node; }
+static inline void *off_node_alloc(struct kmem_cache *s, int node, gfp_t gfpflags) { return NULL; }
+static inline int node_local(int node) { return 1; }
+#endif
 
 static void *slab_alloc(struct kmem_cache *s,
 		gfp_t gfpflags, int node, unsigned long addr)
@@ -1753,36 +1843,41 @@ redo:
 	node = find_numa_node(s, node);
 	local_irq_save(flags);
 	c = __this_cpu_ptr(s->cpu_slab);
-	if (unlikely(!c->objects || !node_match(c, node))) {
+	if (unlikely(!c->objects || !node_local(node))) {
+
+		struct kmem_cache_node *n;
 
 		gfpflags &= gfp_allowed_mask;
 
-		if (unlikely(!node_match(c, node))) {
-			flush_cpu_objects(s, c);
-			c->node = node;
-		} else {
-			struct kmem_cache_node *n = get_node(s, c->node);
+		if (unlikely(!node_local(node))) {
+			object = off_node_alloc(s, node, gfpflags);
+			if (!object)
+				goto oom;
+			else
+				goto got_object;
+		}
 
-			/*
-			 * Node specified is matching the stuff that we cache,
-			 * so we could retrieve objects from the shared cache
-			 * of the indicated node if there would be anything
-			 * there.
-			 */
-			if (n->objects) {
-				int d;
+		n = get_node(s, numa_node_id());
 
-				spin_lock(&n->shared_lock);
-				d = min(min(s->batch, s->shared), n->objects);
-				if (d > 0) {
-					memcpy(c->object + c->objects,
-						n->object + n->objects - d,
-						d * sizeof(void *));
-					n->objects -= d;
-					c->objects += d;
-				}
-				spin_unlock(&n->shared_lock);
+		/*
+		 * Node specified is matching the stuff that we cache,
+		 * so we could retrieve objects from the shared cache
+		 * of the indicated node if there would be anything
+		 * there.
+		 */
+		if (n->objects) {
+			int d;
+
+			spin_lock(&n->shared_lock);
+			d = min(min(s->batch, s->shared), n->objects);
+			if (d > 0) {
+				memcpy(c->object + c->objects,
+					n->object + n->objects - d,
+					d * sizeof(void *));
+				n->objects -= d;
+				c->objects += d;
 			}
+			spin_unlock(&n->shared_lock);
 		}
 
 		while (c->objects < s->batch) {
@@ -1833,6 +1928,8 @@ redo:
 
 	object = c->object[--c->objects];
 
+got_object:
+
 	if (unlikely(debug_on(s))) {
 		if (!alloc_debug_processing(s, object, addr))
 			goto redo;
@@ -1962,8 +2059,10 @@ static void slab_free(struct kmem_cache 
 	if (!(s->flags & SLAB_DEBUG_OBJECTS))
 		debug_check_no_obj_freed(object, s->objsize);
 
+#ifdef CONFIG_NUMA
 	if (numa_off_node_free(s, x))
 		goto out;
+#endif
 
 	if (unlikely(c->objects >= s->queue)) {
 
@@ -3941,8 +4040,9 @@ static ssize_t show_slab_objects(struct 
 
 		for_each_possible_cpu(cpu) {
 			struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
+			int node = cpu_to_node(cpu);
 
-			if (!c || c->node < 0)
+			if (!c)
 				continue;
 
 			if (c->objects) {
@@ -3954,9 +4054,9 @@ static ssize_t show_slab_objects(struct 
 					x = 1;
 
 				total += x;
-				nodes[c->node] += x;
+				nodes[node] += x;
 			}
-			per_cpu[c->node]++;
+			per_cpu[node]++;
 		}
 	}
 


Thread overview: 89+ messages
2010-05-21 21:14 [RFC V2 SLEB 00/14] The Enhanced(hopefully) Slab Allocator Christoph Lameter
2010-05-21 21:14 ` [RFC V2 SLEB 01/14] slab: Introduce a constant for a unspecified node Christoph Lameter
2010-06-07 21:44   ` David Rientjes
2010-06-07 22:30     ` Christoph Lameter
2010-06-08  5:41       ` Pekka Enberg
2010-06-08  6:20         ` David Rientjes
2010-06-08  6:34           ` Pekka Enberg
2010-06-08 23:35             ` David Rientjes
2010-06-09  5:55               ` Pekka Enberg
2010-06-09  5:55                 ` Pekka Enberg
2010-06-09  6:20                 ` David Rientjes
2010-06-09  6:20                   ` David Rientjes
2010-05-21 21:14 ` [RFC V2 SLEB 02/14] SLUB: Constants need UL Christoph Lameter
2010-05-21 21:14 ` [RFC V2 SLEB 03/14] SLUB: Use kmem_cache flags to detect if Slab is in debugging mode Christoph Lameter
2010-06-08  3:57   ` David Rientjes
2010-05-21 21:14 ` [RFC V2 SLEB 04/14] SLUB: discard_slab_unlock Christoph Lameter
2010-05-21 21:14 ` [RFC V2 SLEB 05/14] SLUB: is_kmalloc_cache Christoph Lameter
2010-06-08  8:54   ` David Rientjes
2010-05-21 21:14 ` [RFC V2 SLEB 06/14] SLUB: Get rid of the kmalloc_node slab Christoph Lameter
2010-06-09  6:14   ` David Rientjes
2010-06-09 16:14     ` Christoph Lameter
2010-06-09 16:26       ` Pekka Enberg
2010-06-10  6:07         ` Pekka Enberg
2010-05-21 21:14 ` [RFC V2 SLEB 07/14] SLEB: The Enhanced Slab Allocator Christoph Lameter
2010-05-21 21:15 ` [RFC V2 SLEB 08/14] SLEB: Resize cpu queue Christoph Lameter
2010-05-21 21:15 ` [RFC V2 SLEB 09/14] SLED: Get rid of useless function Christoph Lameter
2010-05-21 21:15 ` [RFC V2 SLEB 10/14] SLEB: Remove MAX_OBJS limitation Christoph Lameter
2010-05-21 21:15 ` [RFC V2 SLEB 11/14] SLEB: Add per node cache (with a fixed size for now) Christoph Lameter
2010-05-21 21:15 ` [RFC V2 SLEB 12/14] SLEB: Make the size of the shared cache configurable Christoph Lameter
2010-05-21 21:15 ` [RFC V2 SLEB 13/14] SLEB: Enhanced NUMA support Christoph Lameter
2010-05-21 21:15 ` Christoph Lameter [this message]
2010-05-22  8:37 ` [RFC V2 SLEB 00/14] The Enhanced(hopefully) Slab Allocator Pekka Enberg
2010-05-24  7:03 ` Nick Piggin
2010-05-24 15:06   ` Christoph Lameter
2010-05-25  2:06     ` Nick Piggin
2010-05-25  6:55       ` Pekka Enberg
2010-05-25  7:07         ` Nick Piggin
2010-05-25  8:03           ` Pekka Enberg
2010-05-25  8:03             ` Pekka Enberg
2010-05-25  8:16             ` Nick Piggin
2010-05-25  8:16               ` Nick Piggin
2010-05-25  9:19               ` Pekka Enberg
2010-05-25  9:19                 ` Pekka Enberg
2010-05-25  9:34                 ` Nick Piggin
2010-05-25  9:34                   ` Nick Piggin
2010-05-25  9:53                   ` Pekka Enberg
2010-05-25  9:53                     ` Pekka Enberg
2010-05-25 10:19                     ` Nick Piggin
2010-05-25 10:19                       ` Nick Piggin
2010-05-25 10:45                       ` Pekka Enberg
2010-05-25 10:45                         ` Pekka Enberg
2010-05-25 11:06                         ` Nick Piggin
2010-05-25 11:06                           ` Nick Piggin
2010-05-25 15:13                         ` Linus Torvalds
2010-05-25 15:13                           ` Linus Torvalds
2010-05-25 15:43                           ` Nick Piggin
2010-05-25 15:43                             ` Nick Piggin
2010-05-25 17:02                             ` Pekka Enberg
2010-05-25 17:02                               ` Pekka Enberg
2010-05-25 17:19                               ` Nick Piggin
2010-05-25 17:19                                 ` Nick Piggin
2010-05-25 17:35                                 ` Pekka Enberg
2010-05-25 17:35                                   ` Pekka Enberg
2010-05-25 17:40                                   ` Nick Piggin
2010-05-25 17:40                                     ` Nick Piggin
2010-05-25 10:07               ` David Rientjes
2010-05-25 10:07                 ` David Rientjes
2010-05-25 10:02             ` David Rientjes
2010-05-25 10:02               ` David Rientjes
2010-05-25 10:47               ` Pekka Enberg
2010-05-25 10:47                 ` Pekka Enberg
2010-05-25 19:57                 ` David Rientjes
2010-05-25 19:57                   ` David Rientjes
2010-05-25 14:13       ` Christoph Lameter
2010-05-25 14:34         ` Nick Piggin
2010-05-25 14:43           ` Nick Piggin
2010-05-25 14:48           ` Christoph Lameter
2010-05-25 15:11             ` Nick Piggin
2010-05-25 15:28               ` Christoph Lameter
2010-05-25 15:37                 ` Nick Piggin
2010-05-27 14:24                   ` Christoph Lameter
2010-05-27 14:37                     ` Nick Piggin
2010-05-27 15:52                       ` Christoph Lameter
2010-05-27 16:07                         ` Nick Piggin
2010-05-27 16:57                           ` Christoph Lameter
2010-05-28  8:39                             ` Nick Piggin
2010-05-25 14:40         ` Nick Piggin
2010-05-25 14:48           ` Christoph Lameter
2010-05-25 15:12             ` Nick Piggin
