From: Christoph Lameter <cl@linux.com>
To: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: David Rientjes <rientjes@google.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: linux-kernel@vger.kernel.org
Cc: Thomas Gleixner <tglx@linutronix.de>
Subject: [slubllv7 09/17] slub: Rework allocator fastpaths
Date: Wed, 01 Jun 2011 12:25:52 -0500 [thread overview]
Message-ID: <20110601172616.992426589@linux.com> (raw)
In-Reply-To: 20110601172543.437240675@linux.com
[-- Attachment #1: rework_fastpaths --]
[-- Type: text/plain, Size: 14347 bytes --]
Rework the allocation paths so that updates of the page freelist, frozen state
and number of objects use cmpxchg_double_slab().
Signed-off-by: Christoph Lameter <cl@linux.com>
---
mm/slub.c | 409 ++++++++++++++++++++++++++++++++++++++++++--------------------
1 file changed, 280 insertions(+), 129 deletions(-)
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c 2011-05-31 10:14:06.172977333 -0500
+++ linux-2.6/mm/slub.c 2011-05-31 10:17:07.132976175 -0500
@@ -992,11 +992,6 @@ static noinline int alloc_debug_processi
if (!check_slab(s, page))
goto bad;
- if (!on_freelist(s, page, object)) {
- object_err(s, page, object, "Object already allocated");
- goto bad;
- }
-
if (!check_valid_pointer(s, page, object)) {
object_err(s, page, object, "Freelist Pointer check fails");
goto bad;
@@ -1060,14 +1055,6 @@ static noinline int free_debug_processin
goto fail;
}
- /* Special debug activities for freeing objects */
- if (!page->frozen && !page->freelist) {
- struct kmem_cache_node *n = get_node(s, page_to_nid(page));
-
- spin_lock(&n->list_lock);
- remove_full(s, page);
- spin_unlock(&n->list_lock);
- }
if (s->flags & SLAB_STORE_USER)
set_track(s, object, TRACK_FREE, addr);
trace(s, page, object, 0);
@@ -1178,6 +1165,7 @@ static inline int check_object(struct km
void *object, u8 val) { return 1; }
static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
struct page *page) {}
+static inline void remove_full(struct kmem_cache *s, struct page *page) {}
static inline unsigned long kmem_cache_flags(unsigned long objsize,
unsigned long flags, const char *name,
void (*ctor)(void *))
@@ -1460,11 +1448,52 @@ static inline void remove_partial(struct
static inline int lock_and_freeze_slab(struct kmem_cache *s,
struct kmem_cache_node *n, struct page *page)
{
- if (slab_trylock(page)) {
- remove_partial(n, page);
+ void *freelist;
+ unsigned long counters;
+ struct page new;
+
+
+ if (!slab_trylock(page))
+ return 0;
+
+ /*
+ * Zap the freelist and set the frozen bit.
+ * The old freelist is the list of objects for the
+ * per cpu allocation list.
+ */
+ do {
+ freelist = page->freelist;
+ counters = page->counters;
+ new.counters = counters;
+ new.inuse = page->objects;
+
+ VM_BUG_ON(new.frozen);
+ new.frozen = 1;
+
+ } while (!cmpxchg_double_slab(s, page,
+ freelist, counters,
+ NULL, new.counters,
+ "lock and freeze"));
+
+ remove_partial(n, page);
+
+ if (freelist) {
+ /* Populate the per cpu freelist */
+ this_cpu_write(s->cpu_slab->freelist, freelist);
+ this_cpu_write(s->cpu_slab->page, page);
+ this_cpu_write(s->cpu_slab->node, page_to_nid(page));
return 1;
+ } else {
+ /*
+ * Slab page came from the wrong list. No object to allocate
+ * from. Put it onto the correct list and continue partial
+ * scan.
+ */
+ printk(KERN_ERR "SLUB: %s : Page without available objects on"
+ " partial list\n", s->name);
+ slab_unlock(page);
+ return 0;
}
- return 0;
}
/*
@@ -1564,59 +1593,6 @@ static struct page *get_partial(struct k
return get_any_partial(s, flags);
}
-/*
- * Move a page back to the lists.
- *
- * Must be called with the slab lock held.
- *
- * On exit the slab lock will have been dropped.
- */
-static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
- __releases(bitlock)
-{
- struct kmem_cache_node *n = get_node(s, page_to_nid(page));
-
- if (page->inuse) {
-
- if (page->freelist) {
- spin_lock(&n->list_lock);
- add_partial(n, page, tail);
- spin_unlock(&n->list_lock);
- stat(s, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
- } else {
- stat(s, DEACTIVATE_FULL);
- if (kmem_cache_debug(s) && (s->flags & SLAB_STORE_USER)) {
- spin_lock(&n->list_lock);
- add_full(s, n, page);
- spin_unlock(&n->list_lock);
- }
- }
- slab_unlock(page);
- } else {
- stat(s, DEACTIVATE_EMPTY);
- if (n->nr_partial < s->min_partial) {
- /*
- * Adding an empty slab to the partial slabs in order
- * to avoid page allocator overhead. This slab needs
- * to come after the other slabs with objects in
- * so that the others get filled first. That way the
- * size of the partial list stays small.
- *
- * kmem_cache_shrink can reclaim any empty slabs from
- * the partial list.
- */
- spin_lock(&n->list_lock);
- add_partial(n, page, 1);
- spin_unlock(&n->list_lock);
- slab_unlock(page);
- } else {
- slab_unlock(page);
- stat(s, FREE_SLAB);
- discard_slab(s, page);
- }
- }
-}
-
#ifdef CONFIG_PREEMPT
/*
* Calculate the next globally unique transaction for disambiguiation
@@ -1686,37 +1662,158 @@ void init_kmem_cache_cpus(struct kmem_ca
/*
* Remove the cpu slab
*/
+
+/*
+ * Remove the cpu slab
+ */
static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
- __releases(bitlock)
{
+ enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
struct page *page = c->page;
- int tail = 1;
+ struct kmem_cache_node *n = get_node(s, page_to_nid(page));
+ int lock = 0;
+ enum slab_modes l = M_NONE, m = M_NONE;
+ void *freelist;
+ void *nextfree;
+ int tail = 0;
+ struct page new;
+ struct page old;
- if (page->freelist)
+ if (page->freelist) {
stat(s, DEACTIVATE_REMOTE_FREES);
+ tail = 1;
+ }
+
+ c->tid = next_tid(c->tid);
+ c->page = NULL;
+ freelist = c->freelist;
+ c->freelist = NULL;
+
/*
- * Merge cpu freelist into slab freelist. Typically we get here
- * because both freelists are empty. So this is unlikely
- * to occur.
+ * Stage one: Free all available per cpu objects back
+ * to the page freelist while it is still frozen. Leave the
+ * last one.
+ *
+ * There is no need to take the list->lock because the page
+ * is still frozen.
*/
- while (unlikely(c->freelist)) {
- void **object;
+ while (freelist && (nextfree = get_freepointer(s, freelist))) {
+ void *prior;
+ unsigned long counters;
+
+ do {
+ prior = page->freelist;
+ counters = page->counters;
+ set_freepointer(s, freelist, prior);
+ new.counters = counters;
+ new.inuse--;
+ VM_BUG_ON(!new.frozen);
+
+ } while (!cmpxchg_double_slab(s, page,
+ prior, counters,
+ freelist, new.counters,
+ "drain percpu freelist"));
- tail = 0; /* Hot objects. Put the slab first */
+ freelist = nextfree;
+ }
- /* Retrieve object from cpu_freelist */
- object = c->freelist;
- c->freelist = get_freepointer(s, c->freelist);
+ /*
+ * Stage two: Ensure that the page is unfrozen while the
+ * list presence reflects the actual number of objects
+ * during unfreeze.
+ *
+ * We setup the list membership and then perform a cmpxchg
+ * with the count. If there is a mismatch then the page
+ * is not unfrozen but the page is on the wrong list.
+ *
+ * Then we restart the process which may have to remove
+ * the page from the list that we just put it on again
+ * because the number of objects in the slab may have
+ * changed.
+ */
+redo:
- /* And put onto the regular freelist */
- set_freepointer(s, object, page->freelist);
- page->freelist = object;
- page->inuse--;
+ old.freelist = page->freelist;
+ old.counters = page->counters;
+ VM_BUG_ON(!old.frozen);
+
+ /* Determine target state of the slab */
+ new.counters = old.counters;
+ if (freelist) {
+ new.inuse--;
+ set_freepointer(s, freelist, old.freelist);
+ new.freelist = freelist;
+ } else
+ new.freelist = old.freelist;
+
+ new.frozen = 0;
+
+ if (!new.inuse && n->nr_partial < s->min_partial)
+ m = M_FREE;
+ else if (new.freelist) {
+ m = M_PARTIAL;
+ if (!lock) {
+ lock = 1;
+ /*
+ * Taking the spinlock removes the possiblity
+ * that acquire_slab() will see a slab page that
+ * is frozen
+ */
+ spin_lock(&n->list_lock);
+ }
+ } else {
+ m = M_FULL;
+ if (kmem_cache_debug(s) && !lock) {
+ lock = 1;
+ /*
+ * This also ensures that the scanning of full
+ * slabs from diagnostic functions will not see
+ * any frozen slabs.
+ */
+ spin_lock(&n->list_lock);
+ }
+ }
+
+ if (l != m) {
+
+ if (l == M_PARTIAL)
+
+ remove_partial(n, page);
+
+ else if (l == M_FULL)
+
+ remove_full(s, page);
+
+ if (m == M_PARTIAL) {
+
+ add_partial(n, page, tail);
+ stat(s, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
+
+ } else if (m == M_FULL) {
+
+ stat(s, DEACTIVATE_FULL);
+ add_full(s, n, page);
+
+ }
+ }
+
+ l = m;
+ if (!cmpxchg_double_slab(s, page,
+ old.freelist, old.counters,
+ new.freelist, new.counters,
+ "unfreezing slab"))
+ goto redo;
+
+ slab_unlock(page);
+
+ if (lock)
+ spin_unlock(&n->list_lock);
+
+ if (m == M_FREE) {
+ stat(s, DEACTIVATE_EMPTY);
+ discard_slab(s, page);
+ stat(s, FREE_SLAB);
}
- c->page = NULL;
- c->tid = next_tid(c->tid);
- page->frozen = 0;
- unfreeze_slab(s, page, tail);
}
static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
@@ -1851,6 +1948,8 @@ static void *__slab_alloc(struct kmem_ca
void **object;
struct page *page;
unsigned long flags;
+ struct page new;
+ unsigned long counters;
local_irq_save(flags);
#ifdef CONFIG_PREEMPT
@@ -1873,25 +1972,33 @@ static void *__slab_alloc(struct kmem_ca
if (unlikely(!node_match(c, node)))
goto another_slab;
- stat(s, ALLOC_REFILL);
+ stat(s, ALLOC_SLOWPATH);
+
+ do {
+ object = page->freelist;
+ counters = page->counters;
+ new.counters = counters;
+ new.inuse = page->objects;
+ VM_BUG_ON(!new.frozen);
+
+ } while (!cmpxchg_double_slab(s, page,
+ object, counters,
+ NULL, new.counters,
+ "__slab_alloc"));
load_freelist:
VM_BUG_ON(!page->frozen);
- object = page->freelist;
if (unlikely(!object))
goto another_slab;
- if (kmem_cache_debug(s))
- goto debug;
- c->freelist = get_freepointer(s, object);
- page->inuse = page->objects;
- page->freelist = NULL;
+ stat(s, ALLOC_REFILL);
slab_unlock(page);
+
+ c->freelist = get_freepointer(s, object);
c->tid = next_tid(c->tid);
local_irq_restore(flags);
- stat(s, ALLOC_SLOWPATH);
return object;
another_slab:
@@ -1901,9 +2008,10 @@ new_slab:
page = get_partial(s, gfpflags, node);
if (page) {
stat(s, ALLOC_FROM_PARTIAL);
- page->frozen = 1;
- c->node = page_to_nid(page);
- c->page = page;
+ object = c->freelist;
+
+ if (kmem_cache_debug(s))
+ goto debug;
goto load_freelist;
}
@@ -1911,12 +2019,19 @@ new_slab:
if (page) {
c = __this_cpu_ptr(s->cpu_slab);
- stat(s, ALLOC_SLAB);
if (c->page)
flush_slab(s, c);
+ /*
+ * No other reference to the page yet so we can
+ * muck around with it freely without cmpxchg
+ */
+ object = page->freelist;
+ page->freelist = NULL;
+ page->inuse = page->objects;
+
+ stat(s, ALLOC_SLAB);
slab_lock(page);
- page->frozen = 1;
c->node = page_to_nid(page);
c->page = page;
goto load_freelist;
@@ -1925,12 +2040,12 @@ new_slab:
slab_out_of_memory(s, gfpflags, node);
local_irq_restore(flags);
return NULL;
+
debug:
- if (!alloc_debug_processing(s, page, object, addr))
- goto another_slab;
+ if (!object || !alloc_debug_processing(s, page, object, addr))
+ goto new_slab;
- page->inuse++;
- page->freelist = get_freepointer(s, object);
+ c->freelist = get_freepointer(s, object);
deactivate_slab(s, c);
c->page = NULL;
c->node = NUMA_NO_NODE;
@@ -2082,6 +2197,11 @@ static void __slab_free(struct kmem_cach
{
void *prior;
void **object = (void *)x;
+ int was_frozen;
+ int inuse;
+ struct page new;
+ unsigned long counters;
+ struct kmem_cache_node *n = NULL;
unsigned long uninitialized_var(flags);
local_irq_save(flags);
@@ -2091,32 +2211,65 @@ static void __slab_free(struct kmem_cach
if (kmem_cache_debug(s) && !free_debug_processing(s, page, x, addr))
goto out_unlock;
- prior = page->freelist;
- set_freepointer(s, object, prior);
- page->freelist = object;
- page->inuse--;
-
- if (unlikely(page->frozen)) {
- stat(s, FREE_FROZEN);
- goto out_unlock;
- }
+ do {
+ prior = page->freelist;
+ counters = page->counters;
+ set_freepointer(s, object, prior);
+ new.counters = counters;
+ was_frozen = new.frozen;
+ new.inuse--;
+ if ((!new.inuse || !prior) && !was_frozen && !n) {
+ n = get_node(s, page_to_nid(page));
+ /*
+ * Speculatively acquire the list_lock.
+ * If the cmpxchg does not succeed then we may
+ * drop the list_lock without any processing.
+ *
+ * Otherwise the list_lock will synchronize with
+ * other processors updating the list of slabs.
+ */
+ spin_lock(&n->list_lock);
+ }
+ inuse = new.inuse;
- if (unlikely(!page->inuse))
- goto slab_empty;
+ } while (!cmpxchg_double_slab(s, page,
+ prior, counters,
+ object, new.counters,
+ "__slab_free"));
+
+ if (likely(!n)) {
+ /*
+ * The list lock was not taken therefore no list
+ * activity can be necessary.
+ */
+ if (was_frozen)
+ stat(s, FREE_FROZEN);
+ goto out_unlock;
+ }
/*
- * Objects left in the slab. If it was not on the partial list before
- * then add it.
+ * was_frozen may have been set after we acquired the list_lock in
+ * an earlier loop. So we need to check it here again.
*/
- if (unlikely(!prior)) {
- struct kmem_cache_node *n = get_node(s, page_to_nid(page));
+ if (was_frozen)
+ stat(s, FREE_FROZEN);
+ else {
+ if (unlikely(!inuse && n->nr_partial > s->min_partial))
+ goto slab_empty;
- spin_lock(&n->list_lock);
- add_partial(get_node(s, page_to_nid(page)), page, 1);
- spin_unlock(&n->list_lock);
- stat(s, FREE_ADD_PARTIAL);
+ /*
+ * Objects left in the slab. If it was not on the partial list before
+ * then add it.
+ */
+ if (unlikely(!prior)) {
+ remove_full(s, page);
+ add_partial(n, page, 0);
+ stat(s, FREE_ADD_PARTIAL);
+ }
}
+ spin_unlock(&n->list_lock);
+
out_unlock:
slab_unlock(page);
local_irq_restore(flags);
@@ -2127,13 +2280,11 @@ slab_empty:
/*
* Slab still on the partial list.
*/
- struct kmem_cache_node *n = get_node(s, page_to_nid(page));
-
- spin_lock(&n->list_lock);
remove_partial(n, page);
- spin_unlock(&n->list_lock);
stat(s, FREE_REMOVE_PARTIAL);
}
+
+ spin_unlock(&n->list_lock);
slab_unlock(page);
local_irq_restore(flags);
stat(s, FREE_SLAB);
next prev parent reply other threads:[~2011-06-01 17:28 UTC|newest]
Thread overview: 41+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-06-01 17:25 [slubllv7 00/17] SLUB: Lockless freelists for objects V7 Christoph Lameter
2011-06-01 17:25 ` [slubllv7 01/17] slub: Push irq disable into allocate_slab() Christoph Lameter
2011-06-01 17:25 ` [slubllv7 02/17] slub: Do not use frozen page flag but a bit in the page counters Christoph Lameter
2011-06-01 17:25 ` [slubllv7 03/17] slub: Move page->frozen handling near where the page->freelist handling occurs Christoph Lameter
2011-06-01 17:25 ` [slubllv7 04/17] x86: Add support for cmpxchg_double Christoph Lameter
2011-06-09 9:53 ` Pekka Enberg
2011-06-10 15:17 ` Christoph Lameter
2011-06-11 9:50 ` Pekka Enberg
2011-06-11 17:02 ` Christoph Lameter
2011-06-14 5:49 ` Pekka Enberg
2011-06-14 8:04 ` Ingo Molnar
2011-06-14 14:04 ` Christoph Lameter
2011-06-14 15:05 ` H. Peter Anvin
2011-06-15 8:55 ` Tejun Heo
2011-06-15 14:26 ` Christoph Lameter
2011-06-15 16:39 ` Tejun Heo
2011-06-15 17:19 ` Christoph Lameter
2011-06-25 23:49 ` [tip:x86/atomic] " tip-bot for Christoph Lameter
2011-06-01 17:25 ` [slubllv7 05/17] mm: Rearrange struct page Christoph Lameter
2011-06-09 9:57 ` Pekka Enberg
2011-06-09 16:45 ` Andrew Morton
2011-06-09 17:03 ` [PATCH] checkpatch: Add a "prefer __aligned" check Joe Perches
2011-06-01 17:25 ` [slubllv7 06/17] slub: Add cmpxchg_double_slab() Christoph Lameter
2011-07-11 19:55 ` Eric Dumazet
2011-07-12 15:59 ` Christoph Lameter
2011-07-12 16:06 ` Eric Dumazet
2011-07-12 16:47 ` Christoph Lameter
2011-07-12 18:40 ` H. Peter Anvin
2011-07-12 18:53 ` Christoph Lameter
2011-07-12 20:40 ` H. Peter Anvin
2011-06-01 17:25 ` [slubllv7 07/17] slub: explicit list_lock taking Christoph Lameter
2011-06-01 17:25 ` [slubllv7 08/17] slub: Pass kmem_cache struct to lock and freeze slab Christoph Lameter
2011-06-01 17:25 ` Christoph Lameter [this message]
2011-06-01 17:25 ` [slubllv7 10/17] slub: Invert locking and avoid slab lock Christoph Lameter
2011-06-01 17:25 ` [slubllv7 11/17] slub: Disable interrupts in free_debug processing Christoph Lameter
2011-06-01 17:25 ` [slubllv7 12/17] slub: Avoid disabling interrupts in free slowpath Christoph Lameter
2011-06-01 17:25 ` [slubllv7 13/17] slub: Get rid of the another_slab label Christoph Lameter
2011-06-01 17:25 ` [slubllv7 14/17] slub: Add statistics for the case that the current slab does not match the node Christoph Lameter
2011-06-01 17:25 ` [slubllv7 15/17] slub: fast release on full slab Christoph Lameter
2011-06-01 17:25 ` [slubllv7 16/17] slub: Not necessary to check for empty slab on load_freelist Christoph Lameter
2011-06-01 17:26 ` [slubllv7 17/17] slub: slabinfo update for cmpxchg handling Christoph Lameter
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20110601172616.992426589@linux.com \
--to=cl@linux.com \
--cc=penberg@cs.helsinki.fi \
--cc=rientjes@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).