linux-kernel.vger.kernel.org archive mirror
* cache_reap(): Further reduction in interrupt holdoff
@ 2006-03-03 23:36 Christoph Lameter
  2006-03-04  0:50 ` Make drain_array more universal by adding more parameters Christoph Lameter
  2006-03-06  7:49 ` cache_reap(): Further reduction in interrupt holdoff Pekka Enberg
  0 siblings, 2 replies; 6+ messages in thread
From: Christoph Lameter @ 2006-03-03 23:36 UTC (permalink / raw)
  To: akpm; +Cc: linux-kernel, kiran, alokk, Pekka Enberg

cache_reap() takes the l3->list_lock (disabling interrupts) unconditionally,
then does a few checks and maybe some cleanup. This patch makes cache_reap()
take the lock only if there is work to do, and the lock is then taken and
released separately for each cleaning action.

The check for when to do the next reaping is done without any locking
and becomes racy. This should not matter, since reaping can also be
skipped if the slab mutex cannot be acquired.

The same is true for the touched processing. If we get this wrong once
in a while then we will mistakenly clean, or fail to clean, the shared
cache. This will impact performance slightly.

Note that the additional drain_array() function introduced here
will fall out in a subsequent patch since array cleaning will now be
very similar from all callers.

Signed-off-by: Christoph Lameter <clameter@sgi.com>

Index: linux-2.6.16-rc5-mm2/mm/slab.c
===================================================================
--- linux-2.6.16-rc5-mm2.orig/mm/slab.c	2006-03-03 07:50:57.000000000 -0800
+++ linux-2.6.16-rc5-mm2/mm/slab.c	2006-03-03 15:32:49.000000000 -0800
@@ -293,13 +293,13 @@ struct kmem_list3 {
 	struct list_head slabs_full;
 	struct list_head slabs_free;
 	unsigned long free_objects;
-	unsigned long next_reap;
-	int free_touched;
 	unsigned int free_limit;
 	unsigned int colour_next;	/* Per-node cache coloring */
 	spinlock_t list_lock;
 	struct array_cache *shared;	/* shared per node */
 	struct array_cache **alien;	/* on other nodes */
+	unsigned long next_reap;	/* updated without locking */
+	int free_touched;		/* updated without locking */
 };
 
 /*
@@ -3572,6 +3572,22 @@ static void drain_array_locked(struct km
 	}
 }
 
+
+/*
+ * Drain an array if it contains any elements taking the l3 lock only if
+ * necessary.
+ */
+void drain_array(struct kmem_cache *searchp, struct kmem_list3 *l3,
+					 struct array_cache *ac)
+{
+	if (ac && ac->avail) {
+		spin_lock_irq(&l3->list_lock);
+		drain_array_locked(searchp, ac, 0,
+				   numa_node_id());
+		spin_unlock_irq(&l3->list_lock);
+	}
+}
+
 /**
  * cache_reap - Reclaim memory from caches.
  * @unused: unused parameter
@@ -3605,33 +3621,48 @@ static void cache_reap(void *unused)
 		searchp = list_entry(walk, struct kmem_cache, next);
 		check_irq_on();
 
+		/*
+		 * We only take the l3 lock if absolutely necessary and we
+		 * have established with reasonable certainty that
+		 * we can do some work if the lock was obtained.
+		 */
 		l3 = searchp->nodelists[numa_node_id()];
+
 		reap_alien(searchp, l3);
-		spin_lock_irq(&l3->list_lock);
 
-		drain_array_locked(searchp, cpu_cache_get(searchp), 0,
-				   numa_node_id());
+		drain_array(searchp, l3, cpu_cache_get(searchp));
 
+		/*
+		 * These are racy checks but it does not matter
+		 * if we skip one check or scan twice.
+		 */
 		if (time_after(l3->next_reap, jiffies))
-			goto next_unlock;
+			goto next;
 
 		l3->next_reap = jiffies + REAPTIMEOUT_LIST3;
 
-		if (l3->shared)
-			drain_array_locked(searchp, l3->shared, 0,
-					   numa_node_id());
+		drain_array(searchp, l3, l3->shared);
 
 		if (l3->free_touched) {
 			l3->free_touched = 0;
-			goto next_unlock;
+			goto next;
 		}
 
 		tofree = (l3->free_limit + 5 * searchp->num - 1) /
 				(5 * searchp->num);
 		do {
+			/*
+			 * Do not lock if there are no free blocks.
+			 */
+			if (list_empty(&l3->slabs_free))
+				break;
+
+			spin_lock_irq(&l3->list_lock);
 			p = l3->slabs_free.next;
-			if (p == &(l3->slabs_free))
+			if (p == &(l3->slabs_free)) {
+				spin_unlock_irq(&l3->list_lock);
 				break;
+			}
 
 			slabp = list_entry(p, struct slab, list);
 			BUG_ON(slabp->inuse);
@@ -3646,10 +3677,8 @@ static void cache_reap(void *unused)
 			l3->free_objects -= searchp->num;
 			spin_unlock_irq(&l3->list_lock);
 			slab_destroy(searchp, slabp);
-			spin_lock_irq(&l3->list_lock);
 		} while (--tofree > 0);
-next_unlock:
-		spin_unlock_irq(&l3->list_lock);
+next:
 		cond_resched();
 	}
 	check_irq_on();
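
Pieced together from the hunks above, the per-cache body of cache_reap()
after this patch reads roughly as follows.  This is a condensed sketch,
not the literal file contents: lines the diff only shows as context
(the slab unlink, BUG_ON checks, statistics) are reconstructed here and
may differ in detail.

        l3 = searchp->nodelists[numa_node_id()];
        reap_alien(searchp, l3);

        /* takes l3->list_lock only if the cpu array holds objects */
        drain_array(searchp, l3, cpu_cache_get(searchp));

        /* racy check, tolerated: worst case we skip or repeat a scan */
        if (time_after(l3->next_reap, jiffies))
                goto next;

        l3->next_reap = jiffies + REAPTIMEOUT_LIST3;

        /* same rule for the shared array */
        drain_array(searchp, l3, l3->shared);

        /* racy check, tolerated */
        if (l3->free_touched) {
                l3->free_touched = 0;
                goto next;
        }

        tofree = (l3->free_limit + 5 * searchp->num - 1) / (5 * searchp->num);
        do {
                /* unlocked peek: do not take the lock for an empty list */
                if (list_empty(&l3->slabs_free))
                        break;

                spin_lock_irq(&l3->list_lock);
                p = l3->slabs_free.next;
                if (p == &l3->slabs_free) {     /* raced with an allocation */
                        spin_unlock_irq(&l3->list_lock);
                        break;
                }
                slabp = list_entry(p, struct slab, list);
                list_del(&slabp->list);
                l3->free_objects -= searchp->num;
                spin_unlock_irq(&l3->list_lock);

                /* destroying the slab runs with interrupts enabled */
                slab_destroy(searchp, slabp);
        } while (--tofree > 0);
next:
        cond_resched();

The net effect is that interrupts are held off only for the short
individual critical sections instead of across the whole per-cache pass.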


* Make drain_array more universal by adding more parameters
  2006-03-03 23:36 cache_reap(): Further reduction in interrupt holdoff Christoph Lameter
@ 2006-03-04  0:50 ` Christoph Lameter
  2006-03-04  0:51   ` slab: remove drain_array_locked Christoph Lameter
  2006-03-06  7:49 ` cache_reap(): Further reduction in interrupt holdoff Pekka Enberg
  1 sibling, 1 reply; 6+ messages in thread
From: Christoph Lameter @ 2006-03-04  0:50 UTC (permalink / raw)
  To: akpm; +Cc: linux-kernel, kiran, alokk, Pekka Enberg

Add a parameter to drain_array() to control the freeing of all objects,
then use drain_array() to replace calls to drain_array_locked().
Doing so avoids taking the l3 lock in those locations if the arrays
are empty.

Signed-off-by: Christoph Lameter <clameter@sgi.com>

Index: linux-2.6.16-rc5-mm2/mm/slab.c
===================================================================
--- linux-2.6.16-rc5-mm2.orig/mm/slab.c	2006-03-03 16:14:49.000000000 -0800
+++ linux-2.6.16-rc5-mm2/mm/slab.c	2006-03-03 16:21:48.000000000 -0800
@@ -2128,6 +2128,10 @@ static void check_spinlock_acquired_node
 static void drain_array_locked(struct kmem_cache *cachep,
 			struct array_cache *ac, int force, int node);
 
+static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
+			struct array_cache *ac,
+			int force, int node);
+
 static void do_drain(void *arg)
 {
 	struct kmem_cache *cachep = arg;
@@ -2152,9 +2156,7 @@ static void drain_cpu_caches(struct kmem
 	for_each_online_node(node) {
 		l3 = cachep->nodelists[node];
 		if (l3) {
-			spin_lock_irq(&l3->list_lock);
-			drain_array_locked(cachep, l3->shared, 1, node);
-			spin_unlock_irq(&l3->list_lock);
+			drain_array(cachep, l3, l3->shared, 1, node);
 			if (l3->alien)
 				drain_alien_cache(cachep, l3->alien);
 		}
@@ -3578,12 +3579,11 @@ static void drain_array_locked(struct km
  * necessary.
  */
 void drain_array(struct kmem_cache *searchp, struct kmem_list3 *l3,
-					 struct array_cache *ac)
+			 struct array_cache *ac, int force, int node)
 {
 	if (ac && ac->avail) {
 		spin_lock_irq(&l3->list_lock);
-		drain_array_locked(searchp, ac, 0,
-				   numa_node_id());
+		drain_array_locked(searchp, ac, force, node);
 		spin_unlock_irq(&l3->list_lock);
 	}
 }
@@ -3604,6 +3604,7 @@ static void cache_reap(void *unused)
 {
 	struct list_head *walk;
 	struct kmem_list3 *l3;
+	int node = numa_node_id();
 
 	if (!mutex_trylock(&cache_chain_mutex)) {
 		/* Give up. Setup the next iteration. */
@@ -3626,11 +3627,11 @@ static void cache_reap(void *unused)
 		 * have established with reasonable certainty that
 		 * we can do some work if the lock was obtained.
 		 */
-		l3 = searchp->nodelists[numa_node_id()];
+		l3 = searchp->nodelists[node];
 
 		reap_alien(searchp, l3);
 
-		drain_array(searchp, l3, cpu_cache_get(searchp));
+		drain_array(searchp, l3, cpu_cache_get(searchp), 0, node);
 
 		/*
 		 * These are racy checks but it does not matter
@@ -3641,7 +3642,7 @@ static void cache_reap(void *unused)
 
 		l3->next_reap = jiffies + REAPTIMEOUT_LIST3;
 
-		drain_array(searchp, l3, l3->shared);
+		drain_array(searchp, l3, l3->shared, 0, node);
 
 		if (l3->free_touched) {
 			l3->free_touched = 0;
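
With the force and node parameters the same helper now covers both the
full drain in drain_cpu_caches() and the periodic partial drain in
cache_reap().  A minimal usage sketch of the two call styles after this
patch (call sites as in the hunks above; the comments describe the
intended semantics):

        /* drain_cpu_caches(): force == 1, empty the shared array completely */
        drain_array(cachep, l3, l3->shared, 1, node);

        /*
         * cache_reap(): force == 0, trim the arrays gradually and leave
         * recently touched arrays alone; the l3 lock is only taken if an
         * array actually holds objects.
         */
        drain_array(searchp, l3, cpu_cache_get(searchp), 0, node);
        drain_array(searchp, l3, l3->shared, 0, node);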


* slab: remove drain_array_locked
  2006-03-04  0:50 ` Make drain_array more universal by adding more parameters Christoph Lameter
@ 2006-03-04  0:51   ` Christoph Lameter
  2006-03-04  2:44     ` Do not disable interrupts for off node freeing Christoph Lameter
  0 siblings, 1 reply; 6+ messages in thread
From: Christoph Lameter @ 2006-03-04  0:51 UTC (permalink / raw)
  To: akpm; +Cc: linux-kernel, kiran, alokk, Pekka Enberg

Remove drain_array_locked() and use the opportunity to further limit
the time the l3 lock is held.

Signed-off-by: Christoph Lameter <clameter@sgi.com>

Index: linux-2.6.16-rc5-mm2/mm/slab.c
===================================================================
--- linux-2.6.16-rc5-mm2.orig/mm/slab.c	2006-03-03 16:31:40.000000000 -0800
+++ linux-2.6.16-rc5-mm2/mm/slab.c	2006-03-03 16:43:43.000000000 -0800
@@ -2125,9 +2125,6 @@ static void check_spinlock_acquired_node
 #define check_spinlock_acquired_node(x, y) do { } while(0)
 #endif
 
-static void drain_array_locked(struct kmem_cache *cachep,
-			struct array_cache *ac, int force, int node);
-
 static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
 			struct array_cache *ac,
 			int force, int node);
@@ -3555,40 +3552,33 @@ static void enable_cpucache(struct kmem_
 		       cachep->name, -err);
 }
 
-static void drain_array_locked(struct kmem_cache *cachep,
-				struct array_cache *ac, int force, int node)
+/*
+ * Drain an array if it contains any elements taking the l3 lock only if
+ * necessary.
+ */
+void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
+			 struct array_cache *ac, int force, int node)
 {
 	int tofree;
 
-	check_spinlock_acquired_node(cachep, node);
+	if (!ac || !ac->avail)
+		return;
+
 	if (ac->touched && !force) {
 		ac->touched = 0;
 	} else if (ac->avail) {
 		tofree = force ? ac->avail : (ac->limit + 4) / 5;
 		if (tofree > ac->avail)
 			tofree = (ac->avail + 1) / 2;
+		spin_lock_irq(&l3->list_lock);
 		free_block(cachep, ac->entry, tofree, node);
+		spin_unlock_irq(&l3->list_lock);
 		ac->avail -= tofree;
 		memmove(ac->entry, &(ac->entry[tofree]),
 			sizeof(void *) * ac->avail);
 	}
 }
 
-
-/*
- * Drain an array if it contains any elements taking the l3 lock only if
- * necessary.
- */
-void drain_array(struct kmem_cache *searchp, struct kmem_list3 *l3,
-			 struct array_cache *ac, int force, int node)
-{
-	if (ac && ac->avail) {
-		spin_lock_irq(&l3->list_lock);
-		drain_array_locked(searchp, ac, force, node);
-		spin_unlock_irq(&l3->list_lock);
-	}
-}
-
 /**
  * cache_reap - Reclaim memory from caches.
  * @unused: unused parameter
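
To make the batching in drain_array() concrete, here is the freeing
calculation from the function above with a short worked example.  The
limit and avail values are made up for illustration, not taken from any
real cache:

        /* non-forced drain: free roughly a fifth of the array limit */
        tofree = force ? ac->avail : (ac->limit + 4) / 5;
        if (tofree > ac->avail)
                tofree = (ac->avail + 1) / 2;

        /* limit = 120, avail = 60:  tofree = (120 + 4) / 5 = 24           */
        /* limit = 120, avail = 12:  24 > 12, so tofree = (12 + 1) / 2 = 6 */
        /* force == 1:               tofree = avail, the array is emptied  */

Only the free_block() call for those tofree objects now runs under
l3->list_lock; the adjustment of ac->avail and the memmove() of the
remaining entries happen outside it.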


* Do not disable interrupts for off node freeing
  2006-03-04  0:51   ` slab: remove drain_array_locked Christoph Lameter
@ 2006-03-04  2:44     ` Christoph Lameter
  0 siblings, 0 replies; 6+ messages in thread
From: Christoph Lameter @ 2006-03-04  2:44 UTC (permalink / raw)
  To: akpm; +Cc: linux-kernel, kiran, alokk, Pekka Enberg

Add a free_block_lock() function and remove the node parameter
from free_block(); the node can be determined from slabp->nodeid.

Optimize interrupt holdoff in do_tune_cpucache() and in drain_array():
only disable interrupts if we are freeing to the local node.

[Umm... Maybe we ought to think about that a bit more... Seems to be
okay to me but I am very fallible....]

Signed-off-by: Christoph Lameter <clameter@sgi.com>

Index: linux-2.6.16-rc5-mm2/mm/slab.c
===================================================================
--- linux-2.6.16-rc5-mm2.orig/mm/slab.c	2006-03-03 16:43:43.000000000 -0800
+++ linux-2.6.16-rc5-mm2/mm/slab.c	2006-03-03 18:41:11.000000000 -0800
@@ -701,12 +701,22 @@ static enum {
 
 static DEFINE_PER_CPU(struct work_struct, reap_work);
 
-static void free_block(struct kmem_cache *cachep, void **objpp, int len,
-			int node);
+static void free_block(struct kmem_cache *cachep, void **objpp, int len);
 static void enable_cpucache(struct kmem_cache *cachep);
 static void cache_reap(void *unused);
 static int __node_shrink(struct kmem_cache *cachep, int node);
 
+/*
+ * Interrupts must be off unless we free to a remote node
+ */
+static void free_block_lock(struct kmem_cache *cachep, struct kmem_list3 *l3,
+			void **objp, int len)
+{
+	spin_lock(&l3->list_lock);
+	free_block(cachep, objp, len);
+	spin_unlock(&l3->list_lock);
+}
+
 static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
 {
 	return cachep->array[smp_processor_id()];
@@ -948,10 +958,8 @@ static void __drain_alien_cache(struct k
 	struct kmem_list3 *rl3 = cachep->nodelists[node];
 
 	if (ac->avail) {
-		spin_lock(&rl3->list_lock);
-		free_block(cachep, ac->entry, ac->avail, node);
-		ac->avail = 0;
-		spin_unlock(&rl3->list_lock);
+		free_block_lock(cachep, rl3, ac->entry, ac->avail);
+		ac->avail=0;
 	}
 }
 
@@ -1135,7 +1143,7 @@ static int __devinit cpuup_callback(stru
 			/* Free limit for this kmem_list3 */
 			l3->free_limit -= cachep->batchcount;
 			if (nc)
-				free_block(cachep, nc->entry, nc->avail, node);
+				free_block(cachep, nc->entry, nc->avail);
 
 			if (!cpus_empty(mask)) {
 				spin_unlock_irq(&l3->list_lock);
@@ -1145,7 +1153,7 @@ static int __devinit cpuup_callback(stru
 			shared = l3->shared;
 			if (shared) {
 				free_block(cachep, l3->shared->entry,
-					   l3->shared->avail, node);
+					   l3->shared->avail);
 				l3->shared = NULL;
 			}
 
@@ -2137,9 +2145,8 @@ static void do_drain(void *arg)
 
 	check_irq_off();
 	ac = cpu_cache_get(cachep);
-	spin_lock(&cachep->nodelists[node]->list_lock);
-	free_block(cachep, ac->entry, ac->avail, node);
-	spin_unlock(&cachep->nodelists[node]->list_lock);
+	free_block_lock(cachep, cachep->nodelists[node],
+			ac->entry, ac->avail);
 	ac->avail = 0;
 }
 
@@ -2945,8 +2952,8 @@ done:
 /*
  * Caller needs to acquire correct kmem_list's list_lock
  */
-static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
-		       int node)
+static void free_block(struct kmem_cache *cachep, void **objpp,
+			 int nr_objects)
 {
 	int i;
 	struct kmem_list3 *l3;
@@ -2954,8 +2961,10 @@ static void free_block(struct kmem_cache
 	for (i = 0; i < nr_objects; i++) {
 		void *objp = objpp[i];
 		struct slab *slabp;
+		int node;
 
 		slabp = virt_to_slab(objp);
+		node = slabp->nodeid;
 		l3 = cachep->nodelists[node];
 		list_del(&slabp->list);
 		check_spinlock_acquired_node(cachep, node);
@@ -3009,7 +3018,7 @@ static void cache_flusharray(struct kmem
 		}
 	}
 
-	free_block(cachep, ac->entry, batchcount, node);
+	free_block(cachep, ac->entry, batchcount);
 free_done:
 #if STATS
 	{
@@ -3067,13 +3076,10 @@ static inline void __cache_free(struct k
 							    alien, nodeid);
 				alien->entry[alien->avail++] = objp;
 				spin_unlock(&alien->lock);
-			} else {
-				spin_lock(&(cachep->nodelists[nodeid])->
-					  list_lock);
-				free_block(cachep, &objp, 1, nodeid);
-				spin_unlock(&(cachep->nodelists[nodeid])->
-					    list_lock);
-			}
+			} else
+				free_block_lock(cachep,
+					cachep->nodelists[nodeid],
+					&objp, 1);
 			return;
 		}
 	}
@@ -3402,7 +3408,7 @@ static int alloc_kmemlist(struct kmem_ca
 
 			nc = cachep->nodelists[node]->shared;
 			if (nc)
-				free_block(cachep, nc->entry, nc->avail, node);
+				free_block(cachep, nc->entry, nc->avail);
 
 			l3->shared = new;
 			if (!cachep->nodelists[node]->alien) {
@@ -3480,11 +3486,25 @@ static int do_tune_cpucache(struct kmem_
 
 	for_each_online_cpu(i) {
 		struct array_cache *ccold = new.new[i];
+		int node = cpu_to_node(i);
+
 		if (!ccold)
 			continue;
-		spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
-		free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i));
-		spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
+
+		/*
+		 * Local interrupts must be disabled only for
+		 * this node because the list_lock may be obtained
+		 * in an interrupt context.
+		 */
+		if (node == numa_node_id())
+			local_irq_disable();
+
+		free_block_lock(cachep, cachep->nodelists[node],
+				ccold->entry, ccold->avail);
+
+		if (node == numa_node_id())
+			local_irq_enable();
+
 		kfree(ccold);
 	}
 
@@ -3570,9 +3590,18 @@ void drain_array(struct kmem_cache *cach
 		tofree = force ? ac->avail : (ac->limit + 4) / 5;
 		if (tofree > ac->avail)
 			tofree = (ac->avail + 1) / 2;
-		spin_lock_irq(&l3->list_lock);
-		free_block(cachep, ac->entry, tofree, node);
-		spin_unlock_irq(&l3->list_lock);
+		/*
+		 * Freeing to the local node also requires that
+		 * interrupts be disabled.
+		 */
+		if (node == numa_node_id())
+			local_irq_disable();
+
+		free_block_lock(cachep, l3, ac->entry, tofree);
+
+		if (node == numa_node_id())
+			local_irq_enable();
+
 		ac->avail -= tofree;
 		memmove(ac->entry, &(ac->entry[tofree]),
 			sizeof(void *) * ac->avail);
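
The pattern used in do_tune_cpucache() and drain_array() above can be
summed up in a small sketch, together with the local-CPU deadlock it
guards against.  Note that its safety for remote nodes rests entirely
on the assumption the bracketed note at the top of this message hedges
about; the call shown uses the argument names from the drain_array()
hunk:

        /*
         * Why interrupts must be off when freeing to the local node:
         *
         *   spin_lock(&l3->list_lock)            local node's list lock
         *     ---- interrupt on this CPU ----
         *       kfree() -> cache_flusharray()
         *         spin_lock(&l3->list_lock)      same lock: self-deadlock
         *
         * A remote node's list_lock is assumed not to be taken from this
         * CPU's interrupt context (off-node frees normally go through the
         * alien cache), so there plain spin_lock() with interrupts left
         * enabled is considered sufficient.
         */
        if (node == numa_node_id())
                local_irq_disable();

        free_block_lock(cachep, l3, ac->entry, tofree);

        if (node == numa_node_id())
                local_irq_enable();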



* Re: cache_reap(): Further reduction in interrupt holdoff
  2006-03-03 23:36 cache_reap(): Further reduction in interrupt holdoff Christoph Lameter
  2006-03-04  0:50 ` Make drain_array more universal by adding more parameters Christoph Lameter
@ 2006-03-06  7:49 ` Pekka Enberg
  2006-03-06 16:24   ` Christoph Lameter
  1 sibling, 1 reply; 6+ messages in thread
From: Pekka Enberg @ 2006-03-06  7:49 UTC (permalink / raw)
  To: Christoph Lameter; +Cc: akpm, linux-kernel, kiran, alokk

Hi,

(Christoph, please use penberg@cs.helsinki.fi instead.)

On 3/4/06, Christoph Lameter <clameter@engr.sgi.com> wrote:
> cache_reap() takes the l3->list_lock (disabling interrupts) unconditionally,
> then does a few checks and maybe some cleanup. This patch makes cache_reap()
> take the lock only if there is work to do, and the lock is then taken and
> released separately for each cleaning action.
>
> The check for when to do the next reaping is done without any locking
> and becomes racy. This should not matter, since reaping can also be
> skipped if the slab mutex cannot be acquired.

The cache chain mutex is mostly used by /proc/slabinfo and not taken
that often, I think. But yeah, I don't think next reaping is an issue
either.

On 3/4/06, Christoph Lameter <clameter@engr.sgi.com> wrote:
> The same is true for the touched processing. If we get this wrong once
> in a while then we will mistakenly clean, or fail to clean, the shared
> cache. This will impact performance slightly.

Touched processing worries me a little because it's used when there's
high allocation pressure. Do you have any numbers where this patch
helps?

                                         Pekka


* Re: cache_reap(): Further reduction in interrupt holdoff
  2006-03-06  7:49 ` cache_reap(): Further reduction in interrupt holdoff Pekka Enberg
@ 2006-03-06 16:24   ` Christoph Lameter
  0 siblings, 0 replies; 6+ messages in thread
From: Christoph Lameter @ 2006-03-06 16:24 UTC (permalink / raw)
  To: Pekka Enberg; +Cc: akpm, linux-kernel, kiran, alokk

On Mon, 6 Mar 2006, Pekka Enberg wrote:

> Touched processing worries me little because it's used when there's
> high allocation pressure. Do you have any numbers where this patch
> helps?

We had frequent holdoffs of >30 usec in cache_reap() without this patch.
Now it happens only once in a while.


