linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* slab: Remove SLAB_NO_REAP option
@ 2006-02-22 22:34 Christoph Lameter
  2006-02-23  7:46 ` Pekka Enberg
  2006-02-23  7:54 ` Alok Kataria
  0 siblings, 2 replies; 16+ messages in thread
From: Christoph Lameter @ 2006-02-22 22:34 UTC (permalink / raw)
  To: akpm; +Cc: alokk, manfred, Pekka Enberg, linux-kernel

SLAB_NO_REAP is documented as an option that will cause this slab
not to be reaped under memory pressure. However, that is not what
happens. The only thing that SLAB_NO_REAP controls at the moment
is the reclaim of the unused slab elements that were allocated in
batch in cache_reap(). Cache_reap() is run every few seconds
independently of memory pressure.

Could we remove the whole thing? Its only used by three slabs 
anyways and I cannot find a reason for having this option.

There is an additional problem with SLAB_NO_REAP. If set then
the recovery of objects from alien caches is switched off.
Objects not freed on the same node where they were initially
allocated will only be reused if a certain amount of objects 
accumulates from one alien node (not very likely) or if the cache is 
explicitly shrunk. (Strangely __cache_shrink does not check for 
SLAB_NO_REAP)

Getting rid of SLAB_NO_REAP fixes the problems with alien cache
freeing.

Signed-off-by: Christoph Lameter <clameter@sgi.com>

Index: linux-2.6.16-rc4/mm/slab.c
===================================================================
--- linux-2.6.16-rc4.orig/mm/slab.c	2006-02-17 14:23:45.000000000 -0800
+++ linux-2.6.16-rc4/mm/slab.c	2006-02-22 14:06:23.000000000 -0800
@@ -170,12 +170,12 @@
 #if DEBUG
 # define CREATE_MASK	(SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \
 			 SLAB_POISON | SLAB_HWCACHE_ALIGN | \
-			 SLAB_NO_REAP | SLAB_CACHE_DMA | \
+			 SLAB_CACHE_DMA | \
 			 SLAB_MUST_HWCACHE_ALIGN | SLAB_STORE_USER | \
 			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
 			 SLAB_DESTROY_BY_RCU)
 #else
-# define CREATE_MASK	(SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | \
+# define CREATE_MASK	(SLAB_HWCACHE_ALIGN | \
 			 SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \
 			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
 			 SLAB_DESTROY_BY_RCU)
@@ -642,7 +642,7 @@ static struct kmem_cache cache_cache = {
 	.limit = BOOT_CPUCACHE_ENTRIES,
 	.shared = 1,
 	.buffer_size = sizeof(struct kmem_cache),
-	.flags = SLAB_NO_REAP,
+	.flags = 0,
 	.spinlock = SPIN_LOCK_UNLOCKED,
 	.name = "kmem_cache",
 #if DEBUG
@@ -1689,9 +1689,6 @@ static inline size_t calculate_slab_orde
  * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
  * for buffer overruns.
  *
- * %SLAB_NO_REAP - Don't automatically reap this cache when we're under
- * memory pressure.
- *
  * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
  * cacheline.  This can be beneficial if you're counting cycles as closely
  * as davem.
@@ -3487,10 +3484,6 @@ static void cache_reap(void *unused)
 		struct slab *slabp;
 
 		searchp = list_entry(walk, struct kmem_cache, next);
-
-		if (searchp->flags & SLAB_NO_REAP)
-			goto next;
-
 		check_irq_on();
 
 		l3 = searchp->nodelists[numa_node_id()];
Index: linux-2.6.16-rc4/include/linux/slab.h
===================================================================
--- linux-2.6.16-rc4.orig/include/linux/slab.h	2006-02-17 14:23:45.000000000 -0800
+++ linux-2.6.16-rc4/include/linux/slab.h	2006-02-22 14:05:25.000000000 -0800
@@ -38,7 +38,6 @@ typedef struct kmem_cache kmem_cache_t;
 #define	SLAB_DEBUG_INITIAL	0x00000200UL	/* Call constructor (as verifier) */
 #define	SLAB_RED_ZONE		0x00000400UL	/* Red zone objs in a cache */
 #define	SLAB_POISON		0x00000800UL	/* Poison objects */
-#define	SLAB_NO_REAP		0x00001000UL	/* never reap from the cache */
 #define	SLAB_HWCACHE_ALIGN	0x00002000UL	/* align objs on a h/w cache lines */
 #define SLAB_CACHE_DMA		0x00004000UL	/* use GFP_DMA memory */
 #define SLAB_MUST_HWCACHE_ALIGN	0x00008000UL	/* force alignment */
Index: linux-2.6.16-rc4/drivers/scsi/iscsi_tcp.c
===================================================================
--- linux-2.6.16-rc4.orig/drivers/scsi/iscsi_tcp.c	2006-02-17 14:23:45.000000000 -0800
+++ linux-2.6.16-rc4/drivers/scsi/iscsi_tcp.c	2006-02-22 14:08:46.000000000 -0800
@@ -3639,7 +3639,7 @@ iscsi_tcp_init(void)
 
 	taskcache = kmem_cache_create("iscsi_taskcache",
 			sizeof(struct iscsi_data_task), 0,
-			SLAB_HWCACHE_ALIGN | SLAB_NO_REAP, NULL, NULL);
+			SLAB_HWCACHE_ALIGN, NULL, NULL);
 	if (!taskcache)
 		return -ENOMEM;
 
Index: linux-2.6.16-rc4/fs/ocfs2/super.c
===================================================================
--- linux-2.6.16-rc4.orig/fs/ocfs2/super.c	2006-02-17 14:23:45.000000000 -0800
+++ linux-2.6.16-rc4/fs/ocfs2/super.c	2006-02-22 14:08:17.000000000 -0800
@@ -959,7 +959,7 @@ static int ocfs2_initialize_mem_caches(v
 	ocfs2_lock_cache = kmem_cache_create("ocfs2_lock",
 					     sizeof(struct ocfs2_journal_lock),
 					     0,
-					     SLAB_NO_REAP|SLAB_HWCACHE_ALIGN,
+					     SLAB_HWCACHE_ALIGN,
 					     NULL, NULL);
 	if (!ocfs2_lock_cache)
 		return -ENOMEM;

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: slab: Remove SLAB_NO_REAP option
  2006-02-22 22:34 slab: Remove SLAB_NO_REAP option Christoph Lameter
@ 2006-02-23  7:46 ` Pekka Enberg
  2006-02-23  7:54 ` Alok Kataria
  1 sibling, 0 replies; 16+ messages in thread
From: Pekka Enberg @ 2006-02-23  7:46 UTC (permalink / raw)
  To: Christoph Lameter; +Cc: akpm, alokk, manfred, linux-kernel

Hi,

On 2/23/06, Christoph Lameter <clameter@engr.sgi.com> wrote:
> SLAB_NO_REAP is documented as an option that will cause this slab
> not to be reaped under memory pressure. However, that is not what
> happens. The only thing that SLAB_NO_REAP controls at the moment
> is the reclaim of the unused slab elements that were allocated in
> batch in cache_reap(). Cache_reap() is run every few seconds
> independently of memory pressure.
>
> Could we remove the whole thing? Its only used by three slabs
> anyways and I cannot find a reason for having this option.

Looks good, and I have been meaning to do this myself as well.

Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>

P.S. Please use penberg@cs.helsinki.fi, not the gmail one. Thanks.

                               Pekka

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: slab: Remove SLAB_NO_REAP option
  2006-02-22 22:34 slab: Remove SLAB_NO_REAP option Christoph Lameter
  2006-02-23  7:46 ` Pekka Enberg
@ 2006-02-23  7:54 ` Alok Kataria
  2006-02-23  8:48   ` Pekka Enberg
  1 sibling, 1 reply; 16+ messages in thread
From: Alok Kataria @ 2006-02-23  7:54 UTC (permalink / raw)
  To: Christoph Lameter; +Cc: akpm, manfred, Pekka Enberg, linux-kernel

On Thu, 2006-02-23 at 04:04, Christoph Lameter wrote:
> SLAB_NO_REAP is documented as an option that will cause this slab
> not to be reaped under memory pressure. However, that is not what
> happens. The only thing that SLAB_NO_REAP controls at the moment
> is the reclaim of the unused slab elements that were allocated in
> batch in cache_reap(). Cache_reap() is run every few seconds
> independently of memory pressure.
> 
> Could we remove the whole thing? Its only used by three slabs 
> anyways and I cannot find a reason for having this option.
> 
There can be some caches which are not used quite often, kmem_cache for
instance. Now from performance perspective having SLAB_NO_REAP for such
caches is good. 
But again i am not sure if there are any other caches which show such a
behavior. What i don't understand is why were ocfs2_lock, and the
iscsi_taskcache made to use this flag.

If there was some specific reason then it would be better we don't
change it. 

> There is an additional problem with SLAB_NO_REAP. If set then
> the recovery of objects from alien caches is switched off.
> Objects not freed on the same node where they were initially
> allocated will only be reused if a certain amount of objects 
> accumulates from one alien node (not very likely) or if the cache is 
> explicitly shrunk. (Strangely __cache_shrink does not check for 
> SLAB_NO_REAP)
> 
> Getting rid of SLAB_NO_REAP fixes the problems with alien cache
> freeing.

As far as this problem is concerned we could move the draining code
above the check for SLAB_NO_REAP flag. That ways the alien caches will
be freed irrespective of the flag.

Thanks & Regards,
Alok

> 
> Signed-off-by: Christoph Lameter <clameter@sgi.com>
> 
> Index: linux-2.6.16-rc4/mm/slab.c
> ===================================================================
> --- linux-2.6.16-rc4.orig/mm/slab.c	2006-02-17 14:23:45.000000000 -0800
> +++ linux-2.6.16-rc4/mm/slab.c	2006-02-22 14:06:23.000000000 -0800
> @@ -170,12 +170,12 @@
>  #if DEBUG
>  # define CREATE_MASK	(SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \
>  			 SLAB_POISON | SLAB_HWCACHE_ALIGN | \
> -			 SLAB_NO_REAP | SLAB_CACHE_DMA | \
> +			 SLAB_CACHE_DMA | \
>  			 SLAB_MUST_HWCACHE_ALIGN | SLAB_STORE_USER | \
>  			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
>  			 SLAB_DESTROY_BY_RCU)
>  #else
> -# define CREATE_MASK	(SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | \
> +# define CREATE_MASK	(SLAB_HWCACHE_ALIGN | \
>  			 SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \
>  			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
>  			 SLAB_DESTROY_BY_RCU)
> @@ -642,7 +642,7 @@ static struct kmem_cache cache_cache = {
>  	.limit = BOOT_CPUCACHE_ENTRIES,
>  	.shared = 1,
>  	.buffer_size = sizeof(struct kmem_cache),
> -	.flags = SLAB_NO_REAP,
> +	.flags = 0,
>  	.spinlock = SPIN_LOCK_UNLOCKED,
>  	.name = "kmem_cache",
>  #if DEBUG
> @@ -1689,9 +1689,6 @@ static inline size_t calculate_slab_orde
>   * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
>   * for buffer overruns.
>   *
> - * %SLAB_NO_REAP - Don't automatically reap this cache when we're under
> - * memory pressure.
> - *
>   * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
>   * cacheline.  This can be beneficial if you're counting cycles as closely
>   * as davem.
> @@ -3487,10 +3484,6 @@ static void cache_reap(void *unused)
>  		struct slab *slabp;
>  
>  		searchp = list_entry(walk, struct kmem_cache, next);
> -
> -		if (searchp->flags & SLAB_NO_REAP)
> -			goto next;
> -
>  		check_irq_on();
>  
>  		l3 = searchp->nodelists[numa_node_id()];
> Index: linux-2.6.16-rc4/include/linux/slab.h
> ===================================================================
> --- linux-2.6.16-rc4.orig/include/linux/slab.h	2006-02-17 14:23:45.000000000 -0800
> +++ linux-2.6.16-rc4/include/linux/slab.h	2006-02-22 14:05:25.000000000 -0800
> @@ -38,7 +38,6 @@ typedef struct kmem_cache kmem_cache_t;
>  #define	SLAB_DEBUG_INITIAL	0x00000200UL	/* Call constructor (as verifier) */
>  #define	SLAB_RED_ZONE		0x00000400UL	/* Red zone objs in a cache */
>  #define	SLAB_POISON		0x00000800UL	/* Poison objects */
> -#define	SLAB_NO_REAP		0x00001000UL	/* never reap from the cache */
>  #define	SLAB_HWCACHE_ALIGN	0x00002000UL	/* align objs on a h/w cache lines */
>  #define SLAB_CACHE_DMA		0x00004000UL	/* use GFP_DMA memory */
>  #define SLAB_MUST_HWCACHE_ALIGN	0x00008000UL	/* force alignment */
> Index: linux-2.6.16-rc4/drivers/scsi/iscsi_tcp.c
> ===================================================================
> --- linux-2.6.16-rc4.orig/drivers/scsi/iscsi_tcp.c	2006-02-17 14:23:45.000000000 -0800
> +++ linux-2.6.16-rc4/drivers/scsi/iscsi_tcp.c	2006-02-22 14:08:46.000000000 -0800
> @@ -3639,7 +3639,7 @@ iscsi_tcp_init(void)
>  
>  	taskcache = kmem_cache_create("iscsi_taskcache",
>  			sizeof(struct iscsi_data_task), 0,
> -			SLAB_HWCACHE_ALIGN | SLAB_NO_REAP, NULL, NULL);
> +			SLAB_HWCACHE_ALIGN, NULL, NULL);
>  	if (!taskcache)
>  		return -ENOMEM;
>  
> Index: linux-2.6.16-rc4/fs/ocfs2/super.c
> ===================================================================
> --- linux-2.6.16-rc4.orig/fs/ocfs2/super.c	2006-02-17 14:23:45.000000000 -0800
> +++ linux-2.6.16-rc4/fs/ocfs2/super.c	2006-02-22 14:08:17.000000000 -0800
> @@ -959,7 +959,7 @@ static int ocfs2_initialize_mem_caches(v
>  	ocfs2_lock_cache = kmem_cache_create("ocfs2_lock",
>  					     sizeof(struct ocfs2_journal_lock),
>  					     0,
> -					     SLAB_NO_REAP|SLAB_HWCACHE_ALIGN,
> +					     SLAB_HWCACHE_ALIGN,
>  					     NULL, NULL);
>  	if (!ocfs2_lock_cache)
>  		return -ENOMEM;


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: slab: Remove SLAB_NO_REAP option
  2006-02-23  7:54 ` Alok Kataria
@ 2006-02-23  8:48   ` Pekka Enberg
  0 siblings, 0 replies; 16+ messages in thread
From: Pekka Enberg @ 2006-02-23  8:48 UTC (permalink / raw)
  To: Alok Kataria; +Cc: Christoph Lameter, akpm, manfred, linux-kernel

On 2/23/06, Alok Kataria <alok.kataria@calsoftinc.com> wrote:
> There can be some caches which are not used quite often, kmem_cache for
> instance. Now from performance perspective having SLAB_NO_REAP for such
> caches is good.

Yeah, kmem_cache sounds like a realistic user, but I am wondering if
it makes any sense for anyone else to use it? If you're not using a
cache that often, perhaps we're better off using kmalloc() instead?

                                   Pekka

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: slab: Remove SLAB_NO_REAP option
  2006-02-24 16:19             ` Christoph Lameter
@ 2006-02-25 22:15               ` Pekka Enberg
  0 siblings, 0 replies; 16+ messages in thread
From: Pekka Enberg @ 2006-02-25 22:15 UTC (permalink / raw)
  To: Christoph Lameter; +Cc: Andrew Morton, Alok Kataria, manfred, linux-kernel

On Fri, 24 Feb 2006, Pekka Enberg wrote:
> > I don't think its worth it. It doesn't make much sense to create a
> > separate object cache if you're not using it, we're better off
> > converting those to kmalloc(). cache_cache is there to make
> > bootstrapping easier, it is very unlikely that you ever have more than
> > one page allocated for that cache which is why scanning the freelist
> > _at all_ is silly. I think SLAB_NO_REAP should go away but we also
> > must ensure we don't introduce a performance regression while doing
> > that.

On Fri, 2006-02-24 at 08:19 -0800, Christoph Lameter wrote:
> Got some way to get rid of cache_cache? Or remove it after boot?

Well, yeah. We can bootsrap a generic cache that can hold sizeof(struct
kmem_cache) first and use that.

				Pekka


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: slab: Remove SLAB_NO_REAP option
  2006-02-24  7:36           ` Pekka Enberg
@ 2006-02-24 16:19             ` Christoph Lameter
  2006-02-25 22:15               ` Pekka Enberg
  0 siblings, 1 reply; 16+ messages in thread
From: Christoph Lameter @ 2006-02-24 16:19 UTC (permalink / raw)
  To: Pekka Enberg; +Cc: Andrew Morton, Alok Kataria, manfred, linux-kernel

On Fri, 24 Feb 2006, Pekka Enberg wrote:

> > One cache_reap() may scan the free list but once its free the code is
> > skipped.
> 
> Which is _totally_ redundant for cache_cache.

Correct. So you are going to add a check to the loop that is useless for 
all other caches? The many have to sacrfice for the one?

> I don't think its worth it. It doesn't make much sense to create a
> separate object cache if you're not using it, we're better off
> converting those to kmalloc(). cache_cache is there to make
> bootstrapping easier, it is very unlikely that you ever have more than
> one page allocated for that cache which is why scanning the freelist
> _at all_ is silly. I think SLAB_NO_REAP should go away but we also
> must ensure we don't introduce a performance regression while doing
> that.

Got some way to get rid of cache_cache? Or remove it after boot?



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: slab: Remove SLAB_NO_REAP option
  2006-02-23 18:47         ` Christoph Lameter
  2006-02-23 19:17           ` Ravikiran G Thirumalai
@ 2006-02-24  7:36           ` Pekka Enberg
  2006-02-24 16:19             ` Christoph Lameter
  1 sibling, 1 reply; 16+ messages in thread
From: Pekka Enberg @ 2006-02-24  7:36 UTC (permalink / raw)
  To: Christoph Lameter; +Cc: Andrew Morton, Alok Kataria, manfred, linux-kernel

On 2/23/06, Christoph Lameter <clameter@engr.sgi.com> wrote:
> There is a loop but its broken by
>
>                         p = l3->slabs_free.next;
>                         if (p == &(l3->slabs_free))
>                                 break;
>
> One cache_reap() may scan the free list but once its free the code is
> skipped.

Which is _totally_ redundant for cache_cache.

On 2/23/06, Christoph Lameter <clameter@engr.sgi.com> wrote:
> There are potentially large amounts of other caches around that are also
> basically static and which also would need any bypass that we may
> implement.

I don't think its worth it. It doesn't make much sense to create a
separate object cache if you're not using it, we're better off
converting those to kmalloc(). cache_cache is there to make
bootstrapping easier, it is very unlikely that you ever have more than
one page allocated for that cache which is why scanning the freelist
_at all_ is silly. I think SLAB_NO_REAP should go away but we also
must ensure we don't introduce a performance regression while doing
that.

                                 Pekka

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: slab: Remove SLAB_NO_REAP option
  2006-02-23 19:17           ` Ravikiran G Thirumalai
@ 2006-02-23 19:24             ` Christoph Lameter
  0 siblings, 0 replies; 16+ messages in thread
From: Christoph Lameter @ 2006-02-23 19:24 UTC (permalink / raw)
  To: Ravikiran G Thirumalai
  Cc: Christoph Lameter, Pekka Enberg, Andrew Morton, Alok Kataria,
	manfred, linux-kernel

On Thu, 23 Feb 2006, Ravikiran G Thirumalai wrote:

> OR, introduce smartness in cache_reap to break the loop earlier if we can
> somehow dynamically recognise the cache is static. 

Right. One could inspect the sizes of the caches without taking the locks 
before committing to a real inspection under lock.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: slab: Remove SLAB_NO_REAP option
  2006-02-23 18:47         ` Christoph Lameter
@ 2006-02-23 19:17           ` Ravikiran G Thirumalai
  2006-02-23 19:24             ` Christoph Lameter
  2006-02-24  7:36           ` Pekka Enberg
  1 sibling, 1 reply; 16+ messages in thread
From: Ravikiran G Thirumalai @ 2006-02-23 19:17 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Pekka Enberg, Andrew Morton, Alok Kataria, manfred, linux-kernel

On Thu, Feb 23, 2006 at 10:47:53AM -0800, Christoph Lameter wrote:
> On Thu, 23 Feb 2006, Pekka Enberg wrote:
> 
> > Look at the loop, it is redundant work (like acquiring/releasing a
> > spinlock). The cache_cache is practically static, which is why it makes
> > sense to leave it alone.
> 
> There is a loop but its broken by
> 
> 			p = l3->slabs_free.next;
>                         if (p == &(l3->slabs_free))
>                                 break;
> 
> One cache_reap() may scan the free list but once its free the code is 
> skipped.

I think Pekka is referring to draining of alien cache, array caches and the
shared caches before the loop is is broken by above.

> 
> There are potentially large amounts of other caches around that are also 
> basically static and which also would need any bypass that we may 
> implement.

I agree. That's where SLAB_NO_REAP can be used? or rather, change the
name/documentation to mean something better.

OR, introduce smartness in cache_reap to break the loop earlier if we can
somehow dynamically recognise the cache is static. 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: slab: Remove SLAB_NO_REAP option
  2006-02-23 18:36       ` Pekka Enberg
@ 2006-02-23 18:47         ` Christoph Lameter
  2006-02-23 19:17           ` Ravikiran G Thirumalai
  2006-02-24  7:36           ` Pekka Enberg
  0 siblings, 2 replies; 16+ messages in thread
From: Christoph Lameter @ 2006-02-23 18:47 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Christoph Lameter, Andrew Morton, Alok Kataria, manfred, linux-kernel

On Thu, 23 Feb 2006, Pekka Enberg wrote:

> Look at the loop, it is redundant work (like acquiring/releasing a
> spinlock). The cache_cache is practically static, which is why it makes
> sense to leave it alone.

There is a loop but its broken by

			p = l3->slabs_free.next;
                        if (p == &(l3->slabs_free))
                                break;

One cache_reap() may scan the free list but once its free the code is 
skipped.

There are potentially large amounts of other caches around that are also 
basically static and which also would need any bypass that we may 
implement.


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: slab: Remove SLAB_NO_REAP option
  2006-02-23 17:20     ` Christoph Lameter
@ 2006-02-23 18:36       ` Pekka Enberg
  2006-02-23 18:47         ` Christoph Lameter
  0 siblings, 1 reply; 16+ messages in thread
From: Pekka Enberg @ 2006-02-23 18:36 UTC (permalink / raw)
  To: Christoph Lameter; +Cc: Andrew Morton, Alok Kataria, manfred, linux-kernel

On Thu, 2006-02-23 at 09:20 -0800, Christoph Lameter wrote:
> On Thu, 23 Feb 2006, Pekka J Enberg wrote:
> 
> > We need _something_ to avoid excessive scanning of cache_cache. It takes a 
> > hell of a lot insmod/rmmod to actually free a full page. Maybe something 
> > like this (totally untested) patch?
> 
> What excessive scanning of cache_cache? If the per cpu cache of 
> cache_cache has been drained then there will be no scanning just an 
> inspection if there are zero elements.

Look at the loop, it is redundant work (like acquiring/releasing a
spinlock). The cache_cache is practically static, which is why it makes
sense to leave it alone.

				Pekka


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: slab: Remove SLAB_NO_REAP option
  2006-02-23 11:34   ` Pekka J Enberg
@ 2006-02-23 17:20     ` Christoph Lameter
  2006-02-23 18:36       ` Pekka Enberg
  0 siblings, 1 reply; 16+ messages in thread
From: Christoph Lameter @ 2006-02-23 17:20 UTC (permalink / raw)
  To: Pekka J Enberg
  Cc: Andrew Morton, Alok Kataria, clameter, manfred, linux-kernel

On Thu, 23 Feb 2006, Pekka J Enberg wrote:

> We need _something_ to avoid excessive scanning of cache_cache. It takes a 
> hell of a lot insmod/rmmod to actually free a full page. Maybe something 
> like this (totally untested) patch?

What excessive scanning of cache_cache? If the per cpu cache of 
cache_cache has been drained then there will be no scanning just an 
inspection if there are zero elements.


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: slab: Remove SLAB_NO_REAP option
  2006-02-23 10:09 ` Andrew Morton
  2006-02-23 11:34   ` Pekka J Enberg
@ 2006-02-23 17:17   ` Christoph Lameter
  1 sibling, 0 replies; 16+ messages in thread
From: Christoph Lameter @ 2006-02-23 17:17 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Alok Kataria, penberg, clameter, manfred, linux-kernel

On Thu, 23 Feb 2006, Andrew Morton wrote:

> I'm very much hoping that it is not needed.  Would prefer to just toss the
> whole thing away.

Right.

> What's it supposed to do anyway?  Keep wholly-unused pages hanging about in
> each slab cache?  If so, it may well be a net loss - it'd be better to push
> those pages back into the page allocator so they can get reused for
> something else while they're possibly still in-cache.  Similarly, it's
> better to fall back to the page allocator for a new slab page because
> that's more likely to give us a cache-hot one.

There needs to be some convincing rationale for SLAB_NO_REAP plus the 
documentation should be updated to explain correctly what it does if we 
decide to keep SLAB_NO_REAP.


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: slab: Remove SLAB_NO_REAP option
  2006-02-23 10:09 ` Andrew Morton
@ 2006-02-23 11:34   ` Pekka J Enberg
  2006-02-23 17:20     ` Christoph Lameter
  2006-02-23 17:17   ` Christoph Lameter
  1 sibling, 1 reply; 16+ messages in thread
From: Pekka J Enberg @ 2006-02-23 11:34 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Alok Kataria, clameter, manfred, linux-kernel

On Thu, 23 Feb 2006, Andrew Morton wrote:
> I'm very much hoping that it is not needed.  Would prefer to just toss the
> whole thing away.
> 
> What's it supposed to do anyway?  Keep wholly-unused pages hanging about in
> each slab cache?  If so, it may well be a net loss - it'd be better to push
> those pages back into the page allocator so they can get reused for
> something else while they're possibly still in-cache.  Similarly, it's
> better to fall back to the page allocator for a new slab page because
> that's more likely to give us a cache-hot one.

We need _something_ to avoid excessive scanning of cache_cache. It takes a 
hell of a lot insmod/rmmod to actually free a full page. Maybe something 
like this (totally untested) patch?

				Pekka

Index: 2.6-git/drivers/scsi/iscsi_tcp.c
===================================================================
--- 2.6-git.orig/drivers/scsi/iscsi_tcp.c
+++ 2.6-git/drivers/scsi/iscsi_tcp.c
@@ -3639,7 +3639,7 @@ iscsi_tcp_init(void)
 
 	taskcache = kmem_cache_create("iscsi_taskcache",
 			sizeof(struct iscsi_data_task), 0,
-			SLAB_HWCACHE_ALIGN | SLAB_NO_REAP, NULL, NULL);
+			SLAB_HWCACHE_ALIGN, NULL, NULL);
 	if (!taskcache)
 		return -ENOMEM;
 
Index: 2.6-git/fs/ocfs2/super.c
===================================================================
--- 2.6-git.orig/fs/ocfs2/super.c
+++ 2.6-git/fs/ocfs2/super.c
@@ -959,7 +959,7 @@ static int ocfs2_initialize_mem_caches(v
 	ocfs2_lock_cache = kmem_cache_create("ocfs2_lock",
 					     sizeof(struct ocfs2_journal_lock),
 					     0,
-					     SLAB_NO_REAP|SLAB_HWCACHE_ALIGN,
+					     SLAB_HWCACHE_ALIGN,
 					     NULL, NULL);
 	if (!ocfs2_lock_cache)
 		return -ENOMEM;
Index: 2.6-git/include/linux/slab.h
===================================================================
--- 2.6-git.orig/include/linux/slab.h
+++ 2.6-git/include/linux/slab.h
@@ -38,7 +38,6 @@ typedef struct kmem_cache kmem_cache_t;
 #define	SLAB_DEBUG_INITIAL	0x00000200UL	/* Call constructor (as verifier) */
 #define	SLAB_RED_ZONE		0x00000400UL	/* Red zone objs in a cache */
 #define	SLAB_POISON		0x00000800UL	/* Poison objects */
-#define	SLAB_NO_REAP		0x00001000UL	/* never reap from the cache */
 #define	SLAB_HWCACHE_ALIGN	0x00002000UL	/* align objs on a h/w cache lines */
 #define SLAB_CACHE_DMA		0x00004000UL	/* use GFP_DMA memory */
 #define SLAB_MUST_HWCACHE_ALIGN	0x00008000UL	/* force alignment */
Index: 2.6-git/mm/slab.c
===================================================================
--- 2.6-git.orig/mm/slab.c
+++ 2.6-git/mm/slab.c
@@ -170,12 +170,12 @@
 #if DEBUG
 # define CREATE_MASK	(SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \
 			 SLAB_POISON | SLAB_HWCACHE_ALIGN | \
-			 SLAB_NO_REAP | SLAB_CACHE_DMA | \
+			 SLAB_CACHE_DMA | \
 			 SLAB_MUST_HWCACHE_ALIGN | SLAB_STORE_USER | \
 			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
 			 SLAB_DESTROY_BY_RCU)
 #else
-# define CREATE_MASK	(SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | \
+# define CREATE_MASK	(SLAB_HWCACHE_ALIGN | \
 			 SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \
 			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
 			 SLAB_DESTROY_BY_RCU)
@@ -642,7 +642,6 @@ static struct kmem_cache cache_cache = {
 	.limit = BOOT_CPUCACHE_ENTRIES,
 	.shared = 1,
 	.buffer_size = sizeof(struct kmem_cache),
-	.flags = SLAB_NO_REAP,
 	.spinlock = SPIN_LOCK_UNLOCKED,
 	.name = "kmem_cache",
 #if DEBUG
@@ -1689,9 +1688,6 @@ static inline size_t calculate_slab_orde
  * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
  * for buffer overruns.
  *
- * %SLAB_NO_REAP - Don't automatically reap this cache when we're under
- * memory pressure.
- *
  * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
  * cacheline.  This can be beneficial if you're counting cycles as closely
  * as davem.
@@ -3487,8 +3483,7 @@ static void cache_reap(void *unused)
 		struct slab *slabp;
 
 		searchp = list_entry(walk, struct kmem_cache, next);
-
-		if (searchp->flags & SLAB_NO_REAP)
+		if (searchp == &cache_cache)
 			goto next;
 
 		check_irq_on();

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: slab: Remove SLAB_NO_REAP option
  2006-02-23  9:35 Alok Kataria
@ 2006-02-23 10:09 ` Andrew Morton
  2006-02-23 11:34   ` Pekka J Enberg
  2006-02-23 17:17   ` Christoph Lameter
  0 siblings, 2 replies; 16+ messages in thread
From: Andrew Morton @ 2006-02-23 10:09 UTC (permalink / raw)
  To: Alok Kataria; +Cc: penberg, clameter, manfred, linux-kernel

Alok Kataria <alok.kataria@calsoftinc.com> wrote:
>
>  On Thu, 2006-02-23 at 14:18, Pekka Enberg wrote:
>  On 2/23/06, Alok Kataria <alok.kataria@calsoftinc.com> wrote:
>  > > There can be some caches which are not used quite often, kmem_cache 
>  > > for instance. Now from performance perspective having SLAB_NO_REAP for 
>  > > such caches is good.
>  >
>  > Yeah, kmem_cache sounds like a realistic user, but I am wondering if
>  > it makes any sense for anyone else to use it?
>  >
>  Right, thats why my question still is why do these iscsi & ocfs  cache 
>  have this flag set.

I'm sure there's no good reason.

>  If we are sure that the flag is still required, we can use the patch 
>  below.

I'm very much hoping that it is not needed.  Would prefer to just toss the
whole thing away.

What's it supposed to do anyway?  Keep wholly-unused pages hanging about in
each slab cache?  If so, it may well be a net loss - it'd be better to push
those pages back into the page allocator so they can get reused for
something else while they're possibly still in-cache.  Similarly, it's
better to fall back to the page allocator for a new slab page because
that's more likely to give us a cache-hot one.

I'm convinced, anyway ;)

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: slab: Remove SLAB_NO_REAP option
@ 2006-02-23  9:35 Alok Kataria
  2006-02-23 10:09 ` Andrew Morton
  0 siblings, 1 reply; 16+ messages in thread
From: Alok Kataria @ 2006-02-23  9:35 UTC (permalink / raw)
  To: Pekka Enberg; +Cc: Christoph Lameter, akpm, manfred, linux-kernel

On Thu, 2006-02-23 at 14:18, Pekka Enberg wrote:
On 2/23/06, Alok Kataria <alok.kataria@calsoftinc.com> wrote:
> > There can be some caches which are not used quite often, kmem_cache 
> > for instance. Now from performance perspective having SLAB_NO_REAP for 
> > such caches is good.
>
> Yeah, kmem_cache sounds like a realistic user, but I am wondering if
> it makes any sense for anyone else to use it?
>
Right, thats why my question still is why do these iscsi & ocfs  cache 
have this flag set.

If we are sure that the flag is still required, we can use the patch 
below.

> If you're not using a
> cache that often, perhaps we're better off using kmalloc() instead?
>

Right.

Thanks & Regards,
Alok

--
As pointed by Christoph, there is a problem with SLAB_NO_REAP flag. If set 
then the recovery of objects from alien caches is switched off.
Objects not freed on the same node where they were initially
allocated will only be reused if a certain amount of objects
accumulates from one alien node (not very likely) or if the cache is
explicitly shrunk.
This patch facilitates draining of the alien caches irrespective of the value
of SLAB_NO_REAP flag.

Signed-off-by: Alok N Kataria <alok.kataria@calsoftinc.com>

Index: linux-2.6.16-rc4-git5/mm/slab.c
===================================================================
--- linux-2.6.16-rc4-git5.orig/mm/slab.c	2006-02-23 01:09:49.000000000 -0800
+++ linux-2.6.16-rc4-git5/mm/slab.c	2006-02-23 01:12:54.000000000 -0800
@@ -3488,14 +3488,15 @@ static void cache_reap(void *unused)

  		searchp = list_entry(walk, struct kmem_cache, next);

-		if (searchp->flags & SLAB_NO_REAP)
-			goto next;
-
  		check_irq_on();

  		l3 = searchp->nodelists[numa_node_id()];
  		if (l3->alien)
  			drain_alien_cache(searchp, l3->alien);
+
+		if (searchp->flags & SLAB_NO_REAP)
+			goto next;
+
  		spin_lock_irq(&l3->list_lock);

  		drain_array_locked(searchp, cpu_cache_get(searchp), 0,

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2006-02-25 22:15 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-02-22 22:34 slab: Remove SLAB_NO_REAP option Christoph Lameter
2006-02-23  7:46 ` Pekka Enberg
2006-02-23  7:54 ` Alok Kataria
2006-02-23  8:48   ` Pekka Enberg
2006-02-23  9:35 Alok Kataria
2006-02-23 10:09 ` Andrew Morton
2006-02-23 11:34   ` Pekka J Enberg
2006-02-23 17:20     ` Christoph Lameter
2006-02-23 18:36       ` Pekka Enberg
2006-02-23 18:47         ` Christoph Lameter
2006-02-23 19:17           ` Ravikiran G Thirumalai
2006-02-23 19:24             ` Christoph Lameter
2006-02-24  7:36           ` Pekka Enberg
2006-02-24 16:19             ` Christoph Lameter
2006-02-25 22:15               ` Pekka Enberg
2006-02-23 17:17   ` Christoph Lameter

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).