linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC] [PATCH 1/4] Add extended attributes to ext2/3
@ 2002-10-08 18:08 tytso
  2002-10-08 18:19 ` [Ext2-devel] " Christoph Hellwig
  2002-10-08 18:21 ` Rik van Riel
  0 siblings, 2 replies; 11+ messages in thread
From: tytso @ 2002-10-08 18:08 UTC (permalink / raw)
  To: linux-kernel; +Cc: ext2-devel


This is the first of four patches which add extended attribute support
to the ext2 and ext3 filesystems.  It is a port of Andreas Gruenbacher's
patches, which have been well tested and are in a number of distributions
(including RH 8, if I'm not mistaken) already.  I just ported it to 2.5
(these patches are versus 2.5.40).  As always, since I touched the code
last, any problems in it are my fault.  :-) 

These patches are a prerequisite for the port of Andreas Gruenbacher's
ACL patches to 2.5, which I'm currently working on.  But given the short
time-frame before feature freeze, I'd like to get these out for review
ASAP.  Please comment and bleed on them.

This first patch creates a generic interface for registering caches with
the VM subsystem so that they can react appropriately to memory
pressure.


# This is a BitKeeper generated patch for the following project:
# Project Name: Linux kernel tree
#
# include/linux/cache_def.h |   15 +++++++++++++++
# kernel/ksyms.c            |    3 +++
# mm/vmscan.c               |   29 +++++++++++++++++++++++++++++
#
# The following is the BitKeeper ChangeSet Log
# --------------------------------------------
# 02/10/04	tytso@think.thunk.org	1.665
# Port of the 0.8.50 cache-def patch.
# --------------------------------------------
#
diff -Nru a/include/linux/cache_def.h b/include/linux/cache_def.h
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/include/linux/cache_def.h	Tue Oct  8 13:52:08 2002
@@ -0,0 +1,15 @@
+/*
+ * linux/cache_def.h
+ * Handling of caches defined in drivers, filesystems, ...
+ *
+ * Copyright (C) 2002 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
+ */
+
+struct cache_definition {
+	const char *name;
+	void (*shrink)(int, unsigned int);
+	struct list_head link;
+};
+
+extern void register_cache(struct cache_definition *);
+extern void unregister_cache(struct cache_definition *);
diff -Nru a/kernel/ksyms.c b/kernel/ksyms.c
--- a/kernel/ksyms.c	Tue Oct  8 13:52:08 2002
+++ b/kernel/ksyms.c	Tue Oct  8 13:52:08 2002
@@ -31,6 +31,7 @@
 #include <linux/genhd.h>
 #include <linux/blkpg.h>
 #include <linux/swap.h>
+#include <linux/cache_def.h>
 #include <linux/ctype.h>
 #include <linux/file.h>
 #include <linux/iobuf.h>
@@ -106,6 +107,8 @@
 EXPORT_SYMBOL(kmem_cache_alloc);
 EXPORT_SYMBOL(kmem_cache_free);
 EXPORT_SYMBOL(kmem_cache_size);
+EXPORT_SYMBOL(register_cache);
+EXPORT_SYMBOL(unregister_cache);
 EXPORT_SYMBOL(kmalloc);
 EXPORT_SYMBOL(kfree);
 EXPORT_SYMBOL(vfree);
diff -Nru a/mm/vmscan.c b/mm/vmscan.c
--- a/mm/vmscan.c	Tue Oct  8 13:52:08 2002
+++ b/mm/vmscan.c	Tue Oct  8 13:52:08 2002
@@ -15,6 +15,7 @@
 #include <linux/slab.h>
 #include <linux/kernel_stat.h>
 #include <linux/swap.h>
+#include <linux/cache_def.h>
 #include <linux/pagemap.h>
 #include <linux/init.h>
 #include <linux/highmem.h>
@@ -76,6 +77,33 @@
 #define shrink_dqcache_memory(ratio, gfp_mask) do { } while (0)
 #endif
 
+static LIST_HEAD(cache_definitions);
+
+/* BKL must be held */
+void register_cache(struct cache_definition *cache)
+{
+	list_add(&cache->link, &cache_definitions);
+}
+
+/* BKL must be held */
+void unregister_cache(struct cache_definition *cache)
+{
+	list_del(&cache->link);
+}
+
+static void shrink_other_caches(int ratio, int gfp_mask)
+{
+	struct list_head *p = cache_definitions.prev;
+
+	while (p != &cache_definitions) {
+		struct cache_definition *cache =
+			list_entry(p, struct cache_definition, link);
+
+		cache->shrink(ratio, gfp_mask);  /* BLK held */
+		p = p->prev;
+	}
+}
+
 /* Must be called with page's pte_chain_lock held. */
 static inline int page_mapping_inuse(struct page * page)
 {
@@ -614,6 +642,7 @@
 	shrink_dcache_memory(ratio, gfp_mask);
 	shrink_icache_memory(ratio, gfp_mask);
 	shrink_dqcache_memory(ratio, gfp_mask);
+	shrink_other_caches(ratio, gfp_mask);
 	return nr_pages;
 }
 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Ext2-devel] [RFC] [PATCH 1/4] Add extended attributes to ext2/3
  2002-10-08 18:08 [RFC] [PATCH 1/4] Add extended attributes to ext2/3 tytso
@ 2002-10-08 18:19 ` Christoph Hellwig
  2002-10-08 18:38   ` Andrew Morton
  2002-10-08 18:40   ` Theodore Ts'o
  2002-10-08 18:21 ` Rik van Riel
  1 sibling, 2 replies; 11+ messages in thread
From: Christoph Hellwig @ 2002-10-08 18:19 UTC (permalink / raw)
  To: tytso; +Cc: linux-kernel, ext2-devel, Ed Tomlinson

On Tue, Oct 08, 2002 at 02:08:11PM -0400, tytso@mit.edu wrote:
> 
> This is the first of four patches which add extended attribute support
> to the ext2 and ext3 filesystems.  It is a port of Andreas Gruenbacher's
> patches, which have been well tested and in a number of distributions
> (including RH 8, if I'm not mistaken) already.

RH backed it out after the second or third beta due to bugginess..

> This first patch creates a generic interface for registering caches with
> the VM subsystem so that they can react appropriately to memory
> pressure.

I'd suggest Ed Tomlinson's much saner interface that adds a third callback
to kmem_cache_t (similar to the Solaris implementation) instead.

Doing this outside slab is not a good idea (and XFS currently does
it too - in its own code which should be replaced with Ed's one)

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC] [PATCH 1/4] Add extended attributes to ext2/3
  2002-10-08 18:08 [RFC] [PATCH 1/4] Add extended attributes to ext2/3 tytso
  2002-10-08 18:19 ` [Ext2-devel] " Christoph Hellwig
@ 2002-10-08 18:21 ` Rik van Riel
  2002-10-08 18:59   ` [Ext2-devel] " Andreas Gruenbacher
  1 sibling, 1 reply; 11+ messages in thread
From: Rik van Riel @ 2002-10-08 18:21 UTC (permalink / raw)
  To: tytso; +Cc: linux-kernel, ext2-devel

On Tue, 8 Oct 2002 tytso@mit.edu wrote:

> This first patch creates a generic interface for registering caches with
> the VM subsystem so that they can react appropriately to memory
> pressure.

> +/* BKL must be held */

... but it isn't.  Also, kswapd isn't holding the bkl while
traversing the list.

> +void register_cache(struct cache_definition *cache)
> +{
> +	list_add(&cache->link, &cache_definitions);
> +}

I suspect you'll want a semaphore for the cache_definitions
list.

cheers,

Rik
-- 
A: No.
Q: Should I include quotations after my reply?

http://www.surriel.com/		http://distro.conectiva.com/


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Ext2-devel] [RFC] [PATCH 1/4] Add extended attributes to ext2/3
  2002-10-08 18:19 ` [Ext2-devel] " Christoph Hellwig
@ 2002-10-08 18:38   ` Andrew Morton
  2002-10-08 18:47     ` Andreas Gruenbacher
  2002-10-08 18:40   ` Theodore Ts'o
  1 sibling, 1 reply; 11+ messages in thread
From: Andrew Morton @ 2002-10-08 18:38 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: tytso, linux-kernel, ext2-devel, Ed Tomlinson

Christoph Hellwig wrote:
> 
> On Tue, Oct 08, 2002 at 02:08:11PM -0400, tytso@mit.edu wrote:
> >
> > This is the first of four patches which add extended attribute support
> > to the ext2 and ext3 filesystems.  It is a port of Andreas Gruenbacher's
> > patches, which have been well tested and in a number of distributions
> > (including RH 8, if I'm not mistaken) already.
> 
> RH backed it out after the second or third beta due to bugginess..
> 
> > This first patch creates a generic interface for registering caches with
> > the VM subsystem so that they can react appropriately to memory
> > pressure.
> 
> I'd suggest Ed Tomlinson's much saner interface that adds a third callbackj
> to kmem_cache_t (similar to the Solaris implementation) instead.
> 
> Doing this outside slab is not a good idea (and XFS currently does
> it too - in it's own code which should be replaced with Ed's one)

Yup, although that's a fairly minor point in this context..

The shrinker callback code is not actually in Linus's kernel
at present.  I'm kind of sitting on it until I've had time
to ponder the dirty great lock which Ed added ;)

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Ext2-devel] [RFC] [PATCH 1/4] Add extended attributes to ext2/3
  2002-10-08 18:19 ` [Ext2-devel] " Christoph Hellwig
  2002-10-08 18:38   ` Andrew Morton
@ 2002-10-08 18:40   ` Theodore Ts'o
  2002-10-08 18:50     ` Christoph Hellwig
  1 sibling, 1 reply; 11+ messages in thread
From: Theodore Ts'o @ 2002-10-08 18:40 UTC (permalink / raw)
  To: Christoph Hellwig, linux-kernel, ext2-devel, Ed Tomlinson

On Tue, Oct 08, 2002 at 07:19:00PM +0100, Christoph Hellwig wrote:
> > This first patch creates a generic interface for registering caches with
> > the VM subsystem so that they can react appropriately to memory
> > pressure.
> 
> I'd suggest Ed Tomlinson's much saner interface that adds a third callbackj
> to kmem_cache_t (similar to the Solaris implementation) instead.

Can you give me a pointer to his stuff?  Thanks!

						- Ted	

> Doing this outside slab is not a good idea (and XFS currently does
> it too - in it's own code which should be replaced with Ed's one)


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Ext2-devel] [RFC] [PATCH 1/4] Add extended attributes to ext2/3
  2002-10-08 18:38   ` Andrew Morton
@ 2002-10-08 18:47     ` Andreas Gruenbacher
  2002-10-08 19:10       ` Rik van Riel
  2002-10-10  6:56       ` Andrew Morton
  0 siblings, 2 replies; 11+ messages in thread
From: Andreas Gruenbacher @ 2002-10-08 18:47 UTC (permalink / raw)
  To: Andrew Morton, Christoph Hellwig
  Cc: tytso, linux-kernel, ext2-devel, Ed Tomlinson

On Tuesday 08 October 2002 20:38, Andrew Morton wrote:
> [...]
> > > This first patch creates a generic interface for registering caches
> > > with the VM subsystem so that they can react appropriately to memory
> > > pressure.
> >
> > I'd suggest Ed Tomlinson's much saner interface that adds a third
> > callbackj to kmem_cache_t (similar to the Solaris implementation)
> > instead.
> >
> > Doing this outside slab is not a good idea (and XFS currently does
> > it too - in it's own code which should be replaced with Ed's one)
>
> Yup, although that's a fairly minor point in this context..
>
> The shrinker callback code is not actually in Linus's kernel
> at present.  I'm kind of sitting on it until I've had time
> to ponder the dirty great lock which Ed added ;)

Switching to Ed's code once it's in the kernel may be worthwhile; until then 
the dumb shrinking approach doesn't seem to do much harm IMHO.

--Andreas.


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Ext2-devel] [RFC] [PATCH 1/4] Add extended attributes to ext2/3
  2002-10-08 18:40   ` Theodore Ts'o
@ 2002-10-08 18:50     ` Christoph Hellwig
  2002-10-08 22:37       ` Ed Tomlinson
  0 siblings, 1 reply; 11+ messages in thread
From: Christoph Hellwig @ 2002-10-08 18:50 UTC (permalink / raw)
  To: Theodore Ts'o, linux-kernel, ext2-devel, Ed Tomlinson

On Tue, Oct 08, 2002 at 02:40:39PM -0400, Theodore Ts'o wrote:
> On Tue, Oct 08, 2002 at 07:19:00PM +0100, Christoph Hellwig wrote:
> > > This first patch creates a generic interface for registering caches with
> > > the VM subsystem so that they can react appropriately to memory
> > > pressure.
> > 
> > I'd suggest Ed Tomlinson's much saner interface that adds a third callbackj
> > to kmem_cache_t (similar to the Solaris implementation) instead.
> 
> Can you give me a pointer to his stuff?  Thanks!

It is/was in akpm's -mm tree (http://www.zip.com.au/~akpm/linux/patches/2.5/).
Ed, do you have a pointer to your most recent patch?


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Ext2-devel] Re: [RFC] [PATCH 1/4] Add extended attributes to ext2/3
  2002-10-08 18:21 ` Rik van Riel
@ 2002-10-08 18:59   ` Andreas Gruenbacher
  0 siblings, 0 replies; 11+ messages in thread
From: Andreas Gruenbacher @ 2002-10-08 18:59 UTC (permalink / raw)
  To: tytso; +Cc: linux-kernel, ext2-devel, Rik van Riel

On Tuesday 08 October 2002 20:21, Rik van Riel wrote:
> On Tue, 8 Oct 2002 tytso@mit.edu wrote:
> > This first patch creates a generic interface for registering caches with
> > the VM subsystem so that they can react appropriately to memory
> > pressure.
> >
> > +/* BKL must be held */
>
> ... but it isn't.  Also, kswapd isn't holding the bkl while
> traversing the list.
>
> > +void register_cache(struct cache_definition *cache)
> > +{
> > +	list_add(&cache->link, &cache_definitions);
> > +}
>
> I suspect you'll want a semaphore for the cache_definitions
> list.

My apologies. This has slipped me; I had in fact added a semaphore in a 
different branch. Here is a fixed version.

--Andreas.

diff -Nru a/include/linux/cache_def.h b/include/linux/cache_def.h
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/include/linux/cache_def.h	Tue Oct  8 13:52:08 2002
@@ -0,0 +1,15 @@
+/*
+ * linux/cache_def.h
+ * Handling of caches defined in drivers, filesystems, ...
+ *
+ * Copyright (C) 2002 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
+ */
+
+struct cache_definition {
+	const char *name;
+	void (*shrink)(int, unsigned int);
+	struct list_head link;
+};
+
+extern void register_cache(struct cache_definition *);
+extern void unregister_cache(struct cache_definition *);
--- a/kernel/ksyms.c	Tue Oct  8 13:52:08 2002
+++ b/kernel/ksyms.c	Tue Oct  8 13:52:08 2002
@@ -31,6 +31,7 @@
 #include <linux/genhd.h>
 #include <linux/blkpg.h>
 #include <linux/swap.h>
+#include <linux/cache_def.h>
 #include <linux/ctype.h>
 #include <linux/file.h>
 #include <linux/iobuf.h>
@@ -106,6 +107,8 @@
 EXPORT_SYMBOL(kmem_cache_alloc);
 EXPORT_SYMBOL(kmem_cache_free);
 EXPORT_SYMBOL(kmem_cache_size);
+EXPORT_SYMBOL(register_cache);
+EXPORT_SYMBOL(unregister_cache);
 EXPORT_SYMBOL(kmalloc);
 EXPORT_SYMBOL(kfree);
 EXPORT_SYMBOL(vfree);
--- a/mm/vmscan.c	Tue Oct  8 13:52:08 2002
+++ b/mm/vmscan.c	Tue Oct  8 13:52:08 2002
@@ -15,6 +15,7 @@
 #include <linux/slab.h>
 #include <linux/kernel_stat.h>
 #include <linux/swap.h>
+#include <linux/cache_def.h>
 #include <linux/pagemap.h>
 #include <linux/init.h>
 #include <linux/highmem.h>
@@ -76,6 +77,39 @@
 #define shrink_dqcache_memory(ratio, gfp_mask) do { } while (0)
 #endif
 
+static DECLARE_MUTEX(other_caches_sem);
+static LIST_HEAD(cache_definitions);
+
+void register_cache(struct cache_definition *cache)
+{
+	down(&other_caches_sem);
+	list_add(&cache->link, &cache_definitions);
+	up(&other_caches_sem);
+}
+
+void unregister_cache(struct cache_definition *cache)
+{
+	down(&other_caches_sem);
+	list_del(&cache->link);
+	up(&other_caches_sem);
+}
+
+static void shrink_other_caches(int ratio, int gfp_mask)
+{
+	struct list_head *p;
+
+	down(&other_caches_sem);
+	p = cache_definitions.prev;
+	while (p != &cache_definitions) {
+		struct cache_definition *cache =
+			list_entry(p, struct cache_definition, link);
+
+		cache->shrink(ratio, gfp_mask);
+		p = p->prev;
+	}
+	up(&other_caches_sem);
+}
+
 /* Must be called with page's pte_chain_lock held. */
 static inline int page_mapping_inuse(struct page * page)
 {
@@ -614,6 +648,7 @@
 	shrink_dcache_memory(ratio, gfp_mask);
 	shrink_icache_memory(ratio, gfp_mask);
 	shrink_dqcache_memory(ratio, gfp_mask);
+	shrink_other_caches(ratio, gfp_mask);
 	return nr_pages;
 }


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Ext2-devel] [RFC] [PATCH 1/4] Add extended attributes to ext2/3
  2002-10-08 18:47     ` Andreas Gruenbacher
@ 2002-10-08 19:10       ` Rik van Riel
  2002-10-10  6:56       ` Andrew Morton
  1 sibling, 0 replies; 11+ messages in thread
From: Rik van Riel @ 2002-10-08 19:10 UTC (permalink / raw)
  To: Andreas Gruenbacher
  Cc: Andrew Morton, Christoph Hellwig, tytso, linux-kernel,
	ext2-devel, Ed Tomlinson

On Tue, 8 Oct 2002, Andreas Gruenbacher wrote:

> Switching to Ed's code once it's in the kernel may be worthwhile; until
> then the dumb shrinking approaah doesn't to do much harm IMHO.

Agreed.

Rik
-- 
A: No.
Q: Should I include quotations after my reply?

http://www.surriel.com/		http://distro.conectiva.com/


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Ext2-devel] [RFC] [PATCH 1/4] Add extended attributes to ext2/3
  2002-10-08 18:50     ` Christoph Hellwig
@ 2002-10-08 22:37       ` Ed Tomlinson
  0 siblings, 0 replies; 11+ messages in thread
From: Ed Tomlinson @ 2002-10-08 22:37 UTC (permalink / raw)
  To: Christoph Hellwig, Theodore Ts'o, linux-kernel, ext2-devel

On October 8, 2002 02:50 pm, Christoph Hellwig wrote:
> On Tue, Oct 08, 2002 at 02:40:39PM -0400, Theodore Ts'o wrote:
> > On Tue, Oct 08, 2002 at 07:19:00PM +0100, Christoph Hellwig wrote:
> > > > This first patch creates a generic interface for registering caches
> > > > with the VM subsystem so that they can react appropriately to memory
> > > > pressure.
> > >
> > > I'd suggest Ed Tomlinson's much saner interface that adds a third
> > > callbackj to kmem_cache_t (similar to the Solaris implementation)
> > > instead.
> >
> > Can you give me a pointer to his stuff?  Thanks!
>
> It is/was in akpm's -mm tree
> (http://www.zip.com.au/~akpm/linux/patches/2.5/). Ed, do you have a pointer
> to your most recent patch?

It's in Andrew's tree.

Ed


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Ext2-devel] [RFC] [PATCH 1/4] Add extended attributes to ext2/3
  2002-10-08 18:47     ` Andreas Gruenbacher
  2002-10-08 19:10       ` Rik van Riel
@ 2002-10-10  6:56       ` Andrew Morton
  1 sibling, 0 replies; 11+ messages in thread
From: Andrew Morton @ 2002-10-10  6:56 UTC (permalink / raw)
  To: Andreas Gruenbacher
  Cc: Christoph Hellwig, tytso, linux-kernel, ext2-devel, Ed Tomlinson

Andreas Gruenbacher wrote:
> 
> Switching to Ed's code once it's in the kernel may be worthwhile; until then
> the dumb shrinking approaah doesn't to do much harm IMHO.

I've done another round on Ed's registration and shrinking
API.  Here's what we have.  The reiserfs team need the shrinker
registration API as well.

(Some of the below code still has wet paint ;))





From Ed Tomlinson, then mauled by yours truly.

The current shrinking of the dentry, inode and dquot caches seems to
work OK, but it is slightly CPU-inefficient: we call the shrinking
functions many times, for tiny numbers of objects.

So here, we just batch that up - shrinking happens at the same rate but
we perform it in larger units of work.

To do this, we need a way of knowing how many objects are currently in
use by individual caches.  slab does not actually track this
information, but the existing shrinkable caches do have this on hand. 
So rather than adding the counters to slab, we require that the
shrinker callback functions keep their own count - we query that via
the callback.

We add a simple registration API which is exported to modules.  A
subsystem may register its own callback function via set_shrinker().

set_shrinker() simply takes a function pointer.  The function is called
with

	int (*shrinker)(int nr_to_scan, unsigned int gfp_mask);

The shrinker callback must scan `nr_to_scan' objects and free all
freeable scanned objects.  Note: it doesn't have to *free* `nr_to_scan'
objects.  It need only scan that many.  Which is a fairly pedantic
detail, really.

The shrinker callback must return the number of objects which are in
its cache at the end of the scanning attempt.  It will be called with
nr_to_scan == 0 when we're just querying the cache size.

The set_shrinker() registration API is passed a hint as to how many
disk seeks a single cache object is worth.  Everything uses "2" at
present.


- shrink_icache_memory() is no longer exported to modules.

- shrink_icache_memory() is now static to fs/inode.c

- prune_icache() is now static to fs/inode.c, and made inline (single caller)

- shrink_dcache_memory() is made static to fs/dcache.c

- prune_dcache() is no longer exported to modules

- prune_dcache() is made static to fs/dcache.c

- shrink_dqcache_memory() is made static to fs/dquot.c

- All the quota init code has been moved from fs/dcache.c into fs/dquot.c

- All modifications to inodes_stat.nr_inodes are now inside
  inode_lock - the dispose_list one was racy.




 fs/dcache.c            |   50 ++++++++------------
 fs/dquot.c             |   28 ++++++++---
 fs/inode.c             |   49 ++++++++++----------
 include/linux/dcache.h |   11 ----
 include/linux/mm.h     |   23 +++++++++
 kernel/ksyms.c         |    3 -
 mm/vmscan.c            |  119 ++++++++++++++++++++++++++++++++++++-------------
 7 files changed, 179 insertions(+), 104 deletions(-)

--- 2.5.41/fs/dcache.c~batched-slab-asap	Wed Oct  9 22:59:10 2002
+++ 2.5.41-akpm/fs/dcache.c	Wed Oct  9 23:13:04 2002
@@ -328,7 +328,7 @@ static inline void prune_one_dentry(stru
  * all the dentries are in use.
  */
  
-void prune_dcache(int count)
+static void prune_dcache(int count)
 {
 	spin_lock(&dcache_lock);
 	for (; count ; count--) {
@@ -572,25 +572,24 @@ void shrink_dcache_anon(struct list_head
  * This is called from kswapd when we think we need some
  * more memory. 
  */
-int shrink_dcache_memory(int ratio, unsigned int gfp_mask)
+static int shrink_dcache_memory(int nr, unsigned int gfp_mask)
 {
-	int entries = dentry_stat.nr_dentry / ratio + 1;
-	/*
-	 * Nasty deadlock avoidance.
-	 *
-	 * ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache->
-	 * prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->
-	 * put_inode->ext2_discard_prealloc->ext2_free_blocks->lock_super->
-	 * DEADLOCK.
-	 *
-	 * We should make sure we don't hold the superblock lock over
-	 * block allocations, but for now:
-	 */
-	if (!(gfp_mask & __GFP_FS))
-		return 0;
-
-	prune_dcache(entries);
-	return entries;
+	if (nr) {
+		/*
+		 * Nasty deadlock avoidance.
+		 *
+	 	 * ext2_new_block->getblk->GFP->shrink_dcache_memory->
+		 * prune_dcache->prune_one_dentry->dput->dentry_iput->iput->
+		 * inode->i_sb->s_op->put_inode->ext2_discard_prealloc->
+		 * ext2_free_blocks->lock_super->DEADLOCK.
+	 	 *
+	 	 * We should make sure we don't hold the superblock lock over
+	 	 * block allocations, but for now:
+		 */
+		if (gfp_mask & __GFP_FS)
+			prune_dcache(nr);
+	}
+	return dentry_stat.nr_dentry;
 }
 
 #define NAME_ALLOC_LEN(len)	((len+16) & ~15)
@@ -1330,6 +1329,8 @@ static void __init dcache_init(unsigned 
 					 NULL, NULL);
 	if (!dentry_cache)
 		panic("Cannot create dentry cache");
+	
+	set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory);
 
 #if PAGE_SHIFT < 13
 	mempages >>= (13 - PAGE_SHIFT);
@@ -1375,9 +1376,6 @@ kmem_cache_t *names_cachep;
 /* SLAB cache for file structures */
 kmem_cache_t *filp_cachep;
 
-/* SLAB cache for dquot structures */
-kmem_cache_t *dquot_cachep;
-
 EXPORT_SYMBOL(d_genocide);
 
 extern void bdev_cache_init(void);
@@ -1397,14 +1395,6 @@ void __init vfs_caches_init(unsigned lon
 	if(!filp_cachep)
 		panic("Cannot create filp SLAB cache");
 
-#if defined (CONFIG_QUOTA)
-	dquot_cachep = kmem_cache_create("dquot", 
-			sizeof(struct dquot), sizeof(unsigned long) * 4,
-			SLAB_HWCACHE_ALIGN, NULL, NULL);
-	if (!dquot_cachep)
-		panic("Cannot create dquot SLAB cache");
-#endif
-
 	dcache_init(mempages);
 	inode_init(mempages);
 	files_init(mempages); 
--- 2.5.41/fs/dquot.c~batched-slab-asap	Wed Oct  9 22:59:10 2002
+++ 2.5.41-akpm/fs/dquot.c	Wed Oct  9 23:14:10 2002
@@ -55,6 +55,7 @@
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/fs.h>
+#include <linux/mm.h>
 #include <linux/time.h>
 #include <linux/types.h>
 #include <linux/string.h>
@@ -481,14 +482,14 @@ static void prune_dqcache(int count)
  * more memory
  */
 
-int shrink_dqcache_memory(int ratio, unsigned int gfp_mask)
+static int shrink_dqcache_memory(int nr, unsigned int gfp_mask)
 {
-	int entries = dqstats.allocated_dquots / ratio + 1;
-
-	lock_kernel();
-	prune_dqcache(entries);
-	unlock_kernel();
-	return entries;
+	if (nr) {
+		lock_kernel();
+		prune_dqcache(nr);
+		unlock_kernel();
+	}
+	return dqstats.allocated_dquots;
 }
 
 /*
@@ -1490,6 +1491,9 @@ static ctl_table sys_table[] = {
 	{},
 };
 
+/* SLAB cache for dquot structures */
+kmem_cache_t *dquot_cachep;
+
 static int __init dquot_init(void)
 {
 	int i;
@@ -1499,9 +1503,17 @@ static int __init dquot_init(void)
 		INIT_LIST_HEAD(dquot_hash + i);
 	printk(KERN_NOTICE "VFS: Disk quotas v%s\n", __DQUOT_VERSION__);
 
+	dquot_cachep = kmem_cache_create("dquot", 
+			sizeof(struct dquot), sizeof(unsigned long) * 4,
+			SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (!dquot_cachep)
+		panic("Cannot create dquot SLAB cache");
+
+	set_shrinker(DEFAULT_SEEKS, shrink_dqcache_memory);
+
 	return 0;
 }
-__initcall(dquot_init);
+module_init(dquot_init);
 
 EXPORT_SYMBOL(register_quota_format);
 EXPORT_SYMBOL(unregister_quota_format);
--- 2.5.41/fs/inode.c~batched-slab-asap	Wed Oct  9 22:59:10 2002
+++ 2.5.41-akpm/fs/inode.c	Wed Oct  9 23:27:19 2002
@@ -243,22 +243,25 @@ void clear_inode(struct inode *inode)
  * Dispose-list gets a local list with local inodes in it, so it doesn't
  * need to worry about list corruption and SMP locks.
  */
-static void dispose_list(struct list_head * head)
+static void dispose_list(struct list_head *head)
 {
-	struct list_head * inode_entry;
-	struct inode * inode;
+	int nr_disposed = 0;
+
+	while (!list_empty(head)) {
+		struct inode *inode;
 
-	while ((inode_entry = head->next) != head)
-	{
-		list_del(inode_entry);
+		inode = list_entry(head->next, struct inode, i_list);
+		list_del(&inode->i_list);
 
-		inode = list_entry(inode_entry, struct inode, i_list);
 		if (inode->i_data.nrpages)
 			truncate_inode_pages(&inode->i_data, 0);
 		clear_inode(inode);
 		destroy_inode(inode);
-		inodes_stat.nr_inodes--;
+		nr_disposed++;
 	}
+	spin_lock(&inode_lock);
+	inodes_stat.nr_inodes -= nr_disposed;
+	spin_unlock(&inode_lock);
 }
 
 /*
@@ -377,7 +380,7 @@ int invalidate_device(kdev_t dev, int do
 	 !inode_has_buffers(inode))
 #define INODE(entry)	(list_entry(entry, struct inode, i_list))
 
-void prune_icache(int goal)
+static inline void prune_icache(int goal)
 {
 	LIST_HEAD(list);
 	struct list_head *entry, *freeable = &list;
@@ -417,23 +420,19 @@ void prune_icache(int goal)
  * This is called from kswapd when we think we need some
  * more memory. 
  */
-int shrink_icache_memory(int ratio, unsigned int gfp_mask)
+static int shrink_icache_memory(int nr, unsigned int gfp_mask)
 {
-	int entries = inodes_stat.nr_inodes / ratio + 1;
-	/*
-	 * Nasty deadlock avoidance..
-	 *
-	 * We may hold various FS locks, and we don't
-	 * want to recurse into the FS that called us
-	 * in clear_inode() and friends..
-	 */
-	if (!(gfp_mask & __GFP_FS))
-		return 0;
-
-	prune_icache(entries);
-	return entries;
+	if (nr) {
+		/*
+		 * Nasty deadlock avoidance.  We may hold various FS locks,
+		 * and we don't want to recurse into the FS that called us
+		 * in clear_inode() and friends..
+	 	 */
+		if (gfp_mask & __GFP_FS)
+			prune_icache(nr);
+	}
+	return inodes_stat.nr_inodes;
 }
-EXPORT_SYMBOL(shrink_icache_memory);
 
 /*
  * Called with the inode lock held.
@@ -1096,4 +1095,6 @@ void __init inode_init(unsigned long mem
 					 NULL);
 	if (!inode_cachep)
 		panic("cannot create inode slab cache");
+
+	set_shrinker(DEFAULT_SEEKS, shrink_icache_memory);
 }
--- 2.5.41/include/linux/mm.h~batched-slab-asap	Wed Oct  9 22:59:10 2002
+++ 2.5.41-akpm/include/linux/mm.h	Wed Oct  9 22:59:10 2002
@@ -392,6 +392,29 @@ extern	int free_hugepages(struct vm_area
 
 
 /*
+ * Prototype to add a shrinker callback for ageable caches.
+ * 
+ * These functions are passed a count `nr_to_scan' and a gfpmask.  They should
+ * scan `nr_to_scan' objects, attempting to free them.
+ *
+ * The callback must return the number of objects which remain in the cache.
+ *
+ * The callback will be passed nr_to_scan == 0 when the VM is querying the
+ * cache size, so a fastpath for that case is appropriate.
+ */
+typedef int (*shrinker_t)(int nr_to_scan, unsigned int gfp_mask);
+
+/*
+ * Add an aging callback.  The int is the number of 'seeks' it takes
+ * to recreate one of the objects that these functions age.
+ */
+
+#define DEFAULT_SEEKS 2
+struct shrinker;
+extern struct shrinker *set_shrinker(int, shrinker_t);
+extern void remove_shrinker(struct shrinker *shrinker);
+
+/*
  * If the mapping doesn't provide a set_page_dirty a_op, then
  * just fall through and assume that it wants buffer_heads.
  * FIXME: make the method unconditional.
--- 2.5.41/mm/vmscan.c~batched-slab-asap	Wed Oct  9 22:59:10 2002
+++ 2.5.41-akpm/mm/vmscan.c	Wed Oct  9 22:59:10 2002
@@ -77,9 +77,94 @@ static long total_memory;
 #define prefetchw_prev_lru_page(_page, _base, _field) do { } while (0)
 #endif
 
-#ifndef CONFIG_QUOTA
-#define shrink_dqcache_memory(ratio, gfp_mask) do { } while (0)
-#endif
+/*
+ * The list of shrinker callbacks used to apply pressure to
+ * ageable caches.
+ */
+struct shrinker {
+	shrinker_t		shrinker;
+	struct list_head	list;
+	int			seeks;	/* seeks to recreate an obj */
+	int			nr;	/* objs pending delete */
+};
+
+static LIST_HEAD(shrinker_list);
+static DECLARE_MUTEX(shrinker_sem);
+
+/*
+ * Add a shrinker callback to be called from the vm
+ */
+struct shrinker *set_shrinker(int seeks, shrinker_t theshrinker)
+{
+        struct shrinker *shrinker;
+
+        shrinker = kmalloc(sizeof(*shrinker), GFP_KERNEL);
+        if (shrinker) {
+	        shrinker->shrinker = theshrinker;
+	        shrinker->seeks = seeks;
+	        shrinker->nr = 0;
+	        down(&shrinker_sem);
+	        list_add(&shrinker->list, &shrinker_list);
+	        up(&shrinker_sem);
+	}
+	return shrinker;
+}
+
+/*
+ * Remove one
+ */
+void remove_shrinker(struct shrinker *shrinker)
+{
+	down(&shrinker_sem);
+	list_del(&shrinker->list);
+	up(&shrinker_sem);
+	kfree(shrinker);
+}
+ 
+#define SHRINK_BATCH 32
+/*
+ * Call the shrink functions to age shrinkable caches
+ *
+ * Here we assume it costs one seek to replace a lru page and that it also
+ * takes a seek to recreate a cache object.  With this in mind we age equal
+ * percentages of the lru and ageable caches.  This should balance the seeks
+ * generated by these structures.
+ *
+ * If the vm encountered mapped pages on the LRU it increases the pressure on
+ * slab to avoid swapping.
+ *
+ * FIXME: do not do for zone highmem
+ */
+static int shrink_slab(int scanned,  unsigned int gfp_mask)
+{
+	struct list_head *lh;
+	int pages;
+
+	if (down_trylock(&shrinker_sem))
+		return 0;
+
+	pages = nr_used_zone_pages();
+	list_for_each(lh, &shrinker_list) {
+		struct shrinker *shrinker;
+		int entries;
+		unsigned long delta;
+
+		shrinker = list_entry(lh, struct shrinker, list);
+		entries = (*shrinker->shrinker)(0, gfp_mask);
+		if (!entries)
+			continue;
+		delta = scanned * shrinker->seeks * entries;
+		shrinker->nr += delta / (pages + 1);
+		if (shrinker->nr > SHRINK_BATCH) {
+			int nr = shrinker->nr;
+
+			shrinker->nr = 0;
+			(*shrinker->shrinker)(nr, gfp_mask);
+		}
+	}
+	up(&shrinker_sem);
+	return 0;
+}
 
 /* Must be called with page's pte_chain_lock held. */
 static inline int page_mapping_inuse(struct page * page)
@@ -647,32 +732,6 @@ shrink_zone(struct zone *zone, int max_s
 }
 
 /*
- * FIXME: don't do this for ZONE_HIGHMEM
- */
-/*
- * Here we assume it costs one seek to replace a lru page and that it also
- * takes a seek to recreate a cache object.  With this in mind we age equal
- * percentages of the lru and ageable caches.  This should balance the seeks
- * generated by these structures.
- *
- * NOTE: for now I do this for all zones.  If we find this is too aggressive
- * on large boxes we may want to exclude ZONE_HIGHMEM.
- *
- * If we're encountering mapped pages on the LRU then increase the pressure on
- * slab to avoid swapping.
- */
-static void shrink_slab(int total_scanned, int gfp_mask)
-{
-	int shrink_ratio;
-	int pages = nr_used_zone_pages();
-
-	shrink_ratio = (pages / (total_scanned + 1)) + 1;
-	shrink_dcache_memory(shrink_ratio, gfp_mask);
-	shrink_icache_memory(shrink_ratio, gfp_mask);
-	shrink_dqcache_memory(shrink_ratio, gfp_mask);
-}
-
-/*
  * This is the direct reclaim path, for page-allocating processes.  We only
  * try to reclaim pages from zones which will satisfy the caller's allocation
  * request.
@@ -715,7 +774,7 @@ shrink_caches(struct zone *classzone, in
 	}
 	return ret;
 }
-
+ 
 /*
  * This is the main entry point to direct page reclaim.
  *
--- 2.5.41/kernel/ksyms.c~batched-slab-asap	Wed Oct  9 22:59:10 2002
+++ 2.5.41-akpm/kernel/ksyms.c	Wed Oct  9 23:09:23 2002
@@ -105,6 +105,8 @@ EXPORT_SYMBOL(kmem_cache_shrink);
 EXPORT_SYMBOL(kmem_cache_alloc);
 EXPORT_SYMBOL(kmem_cache_free);
 EXPORT_SYMBOL(kmem_cache_size);
+EXPORT_SYMBOL(set_shrinker);
+EXPORT_SYMBOL(remove_shrinker);
 EXPORT_SYMBOL(kmalloc);
 EXPORT_SYMBOL(kfree);
 EXPORT_SYMBOL(vfree);
@@ -248,7 +250,6 @@ EXPORT_SYMBOL(dput);
 EXPORT_SYMBOL(have_submounts);
 EXPORT_SYMBOL(d_find_alias);
 EXPORT_SYMBOL(d_prune_aliases);
-EXPORT_SYMBOL(prune_dcache);
 EXPORT_SYMBOL(shrink_dcache_sb);
 EXPORT_SYMBOL(shrink_dcache_parent);
 EXPORT_SYMBOL(shrink_dcache_anon);
--- 2.5.41/include/linux/dcache.h~batched-slab-asap	Wed Oct  9 23:03:05 2002
+++ 2.5.41-akpm/include/linux/dcache.h	Wed Oct  9 23:14:02 2002
@@ -180,17 +180,6 @@ extern void shrink_dcache_parent(struct 
 extern void shrink_dcache_anon(struct list_head *);
 extern int d_invalidate(struct dentry *);
 
-/* dcache memory management */
-extern int shrink_dcache_memory(int, unsigned int);
-extern void prune_dcache(int);
-
-/* icache memory management (defined in linux/fs/inode.c) */
-extern int shrink_icache_memory(int, unsigned int);
-extern void prune_icache(int);
-
-/* quota cache memory management (defined in linux/fs/dquot.c) */
-extern int shrink_dqcache_memory(int, unsigned int);
-
 /* only used at mount-time */
 extern struct dentry * d_alloc_root(struct inode *);
 

.

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2002-10-10  6:51 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2002-10-08 18:08 [RFC] [PATCH 1/4] Add extended attributes to ext2/3 tytso
2002-10-08 18:19 ` [Ext2-devel] " Christoph Hellwig
2002-10-08 18:38   ` Andrew Morton
2002-10-08 18:47     ` Andreas Gruenbacher
2002-10-08 19:10       ` Rik van Riel
2002-10-10  6:56       ` Andrew Morton
2002-10-08 18:40   ` Theodore Ts'o
2002-10-08 18:50     ` Christoph Hellwig
2002-10-08 22:37       ` Ed Tomlinson
2002-10-08 18:21 ` Rik van Riel
2002-10-08 18:59   ` [Ext2-devel] " Andreas Gruenbacher

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).