All of lore.kernel.org
 help / color / mirror / Atom feed
From: Eric Dumazet <dada1@cosmosbay.com>
To: Andrew Morton <akpm@osdl.org>
Cc: linux-kernel@vger.kernel.org
Subject: [PATCH] shrinks dentry struct
Date: Tue, 29 Nov 2005 16:22:00 +0100	[thread overview]
Message-ID: <438C7218.8030109@cosmosbay.com> (raw)
In-Reply-To: <20051129000916.6306da8b.akpm@osdl.org>

[-- Attachment #1: Type: text/plain, Size: 1106 bytes --]

Hi Andrew

Could you add this patch to mm ?

Thank you

[PATCH] shrinks dentry struct

Some long time ago, dentry struct was carefully tuned so that on 32 bits UP, 
sizeof(struct dentry) was exactly 128, ie a power of 2, and a multiple of 
memory cache lines.

Then RCU was added and dentry struct enlarged by two pointers, with nice 
results for SMP, but not so good on UP, because breaking the above tuning (128 
+ 8 = 136 bytes)

This patch reverts this unwanted side effect, by using an union (d_u), where 
d_rcu and d_child are placed so that these two fields can share their memory 
needs.

At the time d_free() is called (and d_rcu is really used), d_child is known to 
be empty and not touched by the dentry freeing.

Lockless lookups only access d_name, d_parent, d_lock, d_op, d_flags (so the 
previous content of d_child is not needed if said dentry was unhashed but 
still accessed by a CPU because of RCU constraints)

As dentry cache easily contains millions of entries, a size reduction is worth 
the extra complexity of the ugly C union.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>


[-- Attachment #2: shrink_dentry_struct.patch --]
[-- Type: text/plain, Size: 14773 bytes --]

--- linux-2.6.15-rc3/include/linux/dcache.h	2005-11-29 04:51:27.000000000 +0100
+++ linux-2.6.15-rc3-ed/include/linux/dcache.h	2005-11-29 12:04:54.000000000 +0100
@@ -95,14 +95,19 @@
 	struct qstr d_name;
 
 	struct list_head d_lru;		/* LRU list */
-	struct list_head d_child;	/* child of parent list */
+	/*
+	 * d_child and d_rcu can share memory
+	 */
+	union {
+		struct list_head d_child;	/* child of parent list */
+	 	struct rcu_head d_rcu;
+	} d_u;
 	struct list_head d_subdirs;	/* our children */
 	struct list_head d_alias;	/* inode alias list */
 	unsigned long d_time;		/* used by d_revalidate */
 	struct dentry_operations *d_op;
 	struct super_block *d_sb;	/* The root of the dentry tree */
 	void *d_fsdata;			/* fs-specific data */
- 	struct rcu_head d_rcu;
 	struct dcookie_struct *d_cookie; /* cookie, if any */
 	int d_mounted;
 	unsigned char d_iname[DNAME_INLINE_LEN_MIN];	/* small names */
--- linux-2.6.15-rc3/fs/dcache.c	2005-11-29 04:51:27.000000000 +0100
+++ linux-2.6.15-rc3-ed/fs/dcache.c	2005-11-29 15:07:10.000000000 +0100
@@ -71,7 +71,7 @@
 
 static void d_callback(struct rcu_head *head)
 {
-	struct dentry * dentry = container_of(head, struct dentry, d_rcu);
+	struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu);
 
 	if (dname_external(dentry))
 		kfree(dentry->d_name.name);
@@ -86,7 +86,7 @@
 {
 	if (dentry->d_op && dentry->d_op->d_release)
 		dentry->d_op->d_release(dentry);
- 	call_rcu(&dentry->d_rcu, d_callback);
+ 	call_rcu(&dentry->d_u.d_rcu, d_callback);
 }
 
 /*
@@ -193,7 +193,7 @@
   			list_del(&dentry->d_lru);
   			dentry_stat.nr_unused--;
   		}
-  		list_del(&dentry->d_child);
+  		list_del(&dentry->d_u.d_child);
 		dentry_stat.nr_dentry--;	/* For d_free, below */
 		/*drops the locks, at that point nobody can reach this dentry */
 		dentry_iput(dentry);
@@ -367,7 +367,7 @@
 	struct dentry * parent;
 
 	__d_drop(dentry);
-	list_del(&dentry->d_child);
+	list_del(&dentry->d_u.d_child);
 	dentry_stat.nr_dentry--;	/* For d_free, below */
 	dentry_iput(dentry);
 	parent = dentry->d_parent;
@@ -518,7 +518,7 @@
 resume:
 	while (next != &this_parent->d_subdirs) {
 		struct list_head *tmp = next;
-		struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
+		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
 		next = tmp->next;
 		/* Have we found a mount point ? */
 		if (d_mountpoint(dentry))
@@ -532,7 +532,7 @@
 	 * All done at this level ... ascend and resume the search.
 	 */
 	if (this_parent != parent) {
-		next = this_parent->d_child.next; 
+		next = this_parent->d_u.d_child.next; 
 		this_parent = this_parent->d_parent;
 		goto resume;
 	}
@@ -569,7 +569,7 @@
 resume:
 	while (next != &this_parent->d_subdirs) {
 		struct list_head *tmp = next;
-		struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
+		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
 		next = tmp->next;
 
 		if (!list_empty(&dentry->d_lru)) {
@@ -610,7 +610,7 @@
 	 * All done at this level ... ascend and resume the search.
 	 */
 	if (this_parent != parent) {
-		next = this_parent->d_child.next; 
+		next = this_parent->d_u.d_child.next; 
 		this_parent = this_parent->d_parent;
 #ifdef DCACHE_DEBUG
 printk(KERN_DEBUG "select_parent: ascending to %s/%s, found=%d\n",
@@ -753,12 +753,12 @@
 		dentry->d_parent = dget(parent);
 		dentry->d_sb = parent->d_sb;
 	} else {
-		INIT_LIST_HEAD(&dentry->d_child);
+		INIT_LIST_HEAD(&dentry->d_u.d_child);
 	}
 
 	spin_lock(&dcache_lock);
 	if (parent)
-		list_add(&dentry->d_child, &parent->d_subdirs);
+		list_add(&dentry->d_u.d_child, &parent->d_subdirs);
 	dentry_stat.nr_dentry++;
 	spin_unlock(&dcache_lock);
 
@@ -1310,8 +1310,8 @@
 	/* Unhash the target: dput() will then get rid of it */
 	__d_drop(target);
 
-	list_del(&dentry->d_child);
-	list_del(&target->d_child);
+	list_del(&dentry->d_u.d_child);
+	list_del(&target->d_u.d_child);
 
 	/* Switch the names.. */
 	switch_names(dentry, target);
@@ -1322,15 +1322,15 @@
 	if (IS_ROOT(dentry)) {
 		dentry->d_parent = target->d_parent;
 		target->d_parent = target;
-		INIT_LIST_HEAD(&target->d_child);
+		INIT_LIST_HEAD(&target->d_u.d_child);
 	} else {
 		do_switch(dentry->d_parent, target->d_parent);
 
 		/* And add them back to the (new) parent lists */
-		list_add(&target->d_child, &target->d_parent->d_subdirs);
+		list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
 	}
 
-	list_add(&dentry->d_child, &dentry->d_parent->d_subdirs);
+	list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
 	spin_unlock(&target->d_lock);
 	spin_unlock(&dentry->d_lock);
 	write_sequnlock(&rename_lock);
@@ -1568,7 +1568,7 @@
 resume:
 	while (next != &this_parent->d_subdirs) {
 		struct list_head *tmp = next;
-		struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
+		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
 		next = tmp->next;
 		if (d_unhashed(dentry)||!dentry->d_inode)
 			continue;
@@ -1579,7 +1579,7 @@
 		atomic_dec(&dentry->d_count);
 	}
 	if (this_parent != root) {
-		next = this_parent->d_child.next; 
+		next = this_parent->d_u.d_child.next; 
 		atomic_dec(&this_parent->d_count);
 		this_parent = this_parent->d_parent;
 		goto resume;
--- linux-2.6.15-rc3/fs/autofs4/autofs_i.h	2005-11-29 04:51:27.000000000 +0100
+++ linux-2.6.15-rc3-ed/fs/autofs4/autofs_i.h	2005-11-29 12:04:54.000000000 +0100
@@ -209,7 +209,7 @@
 	struct dentry *child;
 	int ret = 0;
 
-	list_for_each_entry(child, &dentry->d_subdirs, d_child)
+	list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child)
 		if (simple_positive(child))
 			goto out;
 	ret = 1;
--- linux-2.6.15-rc3/fs/autofs4/expire.c	2005-11-29 04:51:27.000000000 +0100
+++ linux-2.6.15-rc3-ed/fs/autofs4/expire.c	2005-11-29 12:04:54.000000000 +0100
@@ -105,7 +105,7 @@
 	next = this_parent->d_subdirs.next;
 resume:
 	while (next != &this_parent->d_subdirs) {
-		struct dentry *dentry = list_entry(next, struct dentry, d_child);
+		struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
 
 		/* Negative dentry - give up */
 		if (!simple_positive(dentry)) {
@@ -138,7 +138,7 @@
 	}
 
 	if (this_parent != top) {
-		next = this_parent->d_child.next;
+		next = this_parent->d_u.d_child.next;
 		this_parent = this_parent->d_parent;
 		goto resume;
 	}
@@ -163,7 +163,7 @@
 	next = this_parent->d_subdirs.next;
 resume:
 	while (next != &this_parent->d_subdirs) {
-		struct dentry *dentry = list_entry(next, struct dentry, d_child);
+		struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
 
 		/* Negative dentry - give up */
 		if (!simple_positive(dentry)) {
@@ -199,7 +199,7 @@
 	}
 
 	if (this_parent != parent) {
-		next = this_parent->d_child.next;
+		next = this_parent->d_u.d_child.next;
 		this_parent = this_parent->d_parent;
 		goto resume;
 	}
@@ -238,7 +238,7 @@
 	/* On exit from the loop expire is set to a dgot dentry
 	 * to expire or it's NULL */
 	while ( next != &root->d_subdirs ) {
-		struct dentry *dentry = list_entry(next, struct dentry, d_child);
+		struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
 
 		/* Negative dentry - give up */
 		if ( !simple_positive(dentry) ) {
@@ -302,7 +302,7 @@
 			expired, (int)expired->d_name.len, expired->d_name.name);
 		spin_lock(&dcache_lock);
 		list_del(&expired->d_parent->d_subdirs);
-		list_add(&expired->d_parent->d_subdirs, &expired->d_child);
+		list_add(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
 		spin_unlock(&dcache_lock);
 		return expired;
 	}
--- linux-2.6.15-rc3/fs/autofs4/inode.c	2005-11-29 04:51:27.000000000 +0100
+++ linux-2.6.15-rc3-ed/fs/autofs4/inode.c	2005-11-29 12:04:54.000000000 +0100
@@ -91,7 +91,7 @@
 	next = this_parent->d_subdirs.next;
 resume:
 	while (next != &this_parent->d_subdirs) {
-		struct dentry *dentry = list_entry(next, struct dentry, d_child);
+		struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
 
 		/* Negative dentry - don`t care */
 		if (!simple_positive(dentry)) {
@@ -117,7 +117,7 @@
 	if (this_parent != sbi->root) {
 		struct dentry *dentry = this_parent;
 
-		next = this_parent->d_child.next;
+		next = this_parent->d_u.d_child.next;
 		this_parent = this_parent->d_parent;
 		spin_unlock(&dcache_lock);
 		DPRINTK("parent dentry %p %.*s",
--- linux-2.6.15-rc3/fs/coda/cache.c	2005-11-29 04:51:27.000000000 +0100
+++ linux-2.6.15-rc3-ed/fs/coda/cache.c	2005-11-29 12:05:21.000000000 +0100
@@ -93,7 +93,7 @@
 	spin_lock(&dcache_lock);
 	list_for_each(child, &parent->d_subdirs)
 	{
-		de = list_entry(child, struct dentry, d_child);
+		de = list_entry(child, struct dentry, d_u.d_child);
 		/* don't know what to do with negative dentries */
 		if ( ! de->d_inode ) 
 			continue;
--- linux-2.6.15-rc3/fs/libfs.c	2005-11-29 04:51:27.000000000 +0100
+++ linux-2.6.15-rc3-ed/fs/libfs.c	2005-11-29 14:58:51.000000000 +0100
@@ -93,16 +93,16 @@
 			loff_t n = file->f_pos - 2;
 
 			spin_lock(&dcache_lock);
-			list_del(&cursor->d_child);
+			list_del(&cursor->d_u.d_child);
 			p = file->f_dentry->d_subdirs.next;
 			while (n && p != &file->f_dentry->d_subdirs) {
 				struct dentry *next;
-				next = list_entry(p, struct dentry, d_child);
+				next = list_entry(p, struct dentry, d_u.d_child);
 				if (!d_unhashed(next) && next->d_inode)
 					n--;
 				p = p->next;
 			}
-			list_add_tail(&cursor->d_child, p);
+			list_add_tail(&cursor->d_u.d_child, p);
 			spin_unlock(&dcache_lock);
 		}
 	}
@@ -126,7 +126,7 @@
 {
 	struct dentry *dentry = filp->f_dentry;
 	struct dentry *cursor = filp->private_data;
-	struct list_head *p, *q = &cursor->d_child;
+	struct list_head *p, *q = &cursor->d_u.d_child;
 	ino_t ino;
 	int i = filp->f_pos;
 
@@ -153,7 +153,7 @@
 			}
 			for (p=q->next; p != &dentry->d_subdirs; p=p->next) {
 				struct dentry *next;
-				next = list_entry(p, struct dentry, d_child);
+				next = list_entry(p, struct dentry, d_u.d_child);
 				if (d_unhashed(next) || !next->d_inode)
 					continue;
 
@@ -261,7 +261,7 @@
 	int ret = 0;
 
 	spin_lock(&dcache_lock);
-	list_for_each_entry(child, &dentry->d_subdirs, d_child)
+	list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child)
 		if (simple_positive(child))
 			goto out;
 	ret = 1;
--- linux-2.6.15-rc3/fs/ncpfs/dir.c	2005-11-29 04:51:27.000000000 +0100
+++ linux-2.6.15-rc3-ed/fs/ncpfs/dir.c	2005-11-29 14:59:31.000000000 +0100
@@ -365,7 +365,7 @@
 	spin_lock(&dcache_lock);
 	next = parent->d_subdirs.next;
 	while (next != &parent->d_subdirs) {
-		dent = list_entry(next, struct dentry, d_child);
+		dent = list_entry(next, struct dentry, d_u.d_child);
 		if ((unsigned long)dent->d_fsdata == fpos) {
 			if (dent->d_inode)
 				dget_locked(dent);
--- linux-2.6.15-rc3/fs/ncpfs/ncplib_kernel.h	2005-11-29 04:51:27.000000000 +0100
+++ linux-2.6.15-rc3-ed/fs/ncpfs/ncplib_kernel.h	2005-11-29 14:59:31.000000000 +0100
@@ -196,7 +196,7 @@
 	spin_lock(&dcache_lock);
 	next = parent->d_subdirs.next;
 	while (next != &parent->d_subdirs) {
-		dentry = list_entry(next, struct dentry, d_child);
+		dentry = list_entry(next, struct dentry, d_u.d_child);
 
 		if (dentry->d_fsdata == NULL)
 			ncp_age_dentry(server, dentry);
@@ -218,7 +218,7 @@
 	spin_lock(&dcache_lock);
 	next = parent->d_subdirs.next;
 	while (next != &parent->d_subdirs) {
-		dentry = list_entry(next, struct dentry, d_child);
+		dentry = list_entry(next, struct dentry, d_u.d_child);
 		dentry->d_fsdata = NULL;
 		ncp_age_dentry(server, dentry);
 		next = next->next;
--- linux-2.6.15-rc3/fs/smbfs/cache.c	2005-11-29 04:51:27.000000000 +0100
+++ linux-2.6.15-rc3-ed/fs/smbfs/cache.c	2005-11-29 14:59:47.000000000 +0100
@@ -66,7 +66,7 @@
 	spin_lock(&dcache_lock);
 	next = parent->d_subdirs.next;
 	while (next != &parent->d_subdirs) {
-		dentry = list_entry(next, struct dentry, d_child);
+		dentry = list_entry(next, struct dentry, d_u.d_child);
 		dentry->d_fsdata = NULL;
 		smb_age_dentry(server, dentry);
 		next = next->next;
@@ -100,7 +100,7 @@
 	spin_lock(&dcache_lock);
 	next = parent->d_subdirs.next;
 	while (next != &parent->d_subdirs) {
-		dent = list_entry(next, struct dentry, d_child);
+		dent = list_entry(next, struct dentry, d_u.d_child);
 		if ((unsigned long)dent->d_fsdata == fpos) {
 			if (dent->d_inode)
 				dget_locked(dent);
--- linux-2.6.15-rc3/kernel/cpuset.c	2005-11-29 04:51:27.000000000 +0100
+++ linux-2.6.15-rc3-ed/kernel/cpuset.c	2005-11-29 15:01:01.000000000 +0100
@@ -304,7 +304,7 @@
 	spin_lock(&dcache_lock);
 	node = dentry->d_subdirs.next;
 	while (node != &dentry->d_subdirs) {
-		struct dentry *d = list_entry(node, struct dentry, d_child);
+		struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
 		list_del_init(node);
 		if (d->d_inode) {
 			d = dget_locked(d);
@@ -316,7 +316,7 @@
 		}
 		node = dentry->d_subdirs.next;
 	}
-	list_del_init(&dentry->d_child);
+	list_del_init(&dentry->d_u.d_child);
 	spin_unlock(&dcache_lock);
 	remove_dir(dentry);
 }
--- linux-2.6.15-rc3/drivers/usb/core/inode.c	2005-11-29 04:51:27.000000000 +0100
+++ linux-2.6.15-rc3-ed/drivers/usb/core/inode.c	2005-11-29 12:04:54.000000000 +0100
@@ -186,7 +186,7 @@
 
 	down(&bus->d_inode->i_sem);
 
-	list_for_each_entry(dev, &bus->d_subdirs, d_child)
+	list_for_each_entry(dev, &bus->d_subdirs, d_u.d_child)
 		if (dev->d_inode)
 			update_dev(dev);
 
@@ -203,7 +203,7 @@
 
 	down(&root->d_inode->i_sem);
 
-	list_for_each_entry(bus, &root->d_subdirs, d_child) {
+	list_for_each_entry(bus, &root->d_subdirs, d_u.d_child) {
 		if (bus->d_inode) {
 			switch (S_IFMT & bus->d_inode->i_mode) {
 			case S_IFDIR:
@@ -319,7 +319,7 @@
 	spin_lock(&dcache_lock);
 
 	list_for_each(list, &dentry->d_subdirs) {
-		struct dentry *de = list_entry(list, struct dentry, d_child);
+		struct dentry *de = list_entry(list, struct dentry, d_u.d_child);
 		if (usbfs_positive(de)) {
 			spin_unlock(&dcache_lock);
 			return 0;
--- linux-2.6.15-rc3/net/sunrpc/rpc_pipe.c	2005-11-29 04:51:27.000000000 +0100
+++ linux-2.6.15-rc3-ed/net/sunrpc/rpc_pipe.c	2005-11-29 15:01:01.000000000 +0100
@@ -492,7 +492,7 @@
 repeat:
 	spin_lock(&dcache_lock);
 	list_for_each_safe(pos, next, &parent->d_subdirs) {
-		dentry = list_entry(pos, struct dentry, d_child);
+		dentry = list_entry(pos, struct dentry, d_u.d_child);
 		spin_lock(&dentry->d_lock);
 		if (!d_unhashed(dentry)) {
 			dget_locked(dentry);
--- linux-2.6.15-rc3/security/selinux/selinuxfs.c	2005-11-29 04:51:27.000000000 +0100
+++ linux-2.6.15-rc3-ed/security/selinux/selinuxfs.c	2005-11-29 15:01:09.000000000 +0100
@@ -889,7 +889,7 @@
 	spin_lock(&dcache_lock);
 	node = de->d_subdirs.next;
 	while (node != &de->d_subdirs) {
-		struct dentry *d = list_entry(node, struct dentry, d_child);
+		struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
 		list_del_init(node);
 
 		if (d->d_inode) {

  parent reply	other threads:[~2005-11-29 15:22 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-11-29  7:17 [PATCH] race condition in procfs Grzegorz Nosek
2005-11-29  8:09 ` Andrew Morton
2005-11-29  8:38   ` Grzegorz Nosek
2005-11-29 13:25     ` Grzegorz Nosek
2005-11-29 14:04       ` Grzegorz Nosek
2005-11-29 14:28         ` Steven Rostedt
2005-11-29 14:39           ` Grzegorz Nosek
2005-11-29 14:49             ` Steven Rostedt
2005-11-30 14:41               ` Grzegorz Nosek
2005-11-30 15:14                 ` Steven Rostedt
2005-11-30 15:29                   ` Grzegorz Nosek
2005-11-30 16:25                     ` Steven Rostedt
2005-11-30 17:23                       ` Grzegorz Nosek
2005-12-01 20:38                         ` Grzegorz Nosek
2005-11-29 15:22   ` Eric Dumazet [this message]
2005-11-30  2:06     ` [PATCH] shrinks dentry struct Paul Jackson
2005-11-30  2:14       ` Andrew Morton
2005-11-30  2:43         ` Paul Jackson
2005-11-30  6:56         ` Hugh Dickins
2005-12-03  1:15     ` [PATCH] remove unused blkp field in percpu_data Eric Dumazet
2005-12-13 18:03     ` [PATCH] shrinks dentry struct Paul E. McKenney
2005-12-13 18:24       ` Eric Dumazet

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=438C7218.8030109@cosmosbay.com \
    --to=dada1@cosmosbay.com \
    --cc=akpm@osdl.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.