All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ian Kent <raven@themaw.net>
To: Jim Carter <jimc@math.ucla.edu>,
	autofs mailing list <autofs@linux.kernel.org>
Subject: [PATCH 06/10] autofs4 - use look aside list for lookups
Date: Thu, 12 Jun 2008 12:51:14 +0800	[thread overview]
Message-ID: <20080612045114.25151.74853.stgit@raven.themaw.net> (raw)
In-Reply-To: <20080612044425.25151.58126.stgit@raven.themaw.net>

A while ago a patch to resolve a deadlock during directory creation
was merged. This delayed the hashing of lookup dentrys until the
->mkdir() operation completed to ensure we always went through
->lookup() instead of also having processes go through ->revalidate()
so our VFS locking remained consistent.

Now we are seeing a couple of side affects of that change in situations
with heavy mount activity.

Two cases have been identified:

1) When a mount request is triggered, due to the delayed hashing, the
directory created by user space for the mount point doesn't have the
DCACHE_AUTOFS_PENDING flag set. In the case of an autofs multi-mount
where a tree of mount point directories are created this can lead to
the path walk continuing rather than the dentry being sent to the wait
queue to wait for request completion. This is because, if the pending
flag isn't set, the criteria for deciding this is a mount in progress
fails to hold, namely that the dentry is not a mount point and has no
subdirectories.

2) If a mount request dentry that has become unhashed during a revalidate
is re-used by ->lookup() it remains unhashed, is made negative and is
removed from the rehash list. It remains this way until the ->mkdir()
callback completes. Since it is unhashed a fresh dentry is used when
the user space mount request creates the mount point directory. This
leaves the original dentry negative and unhashed. But revalidate has
no way to tell the VFS that the dentry has changed, other than to force
another ->lookup() by returning false, which is at best wastefull and
at worst not possible. This results in an -ENOENT return from the
original path walk when in fact the mount succeeded.

To resolve this we need to ensure that the same dentry is used in all
calls to ->lookup() during the course of a mount request. This patch
achieves that by adding the initial dentry to a look aside list and
removes it at ->mkdir() or ->symlink() completeion (or when the dentry
is released), since these are the only create operations autofs4 supports.

Signed-off-by: Ian Kent <raven@themaw.net>

---

 fs/autofs4/autofs_i.h |    2 +
 fs/autofs4/inode.c    |   25 ++++---
 fs/autofs4/root.c     |  169 ++++++++++++++++++++++++++++++++++++++++---------
 3 files changed, 156 insertions(+), 40 deletions(-)


diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index f36d0c5..e285273 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -52,6 +52,7 @@ struct autofs_info {
 
 	int		flags;
 
+	struct list_head active;
 	struct list_head expiring;
 
 	struct autofs_sb_info *sbi;
@@ -113,6 +114,7 @@ struct autofs_sb_info {
 	spinlock_t fs_lock;
 	struct autofs_wait_queue *queues; /* Wait queue pointer */
 	spinlock_t lookup_lock;
+	struct list_head active_list;
 	struct list_head expiring_list;
 };
 
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index fc04334..54279fc 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -24,8 +24,10 @@
 
 static void ino_lnkfree(struct autofs_info *ino)
 {
-	kfree(ino->u.symlink);
-	ino->u.symlink = NULL;
+	if (ino->u.symlink) {
+		kfree(ino->u.symlink);
+		ino->u.symlink = NULL;
+	}
 }
 
 struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
@@ -41,16 +43,18 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
 	if (ino == NULL)
 		return NULL;
 
-	ino->flags = 0;
-	ino->mode = mode;
-	ino->inode = NULL;
-	ino->dentry = NULL;
-	ino->size = 0;
-
-	INIT_LIST_HEAD(&ino->expiring);
+	if (!reinit) {
+		ino->flags = 0;
+		ino->inode = NULL;
+		ino->dentry = NULL;
+		ino->size = 0;
+		INIT_LIST_HEAD(&ino->active);
+		INIT_LIST_HEAD(&ino->expiring);
+		atomic_set(&ino->count, 0);
+	}
 
+	ino->mode = mode;
 	ino->last_used = jiffies;
-	atomic_set(&ino->count, 0);
 
 	ino->sbi = sbi;
 
@@ -334,6 +338,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 	spin_lock_init(&sbi->fs_lock);
 	sbi->queues = NULL;
 	spin_lock_init(&sbi->lookup_lock);
+	INIT_LIST_HEAD(&sbi->active_list);
 	INIT_LIST_HEAD(&sbi->expiring_list);
 	s->s_blocksize = 1024;
 	s->s_blocksize_bits = 10;
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index d5b36e4..a415ebd 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -471,6 +471,8 @@ void autofs4_dentry_release(struct dentry *de)
 
 		if (sbi) {
 			spin_lock(&sbi->lookup_lock);
+			if (!list_empty(&inf->active))
+				list_del(&inf->active);
 			if (!list_empty(&inf->expiring))
 				list_del(&inf->expiring);
 			spin_unlock(&sbi->lookup_lock);
@@ -495,6 +497,58 @@ static struct dentry_operations autofs4_dentry_operations = {
 	.d_release	= autofs4_dentry_release,
 };
 
+static struct dentry *autofs4_lookup_active(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name)
+{
+	unsigned int len = name->len;
+	unsigned int hash = name->hash;
+	const unsigned char *str = name->name;
+	struct list_head *p, *head;
+
+	spin_lock(&dcache_lock);
+	spin_lock(&sbi->lookup_lock);
+	head = &sbi->active_list;
+	list_for_each(p, head) {
+		struct autofs_info *ino;
+		struct dentry *dentry;
+		struct qstr *qstr;
+
+		ino = list_entry(p, struct autofs_info, active);
+		dentry = ino->dentry;
+
+		spin_lock(&dentry->d_lock);
+
+		/* Already gone? */
+		if (atomic_read(&dentry->d_count) == 0)
+			goto next;
+
+		qstr = &dentry->d_name;
+
+		if (dentry->d_name.hash != hash)
+			goto next;
+		if (dentry->d_parent != parent)
+			goto next;
+
+		if (qstr->len != len)
+			goto next;
+		if (memcmp(qstr->name, str, len))
+			goto next;
+
+		if (d_unhashed(dentry)) {
+			dget(dentry);
+			spin_unlock(&dentry->d_lock);
+			spin_unlock(&sbi->lookup_lock);
+			spin_unlock(&dcache_lock);
+			return dentry;
+		}
+next:
+		spin_unlock(&dentry->d_lock);
+	}
+	spin_unlock(&sbi->lookup_lock);
+	spin_unlock(&dcache_lock);
+
+	return NULL;
+}
+
 static struct dentry *autofs4_lookup_expiring(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name)
 {
 	unsigned int len = name->len;
@@ -552,7 +606,8 @@ next:
 static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 {
 	struct autofs_sb_info *sbi;
-	struct dentry *expiring;
+	struct autofs_info *ino;
+	struct dentry *expiring, *unhashed;
 	int oz_mode;
 
 	DPRINTK("name = %.*s",
@@ -570,12 +625,12 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
 
 	expiring = autofs4_lookup_expiring(sbi, dentry->d_parent, &dentry->d_name);
 	if (expiring) {
-		struct autofs_info *ino = autofs4_dentry_ino(expiring);
 		/*
 		 * If we are racing with expire the request might not
 		 * be quite complete but the directory has been removed
 		 * so it must have been successful, so just wait for it.
 		 */
+		ino = autofs4_dentry_ino(expiring);
 		while (ino && (ino->flags & AUTOFS_INF_EXPIRING)) {
 			DPRINTK("wait for incomplete expire %p name=%.*s",
 				expiring, expiring->d_name.len,
@@ -586,21 +641,41 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
 		dput(expiring);
 	}
 
-	/*
-	 * Mark the dentry incomplete but don't hash it. We do this
-	 * to serialize our inode creation operations (symlink and
-	 * mkdir) which prevents deadlock during the callback to
-	 * the daemon. Subsequent user space lookups for the same
-	 * dentry are placed on the wait queue while the daemon
-	 * itself is allowed passage unresticted so the create
-	 * operation itself can then hash the dentry. Finally,
-	 * we check for the hashed dentry and return the newly
-	 * hashed dentry.
-	 */
-	dentry->d_op = &autofs4_root_dentry_operations;
+	unhashed = autofs4_lookup_active(sbi, dentry->d_parent, &dentry->d_name);
+	if (unhashed)
+		dentry = unhashed;
+	else {
+		/*
+		 * Mark the dentry incomplete but don't hash it. We do this
+		 * to serialize our inode creation operations (symlink and
+		 * mkdir) which prevents deadlock during the callback to
+		 * the daemon. Subsequent user space lookups for the same
+		 * dentry are placed on the wait queue while the daemon
+		 * itself is allowed passage unresticted so the create
+		 * operation itself can then hash the dentry. Finally,
+		 * we check for the hashed dentry and return the newly
+		 * hashed dentry.
+		 */
+		dentry->d_op = &autofs4_root_dentry_operations;
 
-	dentry->d_fsdata = NULL;
-	d_instantiate(dentry, NULL);
+		/*
+		 * And we need to ensure that the same dentry is used for
+		 * all following lookup calls until it is hashed so that
+		 * the dentry flags are persistent throughout the request.
+		 */
+		ino = autofs4_init_ino(NULL, sbi, 0555);
+		if (!ino)
+			return ERR_PTR(-ENOMEM);
+
+		dentry->d_fsdata = ino;
+		ino->dentry = dentry;
+
+		spin_lock(&sbi->lookup_lock);
+		list_add(&ino->active, &sbi->active_list);
+		spin_unlock(&sbi->lookup_lock);
+
+		d_instantiate(dentry, NULL);
+	}
 
 	if (!oz_mode) {
 		spin_lock(&dentry->d_lock);
@@ -625,12 +700,16 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
 			if (sigismember (sigset, SIGKILL) ||
 			    sigismember (sigset, SIGQUIT) ||
 			    sigismember (sigset, SIGINT)) {
+			    if (unhashed)
+				dput(unhashed);
 			    return ERR_PTR(-ERESTARTNOINTR);
 			}
 		}
-		spin_lock(&dentry->d_lock);
-		dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
-		spin_unlock(&dentry->d_lock);
+		if (!oz_mode) {
+			spin_lock(&dentry->d_lock);
+			dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
+			spin_unlock(&dentry->d_lock);
+		}
 	}
 
 	/*
@@ -654,9 +733,15 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
 		else
 			dentry = ERR_PTR(-ENOENT);
 
+		if (unhashed)
+			dput(unhashed);
+
 		return dentry;
 	}
 
+	if (unhashed)
+		return unhashed;
+
 	return NULL;
 }
 
@@ -677,20 +762,30 @@ static int autofs4_dir_symlink(struct inode *dir,
 		return -EACCES;
 
 	ino = autofs4_init_ino(ino, sbi, S_IFLNK | 0555);
-	if (ino == NULL)
-		return -ENOSPC;
+	if (!ino)
+		return -ENOMEM;
 
-	ino->size = strlen(symname);
-	ino->u.symlink = cp = kmalloc(ino->size + 1, GFP_KERNEL);
+	spin_lock(&sbi->lookup_lock);
+	if (!list_empty(&ino->active))
+		list_del_init(&ino->active);
+	spin_unlock(&sbi->lookup_lock);
 
-	if (cp == NULL) {
-		kfree(ino);
-		return -ENOSPC;
+	cp = kmalloc(ino->size + 1, GFP_KERNEL);
+	if (!cp) {
+		if (!dentry->d_fsdata)
+			kfree(ino);
+		return -ENOMEM;
 	}
 
 	strcpy(cp, symname);
 
 	inode = autofs4_get_inode(dir->i_sb, ino);
+	if (!inode) {
+		kfree(cp);
+		if (!dentry->d_fsdata)
+			kfree(ino);
+		return -ENOMEM;
+	}
 	d_add(dentry, inode);
 
 	if (dir == dir->i_sb->s_root->d_inode)
@@ -706,6 +801,8 @@ static int autofs4_dir_symlink(struct inode *dir,
 		atomic_inc(&p_ino->count);
 	ino->inode = inode;
 
+	ino->size = strlen(symname);
+	ino->u.symlink = cp;
 	dir->i_mtime = CURRENT_TIME;
 
 	return 0;
@@ -750,7 +847,8 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
 
 	spin_lock(&dcache_lock);
 	spin_lock(&sbi->lookup_lock);
-	list_add(&ino->expiring, &sbi->expiring_list);
+	if (list_empty(&ino->expiring))
+		list_add(&ino->expiring, &sbi->expiring_list);
 	spin_unlock(&sbi->lookup_lock);
 	spin_lock(&dentry->d_lock);
 	__d_drop(dentry);
@@ -778,7 +876,8 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
 		return -ENOTEMPTY;
 	}
 	spin_lock(&sbi->lookup_lock);
-	list_add(&ino->expiring, &sbi->expiring_list);
+	if (list_empty(&ino->expiring))
+		list_add(&ino->expiring, &sbi->expiring_list);
 	spin_unlock(&sbi->lookup_lock);
 	spin_lock(&dentry->d_lock);
 	__d_drop(dentry);
@@ -814,10 +913,20 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 		dentry, dentry->d_name.len, dentry->d_name.name);
 
 	ino = autofs4_init_ino(ino, sbi, S_IFDIR | 0555);
-	if (ino == NULL)
-		return -ENOSPC;
+	if (!ino)
+		return -ENOMEM;
+
+	spin_lock(&sbi->lookup_lock);
+	if (!list_empty(&ino->active))
+		list_del_init(&ino->active);
+	spin_unlock(&sbi->lookup_lock);
 
 	inode = autofs4_get_inode(dir->i_sb, ino);
+	if (!inode) {
+		if (!dentry->d_fsdata)
+			kfree(ino);
+		return -ENOMEM;
+	}
 	d_add(dentry, inode);
 
 	if (dir == dir->i_sb->s_root->d_inode)

  parent reply	other threads:[~2008-06-12  4:51 UTC|newest]

Thread overview: 51+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-06-12  4:50 [PATCH 00/10] Kernel patch series Ian Kent
2008-06-12  4:50 ` [PATCH 01/10] autofs4 - check for invalid dentry in getpath Ian Kent
2008-06-12  4:50 ` [PATCH 02/10] autofs4 - fix sparse warning in waitq.c:autofs4_expire_indirect() Ian Kent
2008-06-12  4:50 ` [PATCH 03/10] autofs4 - fix incorrect return from root.c:try_to_fill_dentry() Ian Kent
2008-06-12  4:51 ` [PATCH 04/10] autofs4 - fix mntput, dput order bug Ian Kent
2008-06-12  4:51 ` [PATCH 05/10] autofs4 - don't make expiring dentry negative Ian Kent
2008-06-12  4:51 ` Ian Kent [this message]
2008-06-12  4:51 ` [PATCH 07/10] autofs4 - don't release directory mutex if called in oz_mode Ian Kent
2008-06-12  4:51 ` [PATCH 08/10] autofs4 - use lookup intent flags to trigger mounts Ian Kent
2008-06-12  4:51 ` [PATCH 09/10] autofs4 - use struct qstr in waitq.c Ian Kent
2008-06-12  4:51 ` [PATCH 10/10] autofs4 - fix pending mount race Ian Kent
2008-06-14  1:13 ` [PATCH 00/10] Kernel patch series Jim Carter
2008-06-14  3:30   ` Ian Kent
2008-06-14  3:42     ` Ian Kent
2008-06-19  0:40       ` clients suddenly start hanging (was: (no subject)) Jim Carter
2008-06-19  3:14         ` Ian Kent
2008-06-19 17:08           ` Jim Carter
2008-06-19 18:34           ` Jim Carter
2008-06-20  4:09             ` Ian Kent
2008-06-21  1:02               ` Jim Carter
2008-06-21  3:12                 ` Ian Kent
2008-06-23  3:49                   ` Jim Carter
2008-06-23  4:46                     ` Ian Kent
2008-06-24  3:08                       ` Ian Kent
2008-06-24 17:02                         ` Stephen Biggs
2008-06-24 23:39                         ` Jim Carter
2008-06-25  3:33                           ` Ian Kent
2008-06-25  5:00                             ` Ian Kent
2008-06-23  4:15                   ` Ian Kent
  -- strict thread matches above, loose matches on Subject: below --
2008-04-23 18:50 (no subject) Jim Carter
2008-04-23 20:04 ` Jeff Moyer
2008-04-24  3:10   ` Ian Kent
2008-04-24 16:52   ` clients suddenly start hanging (was: (no subject)) Jim Carter
2008-04-26  1:17   ` Jim Carter
2008-04-26  5:34     ` Ian Kent
2008-04-26 18:48       ` Jim Carter
2008-04-27  5:52         ` Ian Kent
2008-04-26 22:16       ` Jim Carter
2008-04-28  6:26 ` [PATCH 1/2] autofs4 - fix execution order race in mount request code Ian Kent
2008-05-08  4:52   ` clients suddenly start hanging (was: (no subject)) Jim Carter
2008-05-08  6:13     ` Ian Kent
2008-05-11  4:14       ` Jim Carter
2008-05-11  7:57         ` Ian Kent
2008-05-15 21:59           ` Jim Carter
2008-05-16  3:00             ` Ian Kent
2008-05-18  4:07             ` Ian Kent
2008-05-21  6:58               ` Ian Kent
2008-05-22 21:42               ` Jim Carter
2008-05-23  2:35                 ` Ian Kent
2008-05-26  0:34                   ` Jim Carter
2008-06-12  3:20                     ` Ian Kent

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080612045114.25151.74853.stgit@raven.themaw.net \
    --to=raven@themaw.net \
    --cc=autofs@linux.kernel.org \
    --cc=jimc@math.ucla.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.