linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 00/10] [v3] safely drop directory dentry on failed revalidate
@ 2013-09-04 14:05 Miklos Szeredi
  2013-09-04 14:05 ` [PATCH 01/10] vfs: add d_walk() Miklos Szeredi
                   ` (9 more replies)
  0 siblings, 10 replies; 16+ messages in thread
From: Miklos Szeredi @ 2013-09-04 14:05 UTC (permalink / raw)
  To: rwheeler, avati, viro
  Cc: bfoster, dhowells, eparis, raven, linux-fsdevel, linux-kernel, mszeredi

Here's a series for fixing issues with d_drop on a directory dentry with
children and adding support for such dropped directories in fuse.

This one adds a helper for walking the dentry tree.  This reduces code
duplication and reduces the chances of a bug creeping into one of the instances.

The only user of have_submounts() is now autofs4, but after discussing with Ian
I'm still not sure how we should deal with that.

Thanks,
Miklos

---
Anand Avati (1):
      fuse: drop dentry on failed revalidate

Miklos Szeredi (9):
      vfs: add d_walk()
      vfs: check submounts and drop atomically
      vfs: check unlinked ancestors before mount
      afs: use check_submounts_and_drop()
      gfs2: use check_submounts_and_drop()
      nfs: use check_submounts_and_drop()
      sysfs: use check_submounts_and_drop()
      fuse: use d_materialise_unique()
      fuse: clean up return in fuse_dentry_revalidate()

---
 fs/afs/dir.c           |  10 +-
 fs/dcache.c            | 452 +++++++++++++++++++++++++++++++------------------
 fs/fuse/dir.c          |  97 +++++------
 fs/gfs2/dentry.c       |   9 +-
 fs/internal.h          |   1 +
 fs/namespace.c         |   9 +
 fs/nfs/dir.c           |   9 +-
 fs/sysfs/dir.c         |  20 +--
 include/linux/dcache.h |   1 +
 9 files changed, 364 insertions(+), 244 deletions(-)

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 01/10] vfs: add d_walk()
  2013-09-04 14:05 [PATCH 00/10] [v3] safely drop directory dentry on failed revalidate Miklos Szeredi
@ 2013-09-04 14:05 ` Miklos Szeredi
  2013-09-04 18:12   ` Al Viro
  2013-09-04 14:05 ` [PATCH 02/10] vfs: check submounts and drop atomically Miklos Szeredi
                   ` (8 subsequent siblings)
  9 siblings, 1 reply; 16+ messages in thread
From: Miklos Szeredi @ 2013-09-04 14:05 UTC (permalink / raw)
  To: rwheeler, avati, viro
  Cc: bfoster, dhowells, eparis, raven, linux-fsdevel, linux-kernel, mszeredi

From: Miklos Szeredi <mszeredi@suse.cz>

This one replaces three instances open coded tree walking (have_submounts,
select_parent, d_genocide) with a common helper.

In addition to slightly reducing the kernel size, this simplifies the
callers and makes them less bug prone.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/dcache.c | 325 ++++++++++++++++++++++++++++++------------------------------
 1 file changed, 162 insertions(+), 163 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index b949af8..53dbae1 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1007,34 +1007,56 @@ static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq
 	return new;
 }
 
+/**
+ * enum d_walk_ret - action to talke during tree walk
+ * @D_WALK_CONTINUE:	contrinue walk
+ * @D_WALK_QUIT:	quit walk
+ * @D_WALK_NORETRY:	quit when retry is needed
+ * @D_WALK_SKIP:	skip this dentry and its children
+ */
+enum d_walk_ret {
+	D_WALK_CONTINUE,
+	D_WALK_QUIT,
+	D_WALK_NORETRY,
+	D_WALK_SKIP,
+};
 
-/*
- * Search for at least 1 mount point in the dentry's subdirs.
- * We descend to the next level whenever the d_subdirs
- * list is non-empty and continue searching.
- */
- 
 /**
- * have_submounts - check for mounts over a dentry
- * @parent: dentry to check.
+ * d_walk - walk the dentry tree
+ * @parent:	start of walk
+ * @data:	data passed to @enter() and @leave()
+ * @enter:	callback when first entering the dentry
+ * @leave:	callback before finally leaving the dentry
  *
- * Return true if the parent or its subdirectories contain
- * a mount point
+ * The @enter() and @leave() callbacks are called with d_lock held.
  */
-int have_submounts(struct dentry *parent)
+static void d_walk(struct dentry *parent, void *data,
+		   enum d_walk_ret (*enter)(void *, struct dentry *),
+		   void (*leave)(void *, struct dentry *))
 {
 	struct dentry *this_parent;
 	struct list_head *next;
 	unsigned seq;
 	int locked = 0;
+	enum d_walk_ret ret;
+	bool retry = true;
 
 	seq = read_seqbegin(&rename_lock);
 again:
 	this_parent = parent;
-
-	if (d_mountpoint(parent))
-		goto positive;
 	spin_lock(&this_parent->d_lock);
+
+	ret = enter(data, this_parent);
+	switch (ret) {
+	case D_WALK_CONTINUE:
+		break;
+	case D_WALK_QUIT:
+	case D_WALK_SKIP:
+		goto out_unlock;
+	case D_WALK_NORETRY:
+		retry = false;
+		break;
+	}
 repeat:
 	next = this_parent->d_subdirs.next;
 resume:
@@ -1044,12 +1066,22 @@ resume:
 		next = tmp->next;
 
 		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
-		/* Have we found a mount point ? */
-		if (d_mountpoint(dentry)) {
+
+		ret = enter(data, dentry);
+		switch (ret) {
+		case D_WALK_CONTINUE:
+			break;
+		case D_WALK_QUIT:
 			spin_unlock(&dentry->d_lock);
-			spin_unlock(&this_parent->d_lock);
-			goto positive;
+			goto out_unlock;
+		case D_WALK_NORETRY:
+			retry = false;
+			break;
+		case D_WALK_SKIP:
+			spin_unlock(&dentry->d_lock);
+			continue;
 		}
+
 		if (!list_empty(&dentry->d_subdirs)) {
 			spin_unlock(&this_parent->d_lock);
 			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
@@ -1057,6 +1089,9 @@ resume:
 			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
 			goto repeat;
 		}
+		if (leave)
+			leave(data, dentry);
+
 		spin_unlock(&dentry->d_lock);
 	}
 	/*
@@ -1064,32 +1099,71 @@ resume:
 	 */
 	if (this_parent != parent) {
 		struct dentry *child = this_parent;
+
+		if (leave)
+			leave(data, this_parent);
+
 		this_parent = try_to_ascend(this_parent, locked, seq);
 		if (!this_parent)
 			goto rename_retry;
 		next = child->d_u.d_child.next;
 		goto resume;
 	}
-	spin_unlock(&this_parent->d_lock);
-	if (!locked && read_seqretry(&rename_lock, seq))
-		goto rename_retry;
-	if (locked)
-		write_sequnlock(&rename_lock);
-	return 0; /* No mount points found in tree */
-positive:
-	if (!locked && read_seqretry(&rename_lock, seq))
+	if (!locked && read_seqretry(&rename_lock, seq)) {
+		spin_unlock(&this_parent->d_lock);
 		goto rename_retry;
+	}
+	if (leave)
+		leave(data, this_parent);
+
+out_unlock:
+	spin_unlock(&this_parent->d_lock);
 	if (locked)
 		write_sequnlock(&rename_lock);
-	return 1;
+	return;
 
 rename_retry:
+	if (!retry)
+		return;
 	if (locked)
 		goto again;
 	locked = 1;
 	write_seqlock(&rename_lock);
 	goto again;
 }
+
+/*
+ * Search for at least 1 mount point in the dentry's subdirs.
+ * We descend to the next level whenever the d_subdirs
+ * list is non-empty and continue searching.
+ */
+
+/**
+ * have_submounts - check for mounts over a dentry
+ * @parent: dentry to check.
+ *
+ * Return true if the parent or its subdirectories contain
+ * a mount point
+ */
+
+static enum d_walk_ret check_mount(void *data, struct dentry *dentry)
+{
+	int *ret = data;
+	if (d_mountpoint(dentry)) {
+		*ret = 1;
+		return D_WALK_QUIT;
+	}
+	return D_WALK_CONTINUE;
+}
+
+int have_submounts(struct dentry *parent)
+{
+	int ret = 0;
+
+	d_walk(parent, &ret, check_mount, NULL);
+
+	return ret;
+}
 EXPORT_SYMBOL(have_submounts);
 
 /*
@@ -1106,93 +1180,59 @@ EXPORT_SYMBOL(have_submounts);
  * drop the lock and return early due to latency
  * constraints.
  */
-static int select_parent(struct dentry *parent, struct list_head *dispose)
-{
-	struct dentry *this_parent;
-	struct list_head *next;
-	unsigned seq;
-	int found = 0;
-	int locked = 0;
-
-	seq = read_seqbegin(&rename_lock);
-again:
-	this_parent = parent;
-	spin_lock(&this_parent->d_lock);
-repeat:
-	next = this_parent->d_subdirs.next;
-resume:
-	while (next != &this_parent->d_subdirs) {
-		struct list_head *tmp = next;
-		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
-		next = tmp->next;
 
-		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+struct select_data {
+	struct dentry *start;
+	struct list_head *dispose;
+	int found;
+};
 
-		/*
-		 * move only zero ref count dentries to the dispose list.
-		 *
-		 * Those which are presently on the shrink list, being processed
-		 * by shrink_dentry_list(), shouldn't be moved.  Otherwise the
-		 * loop in shrink_dcache_parent() might not make any progress
-		 * and loop forever.
-		 */
-		if (dentry->d_lockref.count) {
-			dentry_lru_del(dentry);
-		} else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
-			dentry_lru_move_list(dentry, dispose);
-			dentry->d_flags |= DCACHE_SHRINK_LIST;
-			found++;
-		}
-		/*
-		 * We can return to the caller if we have found some (this
-		 * ensures forward progress). We'll be coming back to find
-		 * the rest.
-		 */
-		if (found && need_resched()) {
-			spin_unlock(&dentry->d_lock);
-			goto out;
-		}
+static enum d_walk_ret select_collect(void *_data, struct dentry *dentry)
+{
+	struct select_data *data = _data;
+	enum d_walk_ret ret = D_WALK_CONTINUE;
 
-		/*
-		 * Descend a level if the d_subdirs list is non-empty.
-		 */
-		if (!list_empty(&dentry->d_subdirs)) {
-			spin_unlock(&this_parent->d_lock);
-			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
-			this_parent = dentry;
-			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
-			goto repeat;
-		}
+	if (data->start == dentry)
+		goto out;
 
-		spin_unlock(&dentry->d_lock);
-	}
 	/*
-	 * All done at this level ... ascend and resume the search.
+	 * move only zero ref count dentries to the dispose list.
+	 *
+	 * Those which are presently on the shrink list, being processed
+	 * by shrink_dentry_list(), shouldn't be moved.  Otherwise the
+	 * loop in shrink_dcache_parent() might not make any progress
+	 * and loop forever.
 	 */
-	if (this_parent != parent) {
-		struct dentry *child = this_parent;
-		this_parent = try_to_ascend(this_parent, locked, seq);
-		if (!this_parent)
-			goto rename_retry;
-		next = child->d_u.d_child.next;
-		goto resume;
+	if (dentry->d_lockref.count) {
+		dentry_lru_del(dentry);
+	} else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
+		dentry_lru_move_list(dentry, data->dispose);
+		dentry->d_flags |= DCACHE_SHRINK_LIST;
+		data->found++;
+		ret = D_WALK_NORETRY;
 	}
+	/*
+	 * We can return to the caller if we have found some (this
+	 * ensures forward progress). We'll be coming back to find
+	 * the rest.
+	 */
+	if (data->found && need_resched())
+		ret = D_WALK_QUIT;
 out:
-	spin_unlock(&this_parent->d_lock);
-	if (!locked && read_seqretry(&rename_lock, seq))
-		goto rename_retry;
-	if (locked)
-		write_sequnlock(&rename_lock);
-	return found;
+	return ret;
+}
 
-rename_retry:
-	if (found)
-		return found;
-	if (locked)
-		goto again;
-	locked = 1;
-	write_seqlock(&rename_lock);
-	goto again;
+static int select_parent(struct dentry *parent, struct list_head *dispose)
+{
+	struct select_data data = {
+		.start = parent,
+		.dispose = dispose,
+		.found = 0,
+	};
+
+	d_walk(parent, &data, select_collect, NULL);
+
+	return data.found;
 }
 
 /**
@@ -2904,68 +2944,27 @@ int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
 	return result;
 }
 
-void d_genocide(struct dentry *root)
+static enum d_walk_ret d_genocide_check(void *data, struct dentry *dentry)
 {
-	struct dentry *this_parent;
-	struct list_head *next;
-	unsigned seq;
-	int locked = 0;
-
-	seq = read_seqbegin(&rename_lock);
-again:
-	this_parent = root;
-	spin_lock(&this_parent->d_lock);
-repeat:
-	next = this_parent->d_subdirs.next;
-resume:
-	while (next != &this_parent->d_subdirs) {
-		struct list_head *tmp = next;
-		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
-		next = tmp->next;
+	struct dentry *root = data;
+	if (dentry != root && (d_unhashed(dentry) || !dentry->d_inode))
+		return D_WALK_SKIP;
+	else
+		return D_WALK_CONTINUE;
+}
 
-		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
-		if (d_unhashed(dentry) || !dentry->d_inode) {
-			spin_unlock(&dentry->d_lock);
-			continue;
-		}
-		if (!list_empty(&dentry->d_subdirs)) {
-			spin_unlock(&this_parent->d_lock);
-			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
-			this_parent = dentry;
-			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
-			goto repeat;
-		}
-		if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
-			dentry->d_flags |= DCACHE_GENOCIDE;
-			dentry->d_lockref.count--;
-		}
-		spin_unlock(&dentry->d_lock);
-	}
-	if (this_parent != root) {
-		struct dentry *child = this_parent;
-		if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
-			this_parent->d_flags |= DCACHE_GENOCIDE;
-			this_parent->d_lockref.count--;
-		}
-		this_parent = try_to_ascend(this_parent, locked, seq);
-		if (!this_parent)
-			goto rename_retry;
-		next = child->d_u.d_child.next;
-		goto resume;
+static void d_genocide_kill(void *data, struct dentry *dentry)
+{
+	struct dentry *root = data;
+	if (dentry != root && !(dentry->d_flags & DCACHE_GENOCIDE)) {
+		dentry->d_flags |= DCACHE_GENOCIDE;
+		dentry->d_lockref.count--;
 	}
-	spin_unlock(&this_parent->d_lock);
-	if (!locked && read_seqretry(&rename_lock, seq))
-		goto rename_retry;
-	if (locked)
-		write_sequnlock(&rename_lock);
-	return;
+}
 
-rename_retry:
-	if (locked)
-		goto again;
-	locked = 1;
-	write_seqlock(&rename_lock);
-	goto again;
+void d_genocide(struct dentry *root)
+{
+	d_walk(root, root, d_genocide_check, d_genocide_kill);
 }
 
 void d_tmpfile(struct dentry *dentry, struct inode *inode)
-- 
1.8.1.4

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 02/10] vfs: check submounts and drop atomically
  2013-09-04 14:05 [PATCH 00/10] [v3] safely drop directory dentry on failed revalidate Miklos Szeredi
  2013-09-04 14:05 ` [PATCH 01/10] vfs: add d_walk() Miklos Szeredi
@ 2013-09-04 14:05 ` Miklos Szeredi
  2013-09-04 17:58   ` Al Viro
  2013-09-04 14:05 ` [PATCH 03/10] vfs: check unlinked ancestors before mount Miklos Szeredi
                   ` (7 subsequent siblings)
  9 siblings, 1 reply; 16+ messages in thread
From: Miklos Szeredi @ 2013-09-04 14:05 UTC (permalink / raw)
  To: rwheeler, avati, viro
  Cc: bfoster, dhowells, eparis, raven, linux-fsdevel, linux-kernel,
	mszeredi, Steven Whitehouse, Trond Myklebust, Greg Kroah-Hartman

From: Miklos Szeredi <mszeredi@suse.cz>

We check submounts before doing d_drop() on a non-empty directory dentry in
NFS (have_submounts()), but we do not exclude a racing mount.

 Process A: have_submounts() -> returns false
 Process B: mount() -> success
 Process A: d_drop()

This patch prepares the ground for the fix by doing the following
operations all under the same rename lock:

  have_submounts()
  shrink_dcache_parent()
  d_drop()

This is actually an optimization since have_submounts() and
shrink_dcache_parent() both traverse the same dentry tree separately.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
CC: David Howells <dhowells@redhat.com>
CC: Steven Whitehouse <swhiteho@redhat.com>
CC: Trond Myklebust <Trond.Myklebust@netapp.com>
CC: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/dcache.c            | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/dcache.h |  1 +
 2 files changed, 93 insertions(+)

diff --git a/fs/dcache.c b/fs/dcache.c
index 53dbae1..d0673f7 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1253,6 +1253,98 @@ void shrink_dcache_parent(struct dentry * parent)
 }
 EXPORT_SYMBOL(shrink_dcache_parent);
 
+static enum d_walk_ret check_and_collect(void *_data, struct dentry *dentry)
+{
+	struct select_data *data = _data;
+
+	if (d_mountpoint(dentry)) {
+		data->found = -EBUSY;
+		return D_WALK_QUIT;
+	}
+
+	return select_collect(_data, dentry);
+}
+
+static void check_and_drop(void *_data, struct dentry *dentry)
+{
+	struct select_data *data = _data;
+
+	/* We're only interested in the root of this subtree */
+	if (data->start == dentry) {
+		if (d_mountpoint(dentry))
+			data->found = -EBUSY;
+		if (!data->found)
+			__d_drop(dentry);
+	}
+}
+
+static int __check_submounts_and_drop(struct dentry *parent,
+					     struct list_head *dispose)
+{
+	struct select_data data = {
+		.start = parent,
+		.dispose = dispose,
+		.found = 0,
+	};
+
+	d_walk(parent, &data, check_and_collect, check_and_drop);
+
+	return data.found;
+}
+
+/**
+ * check_submounts_and_drop - prune dcache, check for submounts and drop
+ *
+ * All done as a single atomic operation relative to has_unlinked_ancestor().
+ * Returns 0 if successfully unhashed @parent.  If there were submounts then
+ * return -EBUSY.
+ *
+ * @dentry: dentry to prune and drop
+ */
+int check_submounts_and_drop(struct dentry *dentry)
+{
+	int ret = 0;
+
+	/* Negative dentries can be dropped without further checks */
+	if (!dentry->d_inode) {
+		d_drop(dentry);
+		goto out;
+	}
+
+	spin_lock(&dentry->d_lock);
+	if (d_unhashed(dentry))
+		goto out_unlock;
+	if (list_empty(&dentry->d_subdirs)) {
+		if (d_mountpoint(dentry)) {
+			ret = -EBUSY;
+			goto out_unlock;
+		}
+		__d_drop(dentry);
+		goto out_unlock;
+	}
+	spin_unlock(&dentry->d_lock);
+
+	for (;;) {
+		LIST_HEAD(dispose);
+		ret = __check_submounts_and_drop(dentry, &dispose);
+		if (!list_empty(&dispose))
+			shrink_dentry_list(&dispose);
+
+		if (ret <= 0)
+			break;
+
+		cond_resched();
+	}
+
+out:
+	return ret;
+
+out_unlock:
+	spin_unlock(&dentry->d_lock);
+	goto out;
+}
+EXPORT_SYMBOL(check_submounts_and_drop);
+
 /**
  * __d_alloc	-	allocate a dcache entry
  * @sb: filesystem it will belong to
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index efdc944..87bd0d7 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -253,6 +253,7 @@ extern void d_prune_aliases(struct inode *);
 
 /* test whether we have any submounts in a subdir tree */
 extern int have_submounts(struct dentry *);
+extern int check_submounts_and_drop(struct dentry *);
 
 /*
  * This adds the entry to the hash queues.
-- 
1.8.1.4

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 03/10] vfs: check unlinked ancestors before mount
  2013-09-04 14:05 [PATCH 00/10] [v3] safely drop directory dentry on failed revalidate Miklos Szeredi
  2013-09-04 14:05 ` [PATCH 01/10] vfs: add d_walk() Miklos Szeredi
  2013-09-04 14:05 ` [PATCH 02/10] vfs: check submounts and drop atomically Miklos Szeredi
@ 2013-09-04 14:05 ` Miklos Szeredi
  2013-09-04 14:05 ` [PATCH 04/10] afs: use check_submounts_and_drop() Miklos Szeredi
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 16+ messages in thread
From: Miklos Szeredi @ 2013-09-04 14:05 UTC (permalink / raw)
  To: rwheeler, avati, viro
  Cc: bfoster, dhowells, eparis, raven, linux-fsdevel, linux-kernel,
	mszeredi, Steven Whitehouse, Trond Myklebust, Greg Kroah-Hartman

From: Miklos Szeredi <mszeredi@suse.cz>

We check submounts before doing d_drop() on a non-empty directory dentry in
NFS (have_submounts()), but we do not exclude a racing mount.  Nor do we
prevent mounts to be added to the disconnected subtree using relative paths
after the d_drop().

This patch fixes these issues by checking for unlinked (unhashed, non-root)
ancestors before proceeding with the mount.  This is done after setting
DCACHE_MOUNTED on the soon-to-be mountpoint and with the rename seqlock
taken for write.  This ensures that the only one of
check_submounts_and_drop() or has_unlinked_ancestor() can succeed.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
CC: David Howells <dhowells@redhat.com>
CC: Steven Whitehouse <swhiteho@redhat.com>
CC: Trond Myklebust <Trond.Myklebust@netapp.com>
CC: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/dcache.c    | 35 +++++++++++++++++++++++++++++++++++
 fs/internal.h  |  1 +
 fs/namespace.c |  9 +++++++++
 3 files changed, 45 insertions(+)

diff --git a/fs/dcache.c b/fs/dcache.c
index d0673f7..ae60a83 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1166,6 +1166,41 @@ int have_submounts(struct dentry *parent)
 }
 EXPORT_SYMBOL(have_submounts);
 
+static bool __has_unlinked_ancestor(struct dentry *dentry)
+{
+	struct dentry *this;
+
+	for (this = dentry; !IS_ROOT(this); this = this->d_parent) {
+		int is_unhashed;
+
+		/* Need exclusion wrt. check_submounts_and_drop() */
+		spin_lock(&this->d_lock);
+		is_unhashed = d_unhashed(this);
+		spin_unlock(&this->d_lock);
+
+		if (is_unhashed)
+			return true;
+	}
+	return false;
+}
+
+/*
+ * Called by mount code to check if the mountpoint is reachable (e.g. NFS can
+ * unhash a directory dentry and then the complete subtree can become
+ * unreachable).
+ */
+bool has_unlinked_ancestor(struct dentry *dentry)
+{
+	bool found;
+
+	/* Need exclusion wrt. check_submounts_and_drop() */
+	write_seqlock(&rename_lock);
+	found = __has_unlinked_ancestor(dentry);
+	write_sequnlock(&rename_lock);
+
+	return found;
+}
+
 /*
  * Search the dentry child list of the specified parent,
  * and move any unused dentries to the end of the unused
diff --git a/fs/internal.h b/fs/internal.h
index 7c5f01c..d232355 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -126,6 +126,7 @@ extern int invalidate_inodes(struct super_block *, bool);
  * dcache.c
  */
 extern struct dentry *__d_alloc(struct super_block *, const struct qstr *);
+extern bool has_unlinked_ancestor(struct dentry *dentry);
 
 /*
  * read_write.c
diff --git a/fs/namespace.c b/fs/namespace.c
index a45ba4f..91b1c39 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -634,6 +634,15 @@ static struct mountpoint *new_mountpoint(struct dentry *dentry)
 	}
 	dentry->d_flags |= DCACHE_MOUNTED;
 	spin_unlock(&dentry->d_lock);
+
+	if (has_unlinked_ancestor(dentry)) {
+		spin_lock(&dentry->d_lock);
+		dentry->d_flags &= ~DCACHE_MOUNTED;
+		spin_unlock(&dentry->d_lock);
+		kfree(mp);
+		return ERR_PTR(-ENOENT);
+	}
+
 	mp->m_dentry = dentry;
 	mp->m_count = 1;
 	list_add(&mp->m_hash, chain);
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 04/10] afs: use check_submounts_and_drop()
  2013-09-04 14:05 [PATCH 00/10] [v3] safely drop directory dentry on failed revalidate Miklos Szeredi
                   ` (2 preceding siblings ...)
  2013-09-04 14:05 ` [PATCH 03/10] vfs: check unlinked ancestors before mount Miklos Szeredi
@ 2013-09-04 14:05 ` Miklos Szeredi
  2013-09-04 14:05 ` [PATCH 05/10] gfs2: " Miklos Szeredi
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 16+ messages in thread
From: Miklos Szeredi @ 2013-09-04 14:05 UTC (permalink / raw)
  To: rwheeler, avati, viro
  Cc: bfoster, dhowells, eparis, raven, linux-fsdevel, linux-kernel, mszeredi

From: Miklos Szeredi <mszeredi@suse.cz>

Do have_submounts(), shrink_dcache_parent() and d_drop() atomically.

check_submounts_and_drop() can deal with negative dentries as well.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
CC: David Howells <dhowells@redhat.com>
---
 fs/afs/dir.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 34494fb..0b74d31 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -685,16 +685,12 @@ not_found:
 	spin_unlock(&dentry->d_lock);
 
 out_bad:
-	if (dentry->d_inode) {
-		/* don't unhash if we have submounts */
-		if (have_submounts(dentry))
-			goto out_skip;
-	}
+	/* don't unhash if we have submounts */
+	if (check_submounts_and_drop(dentry) != 0)
+		goto out_skip;
 
 	_debug("dropping dentry %s/%s",
 	       parent->d_name.name, dentry->d_name.name);
-	shrink_dcache_parent(dentry);
-	d_drop(dentry);
 	dput(parent);
 	key_put(key);
 
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 05/10] gfs2: use check_submounts_and_drop()
  2013-09-04 14:05 [PATCH 00/10] [v3] safely drop directory dentry on failed revalidate Miklos Szeredi
                   ` (3 preceding siblings ...)
  2013-09-04 14:05 ` [PATCH 04/10] afs: use check_submounts_and_drop() Miklos Szeredi
@ 2013-09-04 14:05 ` Miklos Szeredi
  2013-09-04 14:05 ` [PATCH 06/10] nfs: " Miklos Szeredi
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 16+ messages in thread
From: Miklos Szeredi @ 2013-09-04 14:05 UTC (permalink / raw)
  To: rwheeler, avati, viro
  Cc: bfoster, dhowells, eparis, raven, linux-fsdevel, linux-kernel,
	mszeredi, Steven Whitehouse

From: Miklos Szeredi <mszeredi@suse.cz>

Do have_submounts(), shrink_dcache_parent() and d_drop() atomically.

check_submounts_and_drop() can deal with negative dentries and
non-directories as well.

Non-directories can also be mounted on.  And just like directories we don't
want these to disappear with invalidation.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
CC: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/dentry.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index f2448ab..d3a5d4e 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -93,12 +93,9 @@ invalid_gunlock:
 	if (!had_lock)
 		gfs2_glock_dq_uninit(&d_gh);
 invalid:
-	if (inode && S_ISDIR(inode->i_mode)) {
-		if (have_submounts(dentry))
-			goto valid;
-		shrink_dcache_parent(dentry);
-	}
-	d_drop(dentry);
+	if (check_submounts_and_drop(dentry) != 0)
+		goto valid;
+
 	dput(parent);
 	return 0;
 
-- 
1.8.1.4

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 06/10] nfs: use check_submounts_and_drop()
  2013-09-04 14:05 [PATCH 00/10] [v3] safely drop directory dentry on failed revalidate Miklos Szeredi
                   ` (4 preceding siblings ...)
  2013-09-04 14:05 ` [PATCH 05/10] gfs2: " Miklos Szeredi
@ 2013-09-04 14:05 ` Miklos Szeredi
  2013-09-04 14:05 ` [PATCH 07/10] sysfs: " Miklos Szeredi
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 16+ messages in thread
From: Miklos Szeredi @ 2013-09-04 14:05 UTC (permalink / raw)
  To: rwheeler, avati, viro
  Cc: bfoster, dhowells, eparis, raven, linux-fsdevel, linux-kernel,
	mszeredi, Trond Myklebust

From: Miklos Szeredi <mszeredi@suse.cz>

Do have_submounts(), shrink_dcache_parent() and d_drop() atomically.

check_submounts_and_drop() can deal with negative dentries and
non-directories as well.

Non-directories can also be mounted on.  And just like directories we don't
want these to disappear with invalidation.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
CC: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e474ca2b..7468735 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1135,14 +1135,13 @@ out_zap_parent:
 	if (inode && S_ISDIR(inode->i_mode)) {
 		/* Purge readdir caches. */
 		nfs_zap_caches(inode);
-		/* If we have submounts, don't unhash ! */
-		if (have_submounts(dentry))
-			goto out_valid;
 		if (dentry->d_flags & DCACHE_DISCONNECTED)
 			goto out_valid;
-		shrink_dcache_parent(dentry);
 	}
-	d_drop(dentry);
+	/* If we have submounts, don't unhash ! */
+	if (check_submounts_and_drop(dentry) != 0)
+		goto out_valid;
+
 	dput(parent);
 	dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n",
 			__func__, dentry->d_parent->d_name.name,
-- 
1.8.1.4

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 07/10] sysfs: use check_submounts_and_drop()
  2013-09-04 14:05 [PATCH 00/10] [v3] safely drop directory dentry on failed revalidate Miklos Szeredi
                   ` (5 preceding siblings ...)
  2013-09-04 14:05 ` [PATCH 06/10] nfs: " Miklos Szeredi
@ 2013-09-04 14:05 ` Miklos Szeredi
  2013-09-04 15:17   ` Greg Kroah-Hartman
  2013-09-04 14:05 ` [PATCH 08/10] fuse: use d_materialise_unique() Miklos Szeredi
                   ` (2 subsequent siblings)
  9 siblings, 1 reply; 16+ messages in thread
From: Miklos Szeredi @ 2013-09-04 14:05 UTC (permalink / raw)
  To: rwheeler, avati, viro
  Cc: bfoster, dhowells, eparis, raven, linux-fsdevel, linux-kernel,
	mszeredi, Greg Kroah-Hartman

From: Miklos Szeredi <mszeredi@suse.cz>

Do have_submounts(), shrink_dcache_parent() and d_drop() atomically.

check_submounts_and_drop() can deal with negative dentries and
non-directories as well.

Non-directories can also be mounted on.  And just like directories we don't
want these to disappear with invalidation.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
CC: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/sysfs/dir.c | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index e068e74..fde8856 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -297,7 +297,6 @@ static int sysfs_dentry_delete(const struct dentry *dentry)
 static int sysfs_dentry_revalidate(struct dentry *dentry, unsigned int flags)
 {
 	struct sysfs_dirent *sd;
-	int is_dir;
 	int type;
 
 	if (flags & LOOKUP_RCU)
@@ -341,18 +340,15 @@ out_bad:
 	 * is performed at its new name the dentry will be readded
 	 * to the dcache hashes.
 	 */
-	is_dir = (sysfs_type(sd) == SYSFS_DIR);
 	mutex_unlock(&sysfs_mutex);
-	if (is_dir) {
-		/* If we have submounts we must allow the vfs caches
-		 * to lie about the state of the filesystem to prevent
-		 * leaks and other nasty things.
-		 */
-		if (have_submounts(dentry))
-			goto out_valid;
-		shrink_dcache_parent(dentry);
-	}
-	d_drop(dentry);
+
+	/* If we have submounts we must allow the vfs caches
+	 * to lie about the state of the filesystem to prevent
+	 * leaks and other nasty things.
+	 */
+	if (check_submounts_and_drop(dentry) != 0)
+		goto out_valid;
+
 	return 0;
 }
 
-- 
1.8.1.4

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 08/10] fuse: use d_materialise_unique()
  2013-09-04 14:05 [PATCH 00/10] [v3] safely drop directory dentry on failed revalidate Miklos Szeredi
                   ` (6 preceding siblings ...)
  2013-09-04 14:05 ` [PATCH 07/10] sysfs: " Miklos Szeredi
@ 2013-09-04 14:05 ` Miklos Szeredi
  2013-09-05 21:29   ` J. Bruce Fields
  2013-09-04 14:05 ` [PATCH 09/10] fuse: clean up return in fuse_dentry_revalidate() Miklos Szeredi
  2013-09-04 14:05 ` [PATCH 10/10] fuse: drop dentry on failed revalidate Miklos Szeredi
  9 siblings, 1 reply; 16+ messages in thread
From: Miklos Szeredi @ 2013-09-04 14:05 UTC (permalink / raw)
  To: rwheeler, avati, viro
  Cc: bfoster, dhowells, eparis, raven, linux-fsdevel, linux-kernel, mszeredi

From: Miklos Szeredi <mszeredi@suse.cz>

Use d_materialise_unique() instead of d_splice_alias().  This allows dentry
subtrees to be moved to a new place if there moved, even if something is
referencing a dentry in the subtree (open fd, cwd, etc..).

This will also allow us to drop a subtree if it is found to be replaced by
something else.  In this case the disconnected subtree can later be
reconnected to its new location.

d_materialise_unique() ensures that a directory entry only ever has one
alias.  We keep fc->inst_mutex around the calls for d_materialise_unique()
on directories to prevent a race with mkdir "stealing" the inode.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/dir.c | 69 ++++++++++++++++++++++-------------------------------------
 1 file changed, 26 insertions(+), 43 deletions(-)

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 72a5d5b..131d14b 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -267,26 +267,6 @@ int fuse_valid_type(int m)
 		S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m);
 }
 
-/*
- * Add a directory inode to a dentry, ensuring that no other dentry
- * refers to this inode.  Called with fc->inst_mutex.
- */
-static struct dentry *fuse_d_add_directory(struct dentry *entry,
-					   struct inode *inode)
-{
-	struct dentry *alias = d_find_alias(inode);
-	if (alias && !(alias->d_flags & DCACHE_DISCONNECTED)) {
-		/* This tries to shrink the subtree below alias */
-		fuse_invalidate_entry(alias);
-		dput(alias);
-		if (!hlist_empty(&inode->i_dentry))
-			return ERR_PTR(-EBUSY);
-	} else {
-		dput(alias);
-	}
-	return d_splice_alias(inode, entry);
-}
-
 int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
 		     struct fuse_entry_out *outarg, struct inode **inode)
 {
@@ -345,6 +325,24 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
 	return err;
 }
 
+static struct dentry *fuse_materialise_dentry(struct dentry *dentry,
+					      struct inode *inode)
+{
+	struct dentry *newent;
+
+	if (inode && S_ISDIR(inode->i_mode)) {
+		struct fuse_conn *fc = get_fuse_conn(inode);
+
+		mutex_lock(&fc->inst_mutex);
+		newent = d_materialise_unique(dentry, inode);
+		mutex_unlock(&fc->inst_mutex);
+	} else {
+		newent = d_materialise_unique(dentry, inode);
+	}
+
+	return newent;
+}
+
 static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
 				  unsigned int flags)
 {
@@ -352,7 +350,6 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
 	struct fuse_entry_out outarg;
 	struct inode *inode;
 	struct dentry *newent;
-	struct fuse_conn *fc = get_fuse_conn(dir);
 	bool outarg_valid = true;
 
 	err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
@@ -368,16 +365,10 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
 	if (inode && get_node_id(inode) == FUSE_ROOT_ID)
 		goto out_iput;
 
-	if (inode && S_ISDIR(inode->i_mode)) {
-		mutex_lock(&fc->inst_mutex);
-		newent = fuse_d_add_directory(entry, inode);
-		mutex_unlock(&fc->inst_mutex);
-		err = PTR_ERR(newent);
-		if (IS_ERR(newent))
-			goto out_iput;
-	} else {
-		newent = d_splice_alias(inode, entry);
-	}
+	newent = fuse_materialise_dentry(entry, inode);
+	err = PTR_ERR(newent);
+	if (IS_ERR(newent))
+		goto out_err;
 
 	entry = newent ? newent : entry;
 	if (outarg_valid)
@@ -1275,18 +1266,10 @@ static int fuse_direntplus_link(struct file *file,
 	if (!inode)
 		goto out;
 
-	if (S_ISDIR(inode->i_mode)) {
-		mutex_lock(&fc->inst_mutex);
-		alias = fuse_d_add_directory(dentry, inode);
-		mutex_unlock(&fc->inst_mutex);
-		err = PTR_ERR(alias);
-		if (IS_ERR(alias)) {
-			iput(inode);
-			goto out;
-		}
-	} else {
-		alias = d_splice_alias(inode, dentry);
-	}
+	alias = fuse_materialise_dentry(dentry, inode);
+	err = PTR_ERR(alias);
+	if (IS_ERR(alias))
+		goto out;
 
 	if (alias) {
 		dput(dentry);
-- 
1.8.1.4

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 09/10] fuse: clean up return in fuse_dentry_revalidate()
  2013-09-04 14:05 [PATCH 00/10] [v3] safely drop directory dentry on failed revalidate Miklos Szeredi
                   ` (7 preceding siblings ...)
  2013-09-04 14:05 ` [PATCH 08/10] fuse: use d_materialise_unique() Miklos Szeredi
@ 2013-09-04 14:05 ` Miklos Szeredi
  2013-09-04 14:05 ` [PATCH 10/10] fuse: drop dentry on failed revalidate Miklos Szeredi
  9 siblings, 0 replies; 16+ messages in thread
From: Miklos Szeredi @ 2013-09-04 14:05 UTC (permalink / raw)
  To: rwheeler, avati, viro
  Cc: bfoster, dhowells, eparis, raven, linux-fsdevel, linux-kernel, mszeredi

From: Miklos Szeredi <mszeredi@suse.cz>

On errors unrelated to the filesystem's state (ENOMEM, ENOTCONN) return the
error itself from ->d_revalidate() insted of returning zero (invalid).

Also make a common label for invalidating the dentry.  This will be used by
the next patch.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/dir.c | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 131d14b..25c6cfe 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -182,10 +182,11 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
 	struct inode *inode;
 	struct dentry *parent;
 	struct fuse_conn *fc;
+	int ret;
 
 	inode = ACCESS_ONCE(entry->d_inode);
 	if (inode && is_bad_inode(inode))
-		return 0;
+		goto invalid;
 	else if (fuse_dentry_time(entry) < get_jiffies_64()) {
 		int err;
 		struct fuse_entry_out outarg;
@@ -195,20 +196,23 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
 
 		/* For negative dentries, always do a fresh lookup */
 		if (!inode)
-			return 0;
+			goto invalid;
 
+		ret = -ECHILD;
 		if (flags & LOOKUP_RCU)
-			return -ECHILD;
+			goto out;
 
 		fc = get_fuse_conn(inode);
 		req = fuse_get_req_nopages(fc);
+		ret = PTR_ERR(req);
 		if (IS_ERR(req))
-			return 0;
+			goto out;
 
 		forget = fuse_alloc_forget();
 		if (!forget) {
 			fuse_put_request(fc, req);
-			return 0;
+			ret = -ENOMEM;
+			goto out;
 		}
 
 		attr_version = fuse_get_attr_version(fc);
@@ -227,7 +231,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
 			struct fuse_inode *fi = get_fuse_inode(inode);
 			if (outarg.nodeid != get_node_id(inode)) {
 				fuse_queue_forget(fc, forget, outarg.nodeid, 1);
-				return 0;
+				goto invalid;
 			}
 			spin_lock(&fc->lock);
 			fi->nlookup++;
@@ -235,7 +239,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
 		}
 		kfree(forget);
 		if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
-			return 0;
+			goto invalid;
 
 		fuse_change_attributes(inode, &outarg.attr,
 				       entry_attr_timeout(&outarg),
@@ -249,7 +253,13 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
 			dput(parent);
 		}
 	}
-	return 1;
+	ret = 1;
+out:
+	return ret;
+
+invalid:
+	ret = 0;
+	goto out;
 }
 
 static int invalid_nodeid(u64 nodeid)
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 10/10] fuse: drop dentry on failed revalidate
  2013-09-04 14:05 [PATCH 00/10] [v3] safely drop directory dentry on failed revalidate Miklos Szeredi
                   ` (8 preceding siblings ...)
  2013-09-04 14:05 ` [PATCH 09/10] fuse: clean up return in fuse_dentry_revalidate() Miklos Szeredi
@ 2013-09-04 14:05 ` Miklos Szeredi
  9 siblings, 0 replies; 16+ messages in thread
From: Miklos Szeredi @ 2013-09-04 14:05 UTC (permalink / raw)
  To: rwheeler, avati, viro
  Cc: bfoster, dhowells, eparis, raven, linux-fsdevel, linux-kernel, mszeredi

From: Anand Avati <avati@redhat.com>

Drop a subtree when we find that it has moved or been delated.  This can be
done as long as there are no submounts under this location.

If the directory was moved and we come across the same directory in a
future lookup it will be reconnected by d_materialise_unique().

Signed-off-by: Anand Avati <avati@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/dir.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 25c6cfe..0e6961a 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -259,6 +259,8 @@ out:
 
 invalid:
 	ret = 0;
+	if (check_submounts_and_drop(entry) != 0)
+		ret = 1;
 	goto out;
 }
 
-- 
1.8.1.4

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 07/10] sysfs: use check_submounts_and_drop()
  2013-09-04 14:05 ` [PATCH 07/10] sysfs: " Miklos Szeredi
@ 2013-09-04 15:17   ` Greg Kroah-Hartman
  0 siblings, 0 replies; 16+ messages in thread
From: Greg Kroah-Hartman @ 2013-09-04 15:17 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: rwheeler, avati, viro, bfoster, dhowells, eparis, raven,
	linux-fsdevel, linux-kernel, mszeredi

On Wed, Sep 04, 2013 at 04:05:53PM +0200, Miklos Szeredi wrote:
> From: Miklos Szeredi <mszeredi@suse.cz>
> 
> Do have_submounts(), shrink_dcache_parent() and d_drop() atomically.
> 
> check_submounts_and_drop() can deal with negative dentries and
> non-directories as well.
> 
> Non-directories can also be mounted on.  And just like directories we don't
> want these to disappear with invalidation.
> 
> Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
> CC: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 02/10] vfs: check submounts and drop atomically
  2013-09-04 14:05 ` [PATCH 02/10] vfs: check submounts and drop atomically Miklos Szeredi
@ 2013-09-04 17:58   ` Al Viro
  2013-09-05  9:11     ` Miklos Szeredi
  0 siblings, 1 reply; 16+ messages in thread
From: Al Viro @ 2013-09-04 17:58 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: rwheeler, avati, bfoster, dhowells, eparis, raven, linux-fsdevel,
	linux-kernel, mszeredi, Steven Whitehouse, Trond Myklebust,
	Greg Kroah-Hartman

On Wed, Sep 04, 2013 at 04:05:48PM +0200, Miklos Szeredi wrote:

> +static void check_and_drop(void *_data, struct dentry *dentry)
> +{
> +	struct select_data *data = _data;
> +
> +	/* We're only interested in the root of this subtree */
> +	if (data->start == dentry) {
> +		if (d_mountpoint(dentry))
> +			data->found = -EBUSY;
> +		if (!data->found)
> +			__d_drop(dentry);
> +	}
> +}

Wouldn't it be better to do that in caller?  Granted, it's not a hot
path, but... why bother calling it for each dentry in a subtree, only
to return immediately on all calls but the last one?

> +static int __check_submounts_and_drop(struct dentry *parent,
> +					     struct list_head *dispose)
> +{
> +	struct select_data data = {
> +		.start = parent,
> +		.dispose = dispose,
> +		.found = 0,
> +	};
> +
> +	d_walk(parent, &data, check_and_collect, check_and_drop);
> +
> +	return data.found;
> +}

Incidentally, I would rather expand that in the only caller...

> +/**
> + * check_submounts_and_drop - prune dcache, check for submounts and drop
> + *
> + * All done as a single atomic operation relative to has_unlinked_ancestor().
> + * Returns 0 if successfully unhashed @parent.  If there were submounts then
> + * return -EBUSY.
> + *
> + * @dentry: dentry to prune and drop
> + */
> +int check_submounts_and_drop(struct dentry *dentry)
> +{
> +	int ret = 0;
> +
> +	/* Negative dentries can be dropped without further checks */
> +	if (!dentry->d_inode) {
> +		d_drop(dentry);
> +		goto out;
> +	}
> +
> +	spin_lock(&dentry->d_lock);
> +	if (d_unhashed(dentry))
> +		goto out_unlock;
> +	if (list_empty(&dentry->d_subdirs)) {
> +		if (d_mountpoint(dentry)) {
> +			ret = -EBUSY;
> +			goto out_unlock;
> +		}
> +		__d_drop(dentry);
> +		goto out_unlock;
> +	}
> +	spin_unlock(&dentry->d_lock);
> +
> +	for (;;) {
> +		LIST_HEAD(dispose);
> +		ret = __check_submounts_and_drop(dentry, &dispose);
> +		if (!list_empty(&dispose))
> +			shrink_dentry_list(&dispose);
> +
> +		if (ret <= 0)
> +			break;
> +
> +		cond_resched();
> +	}
> +
> +out:
> +	return ret;
> +
> +out_unlock:
> +	spin_unlock(&dentry->d_lock);
> +	goto out;
> +}
> +EXPORT_SYMBOL(check_submounts_and_drop);

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 01/10] vfs: add d_walk()
  2013-09-04 14:05 ` [PATCH 01/10] vfs: add d_walk() Miklos Szeredi
@ 2013-09-04 18:12   ` Al Viro
  0 siblings, 0 replies; 16+ messages in thread
From: Al Viro @ 2013-09-04 18:12 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: rwheeler, avati, bfoster, dhowells, eparis, raven, linux-fsdevel,
	linux-kernel, mszeredi

On Wed, Sep 04, 2013 at 04:05:47PM +0200, Miklos Szeredi wrote:
> From: Miklos Szeredi <mszeredi@suse.cz>
> 
> This one replaces three instances open coded tree walking (have_submounts,
> select_parent, d_genocide) with a common helper.
> 
> In addition to slightly reducing the kernel size, this simplifies the
> callers and makes them less bug prone.

Two notes:

1) I'd probably kill select_parent() - it made sense when it was a huge
function called in a loop, but when it's a simple call of d_walk()...
Might as well do it directly in shrink_dcache_parent().  Moreover, in
that case we'd be free to embed list_head into your struct select_data,
rather than passing pointers.

2) I'm not sure we need the "leave" callback at all; the only user is
d_genocide() and AFAICS it can bloody well be done as part of
d_genocide_check() - we care about having it done to all nodes in
the tree, but we don't really care about the order in which it's
done...

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 02/10] vfs: check submounts and drop atomically
  2013-09-04 17:58   ` Al Viro
@ 2013-09-05  9:11     ` Miklos Szeredi
  0 siblings, 0 replies; 16+ messages in thread
From: Miklos Szeredi @ 2013-09-05  9:11 UTC (permalink / raw)
  To: Al Viro
  Cc: Ric Wheeler, Anand Avati, Brian Foster, David Howells,
	Eric Paris, Ian Kent, Linux-Fsdevel, Kernel Mailing List,
	mszeredi, Steven Whitehouse, Trond Myklebust, Greg Kroah-Hartman

On Wed, Sep 4, 2013 at 7:58 PM, Al Viro <viro@zeniv.linux.org.uk> wrote:
> On Wed, Sep 04, 2013 at 04:05:48PM +0200, Miklos Szeredi wrote:
>
>> +static void check_and_drop(void *_data, struct dentry *dentry)
>> +{
>> +     struct select_data *data = _data;
>> +
>> +     /* We're only interested in the root of this subtree */
>> +     if (data->start == dentry) {
>> +             if (d_mountpoint(dentry))
>> +                     data->found = -EBUSY;
>> +             if (!data->found)
>> +                     __d_drop(dentry);
>> +     }
>> +}
>
> Wouldn't it be better to do that in caller?

It needs to stay under d_lock.  Moving the unlock to caller would be
rather awkward (if we quit walking the start dentry is not even
locked, etc..)  So instead of a ->leave() callback, I just added a
->finish() callback which is called once, when the walk was finished
successfully.

Fixed your other comments too.  I'll do a quick testing and then post
the changed patches.

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 08/10] fuse: use d_materialise_unique()
  2013-09-04 14:05 ` [PATCH 08/10] fuse: use d_materialise_unique() Miklos Szeredi
@ 2013-09-05 21:29   ` J. Bruce Fields
  0 siblings, 0 replies; 16+ messages in thread
From: J. Bruce Fields @ 2013-09-05 21:29 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: rwheeler, avati, viro, bfoster, dhowells, eparis, raven,
	linux-fsdevel, linux-kernel, mszeredi

On Wed, Sep 04, 2013 at 04:05:54PM +0200, Miklos Szeredi wrote:
> From: Miklos Szeredi <mszeredi@suse.cz>
> 
> Use d_materialise_unique() instead of d_splice_alias().  This allows dentry
> subtrees to be moved to a new place if there moved, even if something is
> referencing a dentry in the subtree (open fd, cwd, etc..).
> 
> This will also allow us to drop a subtree if it is found to be replaced by
> something else.  In this case the disconnected subtree can later be
> reconnected to its new location.
> 
> d_materialise_unique() ensures that a directory entry only ever has one
> alias.  We keep fc->inst_mutex around the calls for d_materialise_unique()
> on directories to prevent a race with mkdir "stealing" the inode.

One worry I have with d_materialise_unique is the

	anon->d_flags &= ~DCACHE_DISCONNECTED

at the end of __d_materialise_dentry.  The export code depends on
DCACHE_DISCONNECTED not being cleared until it's verified that the
dentry is connected all the way back up to the root of the filesystem,
and it looks like this can clear DCACHE_DISCONNECTED sooner than that.
This hasn't been an issue since all the exportable filesystems use
d_splice_alias().

I think maybe this is just a bug and we could safely delete that line.

--b.

> 
> Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
> ---
>  fs/fuse/dir.c | 69 ++++++++++++++++++++++-------------------------------------
>  1 file changed, 26 insertions(+), 43 deletions(-)
> 
> diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
> index 72a5d5b..131d14b 100644
> --- a/fs/fuse/dir.c
> +++ b/fs/fuse/dir.c
> @@ -267,26 +267,6 @@ int fuse_valid_type(int m)
>  		S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m);
>  }
>  
> -/*
> - * Add a directory inode to a dentry, ensuring that no other dentry
> - * refers to this inode.  Called with fc->inst_mutex.
> - */
> -static struct dentry *fuse_d_add_directory(struct dentry *entry,
> -					   struct inode *inode)
> -{
> -	struct dentry *alias = d_find_alias(inode);
> -	if (alias && !(alias->d_flags & DCACHE_DISCONNECTED)) {
> -		/* This tries to shrink the subtree below alias */
> -		fuse_invalidate_entry(alias);
> -		dput(alias);
> -		if (!hlist_empty(&inode->i_dentry))
> -			return ERR_PTR(-EBUSY);
> -	} else {
> -		dput(alias);
> -	}
> -	return d_splice_alias(inode, entry);
> -}
> -
>  int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
>  		     struct fuse_entry_out *outarg, struct inode **inode)
>  {
> @@ -345,6 +325,24 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
>  	return err;
>  }
>  
> +static struct dentry *fuse_materialise_dentry(struct dentry *dentry,
> +					      struct inode *inode)
> +{
> +	struct dentry *newent;
> +
> +	if (inode && S_ISDIR(inode->i_mode)) {
> +		struct fuse_conn *fc = get_fuse_conn(inode);
> +
> +		mutex_lock(&fc->inst_mutex);
> +		newent = d_materialise_unique(dentry, inode);
> +		mutex_unlock(&fc->inst_mutex);
> +	} else {
> +		newent = d_materialise_unique(dentry, inode);
> +	}
> +
> +	return newent;
> +}
> +
>  static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
>  				  unsigned int flags)
>  {
> @@ -352,7 +350,6 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
>  	struct fuse_entry_out outarg;
>  	struct inode *inode;
>  	struct dentry *newent;
> -	struct fuse_conn *fc = get_fuse_conn(dir);
>  	bool outarg_valid = true;
>  
>  	err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
> @@ -368,16 +365,10 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
>  	if (inode && get_node_id(inode) == FUSE_ROOT_ID)
>  		goto out_iput;
>  
> -	if (inode && S_ISDIR(inode->i_mode)) {
> -		mutex_lock(&fc->inst_mutex);
> -		newent = fuse_d_add_directory(entry, inode);
> -		mutex_unlock(&fc->inst_mutex);
> -		err = PTR_ERR(newent);
> -		if (IS_ERR(newent))
> -			goto out_iput;
> -	} else {
> -		newent = d_splice_alias(inode, entry);
> -	}
> +	newent = fuse_materialise_dentry(entry, inode);
> +	err = PTR_ERR(newent);
> +	if (IS_ERR(newent))
> +		goto out_err;
>  
>  	entry = newent ? newent : entry;
>  	if (outarg_valid)
> @@ -1275,18 +1266,10 @@ static int fuse_direntplus_link(struct file *file,
>  	if (!inode)
>  		goto out;
>  
> -	if (S_ISDIR(inode->i_mode)) {
> -		mutex_lock(&fc->inst_mutex);
> -		alias = fuse_d_add_directory(dentry, inode);
> -		mutex_unlock(&fc->inst_mutex);
> -		err = PTR_ERR(alias);
> -		if (IS_ERR(alias)) {
> -			iput(inode);
> -			goto out;
> -		}
> -	} else {
> -		alias = d_splice_alias(inode, dentry);
> -	}
> +	alias = fuse_materialise_dentry(dentry, inode);
> +	err = PTR_ERR(alias);
> +	if (IS_ERR(alias))
> +		goto out;
>  
>  	if (alias) {
>  		dput(dentry);
> -- 
> 1.8.1.4
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2013-09-05 21:29 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-09-04 14:05 [PATCH 00/10] [v3] safely drop directory dentry on failed revalidate Miklos Szeredi
2013-09-04 14:05 ` [PATCH 01/10] vfs: add d_walk() Miklos Szeredi
2013-09-04 18:12   ` Al Viro
2013-09-04 14:05 ` [PATCH 02/10] vfs: check submounts and drop atomically Miklos Szeredi
2013-09-04 17:58   ` Al Viro
2013-09-05  9:11     ` Miklos Szeredi
2013-09-04 14:05 ` [PATCH 03/10] vfs: check unlinked ancestors before mount Miklos Szeredi
2013-09-04 14:05 ` [PATCH 04/10] afs: use check_submounts_and_drop() Miklos Szeredi
2013-09-04 14:05 ` [PATCH 05/10] gfs2: " Miklos Szeredi
2013-09-04 14:05 ` [PATCH 06/10] nfs: " Miklos Szeredi
2013-09-04 14:05 ` [PATCH 07/10] sysfs: " Miklos Szeredi
2013-09-04 15:17   ` Greg Kroah-Hartman
2013-09-04 14:05 ` [PATCH 08/10] fuse: use d_materialise_unique() Miklos Szeredi
2013-09-05 21:29   ` J. Bruce Fields
2013-09-04 14:05 ` [PATCH 09/10] fuse: clean up return in fuse_dentry_revalidate() Miklos Szeredi
2013-09-04 14:05 ` [PATCH 10/10] fuse: drop dentry on failed revalidate Miklos Szeredi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).