linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 00/16] vfs: atomic open v4 (part 1)
@ 2012-04-25 12:44 Miklos Szeredi
  2012-04-25 12:44 ` [PATCH 01/16] vfs: split do_lookup() Miklos Szeredi
                   ` (17 more replies)
  0 siblings, 18 replies; 27+ messages in thread
From: Miklos Szeredi @ 2012-04-25 12:44 UTC (permalink / raw)
  To: viro; +Cc: linux-fsdevel, linux-kernel, hch, torvalds, mszeredi

Part 1 of the atomic open series (split the patch bomb into 2 parts).  This goes
as far as moving NFS open code out from ->revalidate and into ->open.

Al, can you please review and apply?

git tree is here:

  git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs.git atomic-open.v4

Thanks,
Miklos
---


Miklos Szeredi (16):
      vfs: split do_lookup()
      vfs: do_last(): make exit RCU safe
      vfs: do_last(): inline walk_component()
      vfs: do_last(): use inode variable
      vfs: make follow_link check RCU safe
      vfs: do_last(): make ENOENT exit RCU safe
      vfs: do_last(): check LOOKUP_DIRECTORY
      vfs: do_last(): only return EISDIR for O_CREAT
      vfs: do_last(): add audit_inode before open
      vfs: do_last() common post lookup
      vfs: split __dentry_open()
      vfs: do_dentry_open(): don't put filp
      vfs: nameidata_to_filp(): inline __dentry_open()
      vfs: nameidata_to_filp(): don't throw away file on error
      vfs: retry last component if opening stale dentry
      nfs: don't open in ->d_revalidate

---
 fs/internal.h         |    1 +
 fs/namei.c            |  151 ++++++++++++++++++++++++++++++++++++------------
 fs/nfs/dir.c          |   56 ++-----------------
 fs/nfs/file.c         |   77 ++++++++++++++++++++++++-
 fs/open.c             |   76 ++++++++++++++++++------
 include/linux/errno.h |    1 +
 6 files changed, 250 insertions(+), 112 deletions(-)


^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 01/16] vfs: split do_lookup()
  2012-04-25 12:44 [PATCH 00/16] vfs: atomic open v4 (part 1) Miklos Szeredi
@ 2012-04-25 12:44 ` Miklos Szeredi
  2012-04-25 12:44 ` [PATCH 02/16] vfs: do_last(): make exit RCU safe Miklos Szeredi
                   ` (16 subsequent siblings)
  17 siblings, 0 replies; 27+ messages in thread
From: Miklos Szeredi @ 2012-04-25 12:44 UTC (permalink / raw)
  To: viro; +Cc: linux-fsdevel, linux-kernel, hch, torvalds, mszeredi

From: Miklos Szeredi <mszeredi@suse.cz>

Split do_lookup() into two functions:

  lookup_fast() - does cached lookup without i_mutex
  lookup_slow() - does lookup with i_mutex

Both follow managed dentries.

The new functions are needed by atomic_open.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/namei.c |   59 +++++++++++++++++++++++++++++++++++++++++++++--------------
 1 files changed, 45 insertions(+), 14 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 0062dd1..fcd807a 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1138,8 +1138,8 @@ static struct dentry *__lookup_hash(struct qstr *name,
  *  small and for now I'd prefer to have fast path as straight as possible.
  *  It _is_ time-critical.
  */
-static int do_lookup(struct nameidata *nd, struct qstr *name,
-			struct path *path, struct inode **inode)
+static int lookup_fast(struct nameidata *nd, struct qstr *name,
+		       struct path *path, struct inode **inode)
 {
 	struct vfsmount *mnt = nd->path.mnt;
 	struct dentry *dentry, *parent = nd->path.dentry;
@@ -1208,7 +1208,7 @@ unlazy:
 			goto need_lookup;
 		}
 	}
-done:
+
 	path->mnt = mnt;
 	path->dentry = dentry;
 	err = follow_managed(path, nd->flags);
@@ -1222,6 +1222,17 @@ done:
 	return 0;
 
 need_lookup:
+	return 1;
+}
+
+/* Fast lookup failed, do it the slow way */
+static int lookup_slow(struct nameidata *nd, struct qstr *name,
+		       struct path *path)
+{
+	struct dentry *dentry, *parent;
+	int err;
+
+	parent = nd->path.dentry;
 	BUG_ON(nd->inode != parent->d_inode);
 
 	mutex_lock(&parent->d_inode->i_mutex);
@@ -1229,7 +1240,16 @@ need_lookup:
 	mutex_unlock(&parent->d_inode->i_mutex);
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
-	goto done;
+	path->mnt = nd->path.mnt;
+	path->dentry = dentry;
+	err = follow_managed(path, nd->flags);
+	if (unlikely(err < 0)) {
+		path_put_conditional(path, nd);
+		return err;
+	}
+	if (err)
+		nd->flags |= LOOKUP_JUMPED;
+	return 0;
 }
 
 static inline int may_lookup(struct nameidata *nd)
@@ -1301,21 +1321,26 @@ static inline int walk_component(struct nameidata *nd, struct path *path,
 	 */
 	if (unlikely(type != LAST_NORM))
 		return handle_dots(nd, type);
-	err = do_lookup(nd, name, path, &inode);
+	err = lookup_fast(nd, name, path, &inode);
 	if (unlikely(err)) {
-		terminate_walk(nd);
-		return err;
-	}
-	if (!inode) {
-		path_to_nameidata(path, nd);
-		terminate_walk(nd);
-		return -ENOENT;
+		if (err < 0)
+			goto out_err;
+
+		err = lookup_slow(nd, name, path);
+		if (err < 0)
+			goto out_err;
+
+		inode = path->dentry->d_inode;
 	}
+	err = -ENOENT;
+	if (!inode)
+		goto out_path_put;
+
 	if (should_follow_link(inode, follow)) {
 		if (nd->flags & LOOKUP_RCU) {
 			if (unlikely(unlazy_walk(nd, path->dentry))) {
-				terminate_walk(nd);
-				return -ECHILD;
+				err = -ECHILD;
+				goto out_err;
 			}
 		}
 		BUG_ON(inode != path->dentry->d_inode);
@@ -1324,6 +1349,12 @@ static inline int walk_component(struct nameidata *nd, struct path *path,
 	path_to_nameidata(path, nd);
 	nd->inode = inode;
 	return 0;
+
+out_path_put:
+	path_to_nameidata(path, nd);
+out_err:
+	terminate_walk(nd);
+	return err;
 }
 
 /*
-- 
1.7.7


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 02/16] vfs: do_last(): make exit RCU safe
  2012-04-25 12:44 [PATCH 00/16] vfs: atomic open v4 (part 1) Miklos Szeredi
  2012-04-25 12:44 ` [PATCH 01/16] vfs: split do_lookup() Miklos Szeredi
@ 2012-04-25 12:44 ` Miklos Szeredi
  2012-04-25 12:44 ` [PATCH 03/16] vfs: do_last(): inline walk_component() Miklos Szeredi
                   ` (15 subsequent siblings)
  17 siblings, 0 replies; 27+ messages in thread
From: Miklos Szeredi @ 2012-04-25 12:44 UTC (permalink / raw)
  To: viro; +Cc: linux-fsdevel, linux-kernel, hch, torvalds, mszeredi

From: Miklos Szeredi <mszeredi@suse.cz>

Allow returning from do_last() with LOOKUP_RCU still set on the "out:" and
"exit:" labels.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/namei.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index fcd807a..020a62f 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2385,7 +2385,7 @@ common:
 out:
 	if (want_write)
 		mnt_drop_write(nd->path.mnt);
-	path_put(&nd->path);
+	terminate_walk(nd);
 	return filp;
 
 exit_mutex_unlock:
-- 
1.7.7


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 03/16] vfs: do_last(): inline walk_component()
  2012-04-25 12:44 [PATCH 00/16] vfs: atomic open v4 (part 1) Miklos Szeredi
  2012-04-25 12:44 ` [PATCH 01/16] vfs: split do_lookup() Miklos Szeredi
  2012-04-25 12:44 ` [PATCH 02/16] vfs: do_last(): make exit RCU safe Miklos Szeredi
@ 2012-04-25 12:44 ` Miklos Szeredi
  2012-04-25 12:44 ` [PATCH 04/16] vfs: do_last(): use inode variable Miklos Szeredi
                   ` (14 subsequent siblings)
  17 siblings, 0 replies; 27+ messages in thread
From: Miklos Szeredi @ 2012-04-25 12:44 UTC (permalink / raw)
  To: viro; +Cc: linux-fsdevel, linux-kernel, hch, torvalds, mszeredi

From: Miklos Szeredi <mszeredi@suse.cz>

Copy walk_component() into do_lookup().

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/namei.c |   35 ++++++++++++++++++++++++++++++-----
 1 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 020a62f..46d4bf6 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2202,6 +2202,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 	int want_write = 0;
 	int acc_mode = op->acc_mode;
 	struct file *filp;
+	struct inode *inode;
 	int error;
 
 	nd->flags &= ~LOOKUP_PARENT;
@@ -2239,12 +2240,36 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 		if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW))
 			symlink_ok = 1;
 		/* we _can_ be in RCU mode here */
-		error = walk_component(nd, path, &nd->last, LAST_NORM,
-					!symlink_ok);
-		if (error < 0)
-			return ERR_PTR(error);
-		if (error) /* symlink */
+		error = lookup_fast(nd, &nd->last, path, &inode);
+		if (unlikely(error)) {
+			if (error < 0)
+				goto exit;
+
+			error = lookup_slow(nd, &nd->last, path);
+			if (error < 0)
+				goto exit;
+
+			inode = path->dentry->d_inode;
+		}
+		error = -ENOENT;
+		if (!inode) {
+			path_to_nameidata(path, nd);
+			goto exit;
+		}
+
+		if (should_follow_link(inode, !symlink_ok)) {
+			if (nd->flags & LOOKUP_RCU) {
+				if (unlikely(unlazy_walk(nd, path->dentry))) {
+					error = -ECHILD;
+					goto exit;
+				}
+			}
+			BUG_ON(inode != path->dentry->d_inode);
 			return NULL;
+		}
+		path_to_nameidata(path, nd);
+		nd->inode = inode;
+
 		/* sayonara */
 		error = complete_walk(nd);
 		if (error)
-- 
1.7.7


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 04/16] vfs: do_last(): use inode variable
  2012-04-25 12:44 [PATCH 00/16] vfs: atomic open v4 (part 1) Miklos Szeredi
                   ` (2 preceding siblings ...)
  2012-04-25 12:44 ` [PATCH 03/16] vfs: do_last(): inline walk_component() Miklos Szeredi
@ 2012-04-25 12:44 ` Miklos Szeredi
  2012-05-01  4:06   ` Nick Piggin
  2012-04-25 12:44 ` [PATCH 05/16] vfs: make follow_link check RCU safe Miklos Szeredi
                   ` (13 subsequent siblings)
  17 siblings, 1 reply; 27+ messages in thread
From: Miklos Szeredi @ 2012-04-25 12:44 UTC (permalink / raw)
  To: viro; +Cc: linux-fsdevel, linux-kernel, hch, torvalds, mszeredi

From: Miklos Szeredi <mszeredi@suse.cz>

Use helper variable instead of path->dentry->d_inode before complete_walk().
This will allow this code to be used in RCU mode.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/namei.c |    7 ++++---
 1 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 46d4bf6..f21ddb3 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2360,15 +2360,16 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 	if (error)
 		nd->flags |= LOOKUP_JUMPED;
 
+	inode = path->dentry->d_inode;
 	error = -ENOENT;
-	if (!path->dentry->d_inode)
+	if (!inode)
 		goto exit_dput;
 
-	if (path->dentry->d_inode->i_op->follow_link)
+	if (inode->i_op->follow_link)
 		return NULL;
 
 	path_to_nameidata(path, nd);
-	nd->inode = path->dentry->d_inode;
+	nd->inode = inode;
 	/* Why this, you ask?  _Now_ we might have grown LOOKUP_JUMPED... */
 	error = complete_walk(nd);
 	if (error)
-- 
1.7.7


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 05/16] vfs: make follow_link check RCU safe
  2012-04-25 12:44 [PATCH 00/16] vfs: atomic open v4 (part 1) Miklos Szeredi
                   ` (3 preceding siblings ...)
  2012-04-25 12:44 ` [PATCH 04/16] vfs: do_last(): use inode variable Miklos Szeredi
@ 2012-04-25 12:44 ` Miklos Szeredi
  2012-04-25 12:44 ` [PATCH 06/16] vfs: do_last(): make ENOENT exit " Miklos Szeredi
                   ` (12 subsequent siblings)
  17 siblings, 0 replies; 27+ messages in thread
From: Miklos Szeredi @ 2012-04-25 12:44 UTC (permalink / raw)
  To: viro; +Cc: linux-fsdevel, linux-kernel, hch, torvalds, mszeredi

From: Miklos Szeredi <mszeredi@suse.cz>

This will allow this code to be used in RCU mode.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/namei.c |   12 ++++++++++--
 1 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index f21ddb3..a3a0fa1 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2203,6 +2203,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 	int acc_mode = op->acc_mode;
 	struct file *filp;
 	struct inode *inode;
+	int symlink_ok = 0;
 	int error;
 
 	nd->flags &= ~LOOKUP_PARENT;
@@ -2234,7 +2235,6 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 	}
 
 	if (!(open_flag & O_CREAT)) {
-		int symlink_ok = 0;
 		if (nd->last.name[nd->last.len])
 			nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
 		if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW))
@@ -2365,8 +2365,16 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 	if (!inode)
 		goto exit_dput;
 
-	if (inode->i_op->follow_link)
+	if (should_follow_link(inode, !symlink_ok)) {
+		if (nd->flags & LOOKUP_RCU) {
+			if (unlikely(unlazy_walk(nd, path->dentry))) {
+				error = -ECHILD;
+				goto exit;
+			}
+		}
+		BUG_ON(inode != path->dentry->d_inode);
 		return NULL;
+	}
 
 	path_to_nameidata(path, nd);
 	nd->inode = inode;
-- 
1.7.7


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 06/16] vfs: do_last(): make ENOENT exit RCU safe
  2012-04-25 12:44 [PATCH 00/16] vfs: atomic open v4 (part 1) Miklos Szeredi
                   ` (4 preceding siblings ...)
  2012-04-25 12:44 ` [PATCH 05/16] vfs: make follow_link check RCU safe Miklos Szeredi
@ 2012-04-25 12:44 ` Miklos Szeredi
  2012-04-25 12:44 ` [PATCH 07/16] vfs: do_last(): check LOOKUP_DIRECTORY Miklos Szeredi
                   ` (11 subsequent siblings)
  17 siblings, 0 replies; 27+ messages in thread
From: Miklos Szeredi @ 2012-04-25 12:44 UTC (permalink / raw)
  To: viro; +Cc: linux-fsdevel, linux-kernel, hch, torvalds, mszeredi

From: Miklos Szeredi <mszeredi@suse.cz>

This will allow this code to be used in RCU mode.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/namei.c |    6 ++++--
 1 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index a3a0fa1..d9b0eba 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2362,8 +2362,10 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 
 	inode = path->dentry->d_inode;
 	error = -ENOENT;
-	if (!inode)
-		goto exit_dput;
+	if (!inode) {
+		path_to_nameidata(path, nd);
+		goto exit;
+	}
 
 	if (should_follow_link(inode, !symlink_ok)) {
 		if (nd->flags & LOOKUP_RCU) {
-- 
1.7.7


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 07/16] vfs: do_last(): check LOOKUP_DIRECTORY
  2012-04-25 12:44 [PATCH 00/16] vfs: atomic open v4 (part 1) Miklos Szeredi
                   ` (5 preceding siblings ...)
  2012-04-25 12:44 ` [PATCH 06/16] vfs: do_last(): make ENOENT exit " Miklos Szeredi
@ 2012-04-25 12:44 ` Miklos Szeredi
  2012-04-25 12:44 ` [PATCH 08/16] vfs: do_last(): only return EISDIR for O_CREAT Miklos Szeredi
                   ` (10 subsequent siblings)
  17 siblings, 0 replies; 27+ messages in thread
From: Miklos Szeredi @ 2012-04-25 12:44 UTC (permalink / raw)
  To: viro; +Cc: linux-fsdevel, linux-kernel, hch, torvalds, mszeredi

From: Miklos Szeredi <mszeredi@suse.cz>

Check for ENOTDIR before finishing open.  This allows this code to be shared
between O_CREAT and plain opens.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/namei.c |    3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index d9b0eba..597e52d 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2387,6 +2387,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 	error = -EISDIR;
 	if (S_ISDIR(nd->inode->i_mode))
 		goto exit;
+	error = -ENOTDIR;
+	if ((nd->flags & LOOKUP_DIRECTORY) && !nd->inode->i_op->lookup)
+		goto exit;
 ok:
 	if (!S_ISREG(nd->inode->i_mode))
 		will_truncate = 0;
-- 
1.7.7


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 08/16] vfs: do_last(): only return EISDIR for O_CREAT
  2012-04-25 12:44 [PATCH 00/16] vfs: atomic open v4 (part 1) Miklos Szeredi
                   ` (6 preceding siblings ...)
  2012-04-25 12:44 ` [PATCH 07/16] vfs: do_last(): check LOOKUP_DIRECTORY Miklos Szeredi
@ 2012-04-25 12:44 ` Miklos Szeredi
  2012-04-25 12:44 ` [PATCH 09/16] vfs: do_last(): add audit_inode before open Miklos Szeredi
                   ` (9 subsequent siblings)
  17 siblings, 0 replies; 27+ messages in thread
From: Miklos Szeredi @ 2012-04-25 12:44 UTC (permalink / raw)
  To: viro; +Cc: linux-fsdevel, linux-kernel, hch, torvalds, mszeredi

From: Miklos Szeredi <mszeredi@suse.cz>

This allows this code to be shared between O_CREAT and plain opens.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/namei.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 597e52d..4fd802c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2385,7 +2385,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 	if (error)
 		return ERR_PTR(error);
 	error = -EISDIR;
-	if (S_ISDIR(nd->inode->i_mode))
+	if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode))
 		goto exit;
 	error = -ENOTDIR;
 	if ((nd->flags & LOOKUP_DIRECTORY) && !nd->inode->i_op->lookup)
-- 
1.7.7


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 09/16] vfs: do_last(): add audit_inode before open
  2012-04-25 12:44 [PATCH 00/16] vfs: atomic open v4 (part 1) Miklos Szeredi
                   ` (7 preceding siblings ...)
  2012-04-25 12:44 ` [PATCH 08/16] vfs: do_last(): only return EISDIR for O_CREAT Miklos Szeredi
@ 2012-04-25 12:44 ` Miklos Szeredi
  2012-04-25 12:44 ` [PATCH 10/16] vfs: do_last() common post lookup Miklos Szeredi
                   ` (8 subsequent siblings)
  17 siblings, 0 replies; 27+ messages in thread
From: Miklos Szeredi @ 2012-04-25 12:44 UTC (permalink / raw)
  To: viro; +Cc: linux-fsdevel, linux-kernel, hch, torvalds, mszeredi

From: Miklos Szeredi <mszeredi@suse.cz>

This allows this code to be shared between O_CREAT and plain opens.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/namei.c |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 4fd802c..6e94c14 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2390,6 +2390,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 	error = -ENOTDIR;
 	if ((nd->flags & LOOKUP_DIRECTORY) && !nd->inode->i_op->lookup)
 		goto exit;
+	audit_inode(pathname, nd->path.dentry);
 ok:
 	if (!S_ISREG(nd->inode->i_mode))
 		will_truncate = 0;
-- 
1.7.7


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 10/16] vfs: do_last() common post lookup
  2012-04-25 12:44 [PATCH 00/16] vfs: atomic open v4 (part 1) Miklos Szeredi
                   ` (8 preceding siblings ...)
  2012-04-25 12:44 ` [PATCH 09/16] vfs: do_last(): add audit_inode before open Miklos Szeredi
@ 2012-04-25 12:44 ` Miklos Szeredi
  2012-04-25 12:44 ` [PATCH 11/16] vfs: split __dentry_open() Miklos Szeredi
                   ` (7 subsequent siblings)
  17 siblings, 0 replies; 27+ messages in thread
From: Miklos Szeredi @ 2012-04-25 12:44 UTC (permalink / raw)
  To: viro; +Cc: linux-fsdevel, linux-kernel, hch, torvalds, mszeredi

From: Miklos Szeredi <mszeredi@suse.cz>

Now the post lookup code can be shared between O_CREAT and plain opens since
they are essentially the same.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/namei.c |   33 ++-------------------------------
 1 files changed, 2 insertions(+), 31 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 6e94c14..f7e7d76 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2251,37 +2251,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 
 			inode = path->dentry->d_inode;
 		}
-		error = -ENOENT;
-		if (!inode) {
-			path_to_nameidata(path, nd);
-			goto exit;
-		}
-
-		if (should_follow_link(inode, !symlink_ok)) {
-			if (nd->flags & LOOKUP_RCU) {
-				if (unlikely(unlazy_walk(nd, path->dentry))) {
-					error = -ECHILD;
-					goto exit;
-				}
-			}
-			BUG_ON(inode != path->dentry->d_inode);
-			return NULL;
-		}
-		path_to_nameidata(path, nd);
-		nd->inode = inode;
-
-		/* sayonara */
-		error = complete_walk(nd);
-		if (error)
-			return ERR_PTR(error);
-
-		error = -ENOTDIR;
-		if (nd->flags & LOOKUP_DIRECTORY) {
-			if (!nd->inode->i_op->lookup)
-				goto exit;
-		}
-		audit_inode(pathname, nd->path.dentry);
-		goto ok;
+		goto finish_lookup;
 	}
 
 	/* create side of things */
@@ -2361,6 +2331,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 		nd->flags |= LOOKUP_JUMPED;
 
 	inode = path->dentry->d_inode;
+finish_lookup:
 	error = -ENOENT;
 	if (!inode) {
 		path_to_nameidata(path, nd);
-- 
1.7.7


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 11/16] vfs: split __dentry_open()
  2012-04-25 12:44 [PATCH 00/16] vfs: atomic open v4 (part 1) Miklos Szeredi
                   ` (9 preceding siblings ...)
  2012-04-25 12:44 ` [PATCH 10/16] vfs: do_last() common post lookup Miklos Szeredi
@ 2012-04-25 12:44 ` Miklos Szeredi
  2012-04-25 12:44 ` [PATCH 12/16] vfs: do_dentry_open(): don't put filp Miklos Szeredi
                   ` (6 subsequent siblings)
  17 siblings, 0 replies; 27+ messages in thread
From: Miklos Szeredi @ 2012-04-25 12:44 UTC (permalink / raw)
  To: viro; +Cc: linux-fsdevel, linux-kernel, hch, torvalds, mszeredi

From: Miklos Szeredi <mszeredi@suse.cz>

Split __dentry_open() into two functions:

  do_dentry_open() - does most of the actual work, doesn't put file on failure
  open_check_o_direct() - after a successful open, checks direct_IO method

This will allow i_op->atomic_open to do just the file initialization and leave
the direct_IO checking to the VFS.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/internal.h |    1 +
 fs/open.c     |   47 +++++++++++++++++++++++++++++++++--------------
 2 files changed, 34 insertions(+), 14 deletions(-)

diff --git a/fs/internal.h b/fs/internal.h
index 9962c59..4d69fdd 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -100,6 +100,7 @@ extern struct file *do_file_open_root(struct dentry *, struct vfsmount *,
 
 extern long do_handle_open(int mountdirfd,
 			   struct file_handle __user *ufh, int open_flag);
+extern int open_check_o_direct(struct file *f);
 
 /*
  * inode.c
diff --git a/fs/open.c b/fs/open.c
index 5720854..971b474 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -644,10 +644,23 @@ static inline int __get_file_write_access(struct inode *inode,
 	return error;
 }
 
-static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
-					struct file *f,
-					int (*open)(struct inode *, struct file *),
-					const struct cred *cred)
+int open_check_o_direct(struct file *f)
+{
+	/* NB: we're sure to have correct a_ops only after f_op->open */
+	if (f->f_flags & O_DIRECT) {
+		if (!f->f_mapping->a_ops ||
+		    ((!f->f_mapping->a_ops->direct_IO) &&
+		    (!f->f_mapping->a_ops->get_xip_mem))) {
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+static struct file *do_dentry_open(struct dentry *dentry, struct vfsmount *mnt,
+				   struct file *f,
+				   int (*open)(struct inode *, struct file *),
+				   const struct cred *cred)
 {
 	static const struct file_operations empty_fops = {};
 	struct inode *inode;
@@ -703,16 +716,6 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
 
 	file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
 
-	/* NB: we're sure to have correct a_ops only after f_op->open */
-	if (f->f_flags & O_DIRECT) {
-		if (!f->f_mapping->a_ops ||
-		    ((!f->f_mapping->a_ops->direct_IO) &&
-		    (!f->f_mapping->a_ops->get_xip_mem))) {
-			fput(f);
-			f = ERR_PTR(-EINVAL);
-		}
-	}
-
 	return f;
 
 cleanup_all:
@@ -740,6 +743,22 @@ cleanup_file:
 	return ERR_PTR(error);
 }
 
+static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
+				struct file *f,
+				int (*open)(struct inode *, struct file *),
+				const struct cred *cred)
+{
+	struct file *res = do_dentry_open(dentry, mnt, f, open, cred);
+	if (!IS_ERR(res)) {
+		int error = open_check_o_direct(f);
+		if (error) {
+			fput(res);
+			res = ERR_PTR(error);
+		}
+	}
+	return res;
+}
+
 /**
  * lookup_instantiate_filp - instantiates the open intent filp
  * @nd: pointer to nameidata
-- 
1.7.7


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 12/16] vfs: do_dentry_open(): don't put filp
  2012-04-25 12:44 [PATCH 00/16] vfs: atomic open v4 (part 1) Miklos Szeredi
                   ` (10 preceding siblings ...)
  2012-04-25 12:44 ` [PATCH 11/16] vfs: split __dentry_open() Miklos Szeredi
@ 2012-04-25 12:44 ` Miklos Szeredi
  2012-04-25 12:44 ` [PATCH 13/16] vfs: nameidata_to_filp(): inline __dentry_open() Miklos Szeredi
                   ` (5 subsequent siblings)
  17 siblings, 0 replies; 27+ messages in thread
From: Miklos Szeredi @ 2012-04-25 12:44 UTC (permalink / raw)
  To: viro; +Cc: linux-fsdevel, linux-kernel, hch, torvalds, mszeredi

From: Miklos Szeredi <mszeredi@suse.cz>

Move put_filp() out to __dentry_open(), the only caller now.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/open.c |    3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/fs/open.c b/fs/open.c
index 971b474..e1448cd 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -737,7 +737,6 @@ cleanup_all:
 	f->f_path.dentry = NULL;
 	f->f_path.mnt = NULL;
 cleanup_file:
-	put_filp(f);
 	dput(dentry);
 	mntput(mnt);
 	return ERR_PTR(error);
@@ -755,6 +754,8 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
 			fput(res);
 			res = ERR_PTR(error);
 		}
+	} else {
+		put_filp(f);
 	}
 	return res;
 }
-- 
1.7.7


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 13/16] vfs: nameidata_to_filp(): inline __dentry_open()
  2012-04-25 12:44 [PATCH 00/16] vfs: atomic open v4 (part 1) Miklos Szeredi
                   ` (11 preceding siblings ...)
  2012-04-25 12:44 ` [PATCH 12/16] vfs: do_dentry_open(): don't put filp Miklos Szeredi
@ 2012-04-25 12:44 ` Miklos Szeredi
  2012-04-25 12:44 ` [PATCH 14/16] vfs: nameidata_to_filp(): don't throw away file on error Miklos Szeredi
                   ` (4 subsequent siblings)
  17 siblings, 0 replies; 27+ messages in thread
From: Miklos Szeredi @ 2012-04-25 12:44 UTC (permalink / raw)
  To: viro; +Cc: linux-fsdevel, linux-kernel, hch, torvalds, mszeredi

From: Miklos Szeredi <mszeredi@suse.cz>

Copy __dentry_open() into nameidata_to_filp().

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/open.c |   20 ++++++++++++++++++--
 1 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/fs/open.c b/fs/open.c
index e1448cd..161c15f 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -818,9 +818,25 @@ struct file *nameidata_to_filp(struct nameidata *nd)
 
 	/* Has the filesystem initialised the file for us? */
 	if (filp->f_path.dentry == NULL) {
+		struct file *res;
+
 		path_get(&nd->path);
-		filp = __dentry_open(nd->path.dentry, nd->path.mnt, filp,
-				     NULL, cred);
+		res = do_dentry_open(nd->path.dentry, nd->path.mnt,
+				     filp, NULL, cred);
+		if (!IS_ERR(res)) {
+			int error;
+
+			BUG_ON(res != filp);
+
+			error = open_check_o_direct(filp);
+			if (error) {
+				fput(filp);
+				filp = ERR_PTR(error);
+			}
+		} else {
+			put_filp(filp);
+			filp = res;
+		}
 	}
 	return filp;
 }
-- 
1.7.7


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 14/16] vfs: nameidata_to_filp(): don't throw away file on error
  2012-04-25 12:44 [PATCH 00/16] vfs: atomic open v4 (part 1) Miklos Szeredi
                   ` (12 preceding siblings ...)
  2012-04-25 12:44 ` [PATCH 13/16] vfs: nameidata_to_filp(): inline __dentry_open() Miklos Szeredi
@ 2012-04-25 12:44 ` Miklos Szeredi
  2012-04-25 12:44 ` [PATCH 15/16] vfs: retry last component if opening stale dentry Miklos Szeredi
                   ` (3 subsequent siblings)
  17 siblings, 0 replies; 27+ messages in thread
From: Miklos Szeredi @ 2012-04-25 12:44 UTC (permalink / raw)
  To: viro; +Cc: linux-fsdevel, linux-kernel, hch, torvalds, mszeredi

From: Miklos Szeredi <mszeredi@suse.cz>

If open fails, don't put the file.  This allows it to be reused if open needs to
be retried.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/open.c |    8 +++++---
 1 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/fs/open.c b/fs/open.c
index 161c15f..0510e58 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -814,10 +814,11 @@ struct file *nameidata_to_filp(struct nameidata *nd)
 
 	/* Pick up the filp from the open intent */
 	filp = nd->intent.open.file;
-	nd->intent.open.file = NULL;
 
 	/* Has the filesystem initialised the file for us? */
-	if (filp->f_path.dentry == NULL) {
+	if (filp->f_path.dentry != NULL) {
+		nd->intent.open.file = NULL;
+	} else {
 		struct file *res;
 
 		path_get(&nd->path);
@@ -826,6 +827,7 @@ struct file *nameidata_to_filp(struct nameidata *nd)
 		if (!IS_ERR(res)) {
 			int error;
 
+			nd->intent.open.file = NULL;
 			BUG_ON(res != filp);
 
 			error = open_check_o_direct(filp);
@@ -834,7 +836,7 @@ struct file *nameidata_to_filp(struct nameidata *nd)
 				filp = ERR_PTR(error);
 			}
 		} else {
-			put_filp(filp);
+			/* Allow nd->intent.open.file to be recycled */
 			filp = res;
 		}
 	}
-- 
1.7.7


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 15/16] vfs: retry last component if opening stale dentry
  2012-04-25 12:44 [PATCH 00/16] vfs: atomic open v4 (part 1) Miklos Szeredi
                   ` (13 preceding siblings ...)
  2012-04-25 12:44 ` [PATCH 14/16] vfs: nameidata_to_filp(): don't throw away file on error Miklos Szeredi
@ 2012-04-25 12:44 ` Miklos Szeredi
  2012-04-25 12:44 ` [PATCH 16/16] nfs: don't open in ->d_revalidate Miklos Szeredi
                   ` (2 subsequent siblings)
  17 siblings, 0 replies; 27+ messages in thread
From: Miklos Szeredi @ 2012-04-25 12:44 UTC (permalink / raw)
  To: viro; +Cc: linux-fsdevel, linux-kernel, hch, torvalds, mszeredi

From: Miklos Szeredi <mszeredi@suse.cz>

NFS optimizes away d_revalidates for last component of open.  This means that
open itself can find the dentry stale.

This patch allows the filesystem to return EOPENSTALE and the VFS will retry the
lookup on just the last component if possible.

If the lookup was done using RCU mode, including the last component, then this
is not possible since the parent dentry is lost.  In this case fall back to
non-RCU lookup.  Currently this is not used since NFS will always leave RCU
mode.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/namei.c            |   37 +++++++++++++++++++++++++++++++++++--
 include/linux/errno.h |    1 +
 2 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index f7e7d76..e34df8d 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2204,6 +2204,8 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 	struct file *filp;
 	struct inode *inode;
 	int symlink_ok = 0;
+	struct path save_parent = { .dentry = NULL, .mnt = NULL };
+	bool retried = false;
 	int error;
 
 	nd->flags &= ~LOOKUP_PARENT;
@@ -2269,6 +2271,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 	if (nd->last.name[nd->last.len])
 		goto exit;
 
+retry_lookup:
 	mutex_lock(&dir->d_inode->i_mutex);
 
 	dentry = lookup_hash(nd);
@@ -2349,12 +2352,21 @@ finish_lookup:
 		return NULL;
 	}
 
-	path_to_nameidata(path, nd);
+	if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path->mnt) {
+		path_to_nameidata(path, nd);
+	} else {
+		save_parent.dentry = nd->path.dentry;
+		save_parent.mnt = mntget(path->mnt);
+		nd->path.dentry = path->dentry;
+
+	}
 	nd->inode = inode;
 	/* Why this, you ask?  _Now_ we might have grown LOOKUP_JUMPED... */
 	error = complete_walk(nd);
-	if (error)
+	if (error) {
+		path_put(&save_parent);
 		return ERR_PTR(error);
+	}
 	error = -EISDIR;
 	if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode))
 		goto exit;
@@ -2377,6 +2389,20 @@ common:
 	if (error)
 		goto exit;
 	filp = nameidata_to_filp(nd);
+	if (filp == ERR_PTR(-EOPENSTALE) && save_parent.dentry && !retried) {
+		BUG_ON(save_parent.dentry != dir);
+		path_put(&nd->path);
+		nd->path = save_parent;
+		nd->inode = dir->d_inode;
+		save_parent.mnt = NULL;
+		save_parent.dentry = NULL;
+		if (want_write) {
+			mnt_drop_write(nd->path.mnt);
+			want_write = 0;
+		}
+		retried = true;
+		goto retry_lookup;
+	}
 	if (!IS_ERR(filp)) {
 		error = ima_file_check(filp, op->acc_mode);
 		if (error) {
@@ -2396,6 +2422,7 @@ common:
 out:
 	if (want_write)
 		mnt_drop_write(nd->path.mnt);
+	path_put(&save_parent);
 	terminate_walk(nd);
 	return filp;
 
@@ -2459,6 +2486,12 @@ out:
 	if (base)
 		fput(base);
 	release_open_intent(nd);
+	if (filp == ERR_PTR(-EOPENSTALE)) {
+		if (flags & LOOKUP_RCU)
+			filp = ERR_PTR(-ECHILD);
+		else
+			filp = ERR_PTR(-ESTALE);
+	}
 	return filp;
 
 out_filp:
diff --git a/include/linux/errno.h b/include/linux/errno.h
index 2d09bfa..e0de516 100644
--- a/include/linux/errno.h
+++ b/include/linux/errno.h
@@ -17,6 +17,7 @@
 #define ENOIOCTLCMD	515	/* No ioctl command */
 #define ERESTART_RESTARTBLOCK 516 /* restart by calling sys_restart_syscall */
 #define EPROBE_DEFER	517	/* Driver requests probe retry */
+#define EOPENSTALE	518	/* open found a stale dentry */
 
 /* Defined for the NFSv3 protocol */
 #define EBADHANDLE	521	/* Illegal NFS file handle */
-- 
1.7.7


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 16/16] nfs: don't open in ->d_revalidate
  2012-04-25 12:44 [PATCH 00/16] vfs: atomic open v4 (part 1) Miklos Szeredi
                   ` (14 preceding siblings ...)
  2012-04-25 12:44 ` [PATCH 15/16] vfs: retry last component if opening stale dentry Miklos Szeredi
@ 2012-04-25 12:44 ` Miklos Szeredi
  2012-05-24 15:07 ` [PATCH 00/16] vfs: atomic open v4 (part 1) David Howells
  2012-05-24 15:52 ` David Howells
  17 siblings, 0 replies; 27+ messages in thread
From: Miklos Szeredi @ 2012-04-25 12:44 UTC (permalink / raw)
  To: viro
  Cc: linux-fsdevel, linux-kernel, hch, torvalds, mszeredi, Trond Myklebust

From: Miklos Szeredi <mszeredi@suse.cz>

NFSv4 can't do reliable opens in d_revalidate, since it cannot know whether a
mount needs to be followed or not.  It does check d_mountpoint() on the dentry,
which can result in a weird error if the VFS found that the mount does not in
fact need to be followed, e.g.:

  # mount --bind /mnt/nfs /mnt/nfs-clone
  # echo something > /mnt/nfs/tmp/bar
  # echo x > /tmp/file
  # mount --bind /tmp/file /mnt/nfs-clone/tmp/bar
  # cat  /mnt/nfs/tmp/bar
  cat: /mnt/nfs/tmp/bar: Not a directory

Which should, by any sane filesystem, result in "something" being printed.

So instead do the open in f_op->open() and in the unlikely case that the cached
dentry turned out to be invalid, drop the dentry and return EOPENSTALE to let
the VFS retry.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
CC: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c  |   56 +++-------------------------------------
 fs/nfs/file.c |   77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 78 insertions(+), 55 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 4aaf031..10ce72e 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1328,10 +1328,10 @@ out:
 }
 
 #ifdef CONFIG_NFS_V4
-static int nfs_open_revalidate(struct dentry *, struct nameidata *);
+static int nfs4_lookup_revalidate(struct dentry *, struct nameidata *);
 
 const struct dentry_operations nfs4_dentry_operations = {
-	.d_revalidate	= nfs_open_revalidate,
+	.d_revalidate	= nfs4_lookup_revalidate,
 	.d_delete	= nfs_dentry_delete,
 	.d_iput		= nfs_dentry_iput,
 	.d_automount	= nfs_d_automount,
@@ -1493,13 +1493,11 @@ no_open:
 	return nfs_lookup(dir, dentry, nd);
 }
 
-static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
+static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
 	struct dentry *parent = NULL;
 	struct inode *inode;
 	struct inode *dir;
-	struct nfs_open_context *ctx;
-	struct iattr attr;
 	int openflags, ret = 0;
 
 	if (nd->flags & LOOKUP_RCU)
@@ -1528,57 +1526,13 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
 	/* We cannot do exclusive creation on a positive dentry */
 	if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
 		goto no_open_dput;
-	/* We can't create new files here */
-	openflags &= ~(O_CREAT|O_EXCL);
-
-	ctx = create_nfs_open_context(dentry, openflags);
-	ret = PTR_ERR(ctx);
-	if (IS_ERR(ctx))
-		goto out;
 
-	attr.ia_valid = 0;
-	if (openflags & O_TRUNC) {
-		attr.ia_valid |= ATTR_SIZE;
-		attr.ia_size = 0;
-		nfs_wb_all(inode);
-	}
-
-	/*
-	 * Note: we're not holding inode->i_mutex and so may be racing with
-	 * operations that change the directory. We therefore save the
-	 * change attribute *before* we do the RPC call.
-	 */
-	inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr);
-	if (IS_ERR(inode)) {
-		ret = PTR_ERR(inode);
-		switch (ret) {
-		case -EPERM:
-		case -EACCES:
-		case -EDQUOT:
-		case -ENOSPC:
-		case -EROFS:
-			goto out_put_ctx;
-		default:
-			goto out_drop;
-		}
-	}
-	iput(inode);
-	if (inode != dentry->d_inode)
-		goto out_drop;
+	/* Let f_op->open() actually open (and revalidate) the file */
+	ret = 1;
 
-	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
-	ret = nfs_intent_set_file(nd, ctx);
-	if (ret >= 0)
-		ret = 1;
 out:
 	dput(parent);
 	return ret;
-out_drop:
-	d_drop(dentry);
-	ret = 0;
-out_put_ctx:
-	put_nfs_open_context(ctx);
-	goto out;
 
 no_open_dput:
 	dput(parent);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index aa9b709..92268bf 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -871,12 +871,81 @@ const struct file_operations nfs_file_operations = {
 static int
 nfs4_file_open(struct inode *inode, struct file *filp)
 {
+	struct nfs_open_context *ctx;
+	struct dentry *dentry = filp->f_path.dentry;
+	struct dentry *parent = NULL;
+	struct inode *dir;
+	unsigned openflags = filp->f_flags;
+	struct iattr attr;
+	int err;
+
+	BUG_ON(inode != dentry->d_inode);
 	/*
-	 * NFSv4 opens are handled in d_lookup and d_revalidate. If we get to
-	 * this point, then something is very wrong
+	 * If no cached dentry exists or if it's negative, NFSv4 handled the
+	 * opens in ->lookup() or ->create().
+	 *
+	 * We only get this far for a cached positive dentry.  We skipped
+	 * revalidation, so handle it here by dropping the dentry and returning
+	 * -EOPENSTALE.  The VFS will retry the lookup/create/open.
 	 */
-	dprintk("NFS: %s called! inode=%p filp=%p\n", __func__, inode, filp);
-	return -ENOTDIR;
+
+	dprintk("NFS: open file(%s/%s)\n",
+		dentry->d_parent->d_name.name,
+		dentry->d_name.name);
+
+	if ((openflags & O_ACCMODE) == 3)
+		openflags--;
+
+	/* We can't create new files here */
+	openflags &= ~(O_CREAT|O_EXCL);
+
+	parent = dget_parent(dentry);
+	dir = parent->d_inode;
+
+	ctx = alloc_nfs_open_context(filp->f_path.dentry, filp->f_mode);
+	err = PTR_ERR(ctx);
+	if (IS_ERR(ctx))
+		goto out;
+
+	attr.ia_valid = 0;
+	if (openflags & O_TRUNC) {
+		attr.ia_valid |= ATTR_SIZE;
+		attr.ia_size = 0;
+		nfs_wb_all(inode);
+	}
+
+	inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		switch (err) {
+		case -EPERM:
+		case -EACCES:
+		case -EDQUOT:
+		case -ENOSPC:
+		case -EROFS:
+			goto out_put_ctx;
+		default:
+			goto out_drop;
+		}
+	}
+	iput(inode);
+	if (inode != dentry->d_inode)
+		goto out_drop;
+
+	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+	nfs_file_set_open_context(filp, ctx);
+	err = 0;
+
+out_put_ctx:
+	put_nfs_open_context(ctx);
+out:
+	dput(parent);
+	return err;
+
+out_drop:
+	d_drop(dentry);
+	err = -EOPENSTALE;
+	goto out_put_ctx;
 }
 
 const struct file_operations nfs4_file_operations = {
-- 
1.7.7


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH 04/16] vfs: do_last(): use inode variable
  2012-04-25 12:44 ` [PATCH 04/16] vfs: do_last(): use inode variable Miklos Szeredi
@ 2012-05-01  4:06   ` Nick Piggin
  2012-05-07 14:28     ` Miklos Szeredi
  0 siblings, 1 reply; 27+ messages in thread
From: Nick Piggin @ 2012-05-01  4:06 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: viro, linux-fsdevel, linux-kernel, hch, torvalds, mszeredi

On 25 April 2012 22:44, Miklos Szeredi <miklos@szeredi.hu> wrote:
> From: Miklos Szeredi <mszeredi@suse.cz>
>
> Use helper variable instead of path->dentry->d_inode before complete_walk().
> This will allow this code to be used in RCU mode.

What do you mean, allow it to be used?

>
> Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
> ---
>  fs/namei.c |    7 ++++---
>  1 files changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/fs/namei.c b/fs/namei.c
> index 46d4bf6..f21ddb3 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -2360,15 +2360,16 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
>        if (error)
>                nd->flags |= LOOKUP_JUMPED;
>
> +       inode = path->dentry->d_inode;
>        error = -ENOENT;
> -       if (!path->dentry->d_inode)
> +       if (!inode)
>                goto exit_dput;
>
> -       if (path->dentry->d_inode->i_op->follow_link)
> +       if (inode->i_op->follow_link)
>                return NULL;
>
>        path_to_nameidata(path, nd);
> -       nd->inode = path->dentry->d_inode;
> +       nd->inode = inode;
>        /* Why this, you ask?  _Now_ we might have grown LOOKUP_JUMPED... */
>        error = complete_walk(nd);
>        if (error)

In rcu-walk mode, dentry->d_inode should not be accessed at all,
outside of the core lookup code that (should) have the correct
barriers and sequence locks.

That logic should not escape into here, so I'm just not sure what
you're doing here.

This code runs in rcu-walk mode, then at the very least you need
ACCESS_ONCE to load the inode, because ->d_inode can
become NULL right after you test it for NULL (but that would likely
be a bandaid, I'm just pointing out there's raft of potential
problems).

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 04/16] vfs: do_last(): use inode variable
  2012-05-01  4:06   ` Nick Piggin
@ 2012-05-07 14:28     ` Miklos Szeredi
  2012-05-08 23:57       ` Nick Piggin
  0 siblings, 1 reply; 27+ messages in thread
From: Miklos Szeredi @ 2012-05-07 14:28 UTC (permalink / raw)
  To: Nick Piggin; +Cc: viro, linux-fsdevel, linux-kernel, hch, torvalds

Nick Piggin <npiggin@gmail.com> writes:

> On 25 April 2012 22:44, Miklos Szeredi <miklos@szeredi.hu> wrote:
>> From: Miklos Szeredi <mszeredi@suse.cz>
>>
>> Use helper variable instead of path->dentry->d_inode before complete_walk().
>> This will allow this code to be used in RCU mode.
>
> What do you mean, allow it to be used?

I mean allow the code to be shared between RCU and non-RCU mode.  See
10/16.

>
>>
>> Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
>> ---
>>  fs/namei.c |    7 ++++---
>>  1 files changed, 4 insertions(+), 3 deletions(-)
>>
>> diff --git a/fs/namei.c b/fs/namei.c
>> index 46d4bf6..f21ddb3 100644
>> --- a/fs/namei.c
>> +++ b/fs/namei.c
>> @@ -2360,15 +2360,16 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
>>        if (error)
>>                nd->flags |= LOOKUP_JUMPED;
>>
>> +       inode = path->dentry->d_inode ;
>>        error = -ENOENT;
>> -       if (!path->dentry->d_inode)
>> +       if (!inode)
>>                goto exit_dput;
>>
>> -       if (path->dentry->d_inode->i_op->follow_link)
>> +       if (inode->i_op->follow_link)
>>                return NULL;
>>
>>        path_to_nameidata(path, nd);
>> -       nd->inode = path->dentry->d_inode;
>> +       nd->inode = inode;
>>        /* Why this, you ask?  _Now_ we might have grown LOOKUP_JUMPED... */
>>        error = complete_walk(nd);
>>        if (error)
>
> In rcu-walk mode, dentry->d_inode should not be accessed at all,
> outside of the core lookup code that (should) have the correct
> barriers and sequence locks.
>
> That logic should not escape into here, so I'm just not sure what
> you're doing here.

Right, dentry->d_inode is *not* going to be dereferenced in RCU mode.
In RCU mode it will jump to the place just after the "inode =
path->dentry->d_inode;" line.  See patch 10/16.

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 04/16] vfs: do_last(): use inode variable
  2012-05-07 14:28     ` Miklos Szeredi
@ 2012-05-08 23:57       ` Nick Piggin
  0 siblings, 0 replies; 27+ messages in thread
From: Nick Piggin @ 2012-05-08 23:57 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: viro, linux-fsdevel, linux-kernel, hch, torvalds

On 8 May 2012 00:28, Miklos Szeredi <miklos@szeredi.hu> wrote:
> Nick Piggin <npiggin@gmail.com> writes:
>
>> On 25 April 2012 22:44, Miklos Szeredi <miklos@szeredi.hu> wrote:
>>> From: Miklos Szeredi <mszeredi@suse.cz>
>>>
>>> Use helper variable instead of path->dentry->d_inode before complete_walk().
>>> This will allow this code to be used in RCU mode.
>>
>> What do you mean, allow it to be used?
>
> I mean allow the code to be shared between RCU and non-RCU mode.  See
> 10/16.
>
>>
>>>
>>> Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
>>> ---
>>>  fs/namei.c |    7 ++++---
>>>  1 files changed, 4 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/fs/namei.c b/fs/namei.c
>>> index 46d4bf6..f21ddb3 100644
>>> --- a/fs/namei.c
>>> +++ b/fs/namei.c
>>> @@ -2360,15 +2360,16 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
>>>        if (error)
>>>                nd->flags |= LOOKUP_JUMPED;
>>>
>>> +       inode = path->dentry->d_inode ;
>>>        error = -ENOENT;
>>> -       if (!path->dentry->d_inode)
>>> +       if (!inode)
>>>                goto exit_dput;
>>>
>>> -       if (path->dentry->d_inode->i_op->follow_link)
>>> +       if (inode->i_op->follow_link)
>>>                return NULL;
>>>
>>>        path_to_nameidata(path, nd);
>>> -       nd->inode = path->dentry->d_inode;
>>> +       nd->inode = inode;
>>>        /* Why this, you ask?  _Now_ we might have grown LOOKUP_JUMPED... */
>>>        error = complete_walk(nd);
>>>        if (error)
>>
>> In rcu-walk mode, dentry->d_inode should not be accessed at all,
>> outside of the core lookup code that (should) have the correct
>> barriers and sequence locks.
>>
>> That logic should not escape into here, so I'm just not sure what
>> you're doing here.
>
> Right, dentry->d_inode is *not* going to be dereferenced in RCU mode.
> In RCU mode it will jump to the place just after the "inode =
> path->dentry->d_inode;" line.  See patch 10/16.

Hmm, OK, I admittedly didn't apply all patches and look at the result.

I wonder if that's getting too hairy... I'm sure consolidation is worthwhile
though. Perhaps comment or even a BUG_ON to ensure it is not in
rcu-walk mode, at points where you load the inode?

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 00/16] vfs: atomic open v4 (part 1)
  2012-04-25 12:44 [PATCH 00/16] vfs: atomic open v4 (part 1) Miklos Szeredi
                   ` (15 preceding siblings ...)
  2012-04-25 12:44 ` [PATCH 16/16] nfs: don't open in ->d_revalidate Miklos Szeredi
@ 2012-05-24 15:07 ` David Howells
  2012-05-25 14:58   ` Miklos Szeredi
  2012-05-25 15:18   ` David Howells
  2012-05-24 15:52 ` David Howells
  17 siblings, 2 replies; 27+ messages in thread
From: David Howells @ 2012-05-24 15:07 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: dhowells, viro, linux-fsdevel, linux-kernel, hch, torvalds, mszeredi


I've been looking at your patches when they're all applied, and I suspect
you're missing some security calls.

For instance, in lookup_open(), you call security_path_mknod() prior to
calling vfs_create(), but you don't call it prior to calling atomic_open() or
in, say, nfs_atomic_open().  You do need to, however, though I can see it's
difficult to work out where.  Is it possible to call it if O_CREAT is
specified and d_inode is NULL right before calling atomic_open()?

I'm also wondering if you're missing an audit_inode() call in the if (created)
path after the retry_lookup label.

David

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 00/16] vfs: atomic open v4 (part 1)
  2012-04-25 12:44 [PATCH 00/16] vfs: atomic open v4 (part 1) Miklos Szeredi
                   ` (16 preceding siblings ...)
  2012-05-24 15:07 ` [PATCH 00/16] vfs: atomic open v4 (part 1) David Howells
@ 2012-05-24 15:52 ` David Howells
  2012-05-25 15:12   ` Miklos Szeredi
  2012-05-25 15:20   ` David Howells
  17 siblings, 2 replies; 27+ messages in thread
From: David Howells @ 2012-05-24 15:52 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: dhowells, viro, linux-fsdevel, linux-kernel, hch, torvalds, mszeredi


I'd also recommend changing the "ok" and "common" labels in do_last() to
something a bit more meaningful, perhaps:

	common -> finish_open
	ok -> finish_open_may_want_write

Also, does it make sense to combine:

	if (!S_ISREG(nd->inode->i_mode))
		will_truncate = 0;

with:

	int will_truncate = open_flag & O_TRUNC;

up at the top of the function.

As the code stands, if ->atomic_open() opens the file but does not create it,
handle_truncate() will be called on it even if it is not a regular file,
whereas by the normal path, it won't.

I would also be tempted to move the body of:

	if (filp == ERR_PTR(-EOPENSTALE) && save_parent.dentry && !retried) {
		BUG_ON(save_parent.dentry != dir);
		path_put(&nd->path);
		nd->path = save_parent;
		nd->inode = dir->d_inode;
		save_parent.mnt = NULL;
		save_parent.dentry = NULL;
		if (want_write) {
			mnt_drop_write(nd->path.mnt);
			want_write = 0;
		}
		retried = true;
		goto retry_lookup;
	}

before the retry_lookup label and then goto around it from the preceding
if-else statement or place it at the bottom to make the "common:" block simpler
to read.  Also, you could nest the if (filp == ERR_PTR(-EOPENSTALE)...) inside
if (IS_ERR(filp)).

Can I also suggest being consistent about the use of int v bool?  "created"
and "retried" are bool, but "will_truncate", "want_write" and "symlink_ok" are
not.  Granted some of this is likely inherited from the previous incarnation.

David

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 00/16] vfs: atomic open v4 (part 1)
  2012-05-24 15:07 ` [PATCH 00/16] vfs: atomic open v4 (part 1) David Howells
@ 2012-05-25 14:58   ` Miklos Szeredi
  2012-05-25 15:18   ` David Howells
  1 sibling, 0 replies; 27+ messages in thread
From: Miklos Szeredi @ 2012-05-25 14:58 UTC (permalink / raw)
  To: David Howells; +Cc: viro, linux-fsdevel, linux-kernel, hch, torvalds

David Howells <dhowells@redhat.com> writes:

> I've been looking at your patches when they're all applied, and I suspect
> you're missing some security calls.
>
> For instance, in lookup_open(), you call security_path_mknod() prior to
> calling vfs_create(), but you don't call it prior to calling atomic_open() or
> in, say, nfs_atomic_open().

We call security_path_mknod() before ->atomic_open() in may_o_create().

>   You do need to, however, though I can see it's
> difficult to work out where.  Is it possible to call it if O_CREAT is
> specified and d_inode is NULL right before calling atomic_open()?
>
> I'm also wondering if you're missing an audit_inode() call in the if (created)
> path after the retry_lookup label.

There's no audit_inode() on the created dentry neither in the original
code nor in the modified code.

But that may be a bug regardless, it's just independent of my changes.
At least AFAICS.

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 00/16] vfs: atomic open v4 (part 1)
  2012-05-24 15:52 ` David Howells
@ 2012-05-25 15:12   ` Miklos Szeredi
  2012-05-25 15:20   ` David Howells
  1 sibling, 0 replies; 27+ messages in thread
From: Miklos Szeredi @ 2012-05-25 15:12 UTC (permalink / raw)
  To: David Howells; +Cc: viro, linux-fsdevel, linux-kernel, hch, torvalds

David Howells <dhowells@redhat.com> writes:

> I'd also recommend changing the "ok" and "common" labels in do_last() to
> something a bit more meaningful, perhaps:
>
> 	common -> finish_open
> 	ok -> finish_open_may_want_write

Okay.  I'll do a separate label cleanup patch.

>
> Also, does it make sense to combine:
>
> 	if (!S_ISREG(nd->inode->i_mode))
> 		will_truncate = 0;
>
> with:
>
> 	int will_truncate = open_flag & O_TRUNC;
>
> up at the top of the function.

We need to check nd->inode->i_mode *after* the lookup.  So top of the
function is not a good place.

>
> As the code stands, if ->atomic_open() opens the file but does not create it,
> handle_truncate() will be called on it even if it is not a regular file,
> whereas by the normal path, it won't.

Right, that appears to be a bug.  Thanks for spotting.

>
> I would also be tempted to move the body of:
>
> 	if (filp == ERR_PTR(-EOPENSTALE) && save_parent.dentry && !retried) {
> 		BUG_ON(save_parent.dentry != dir);
> 		path_put(&nd->path);
> 		nd->path = save_parent;
> 		nd->inode = dir->d_inode;
> 		save_parent.mnt = NULL;
> 		save_parent.dentry = NULL;
> 		if (want_write) {
> 			mnt_drop_write(nd->path.mnt);
> 			want_write = 0;
> 		}
> 		retried = true;
> 		goto retry_lookup;
> 	}
>
> before the retry_lookup label and then goto around it from the preceding
> if-else statement or place it at the bottom to make the "common:" block simpler
> to read.  Also, you could nest the if (filp == ERR_PTR(-EOPENSTALE)...) inside
> if (IS_ERR(filp)).

Yeah, moving to the bottom sounds like a good cleanup.

>
> Can I also suggest being consistent about the use of int v bool?  "created"
> and "retried" are bool, but "will_truncate", "want_write" and "symlink_ok" are
> not.  Granted some of this is likely inherited from the previous
> incarnation.

Yes, will do a cleanup patch.

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 00/16] vfs: atomic open v4 (part 1)
  2012-05-24 15:07 ` [PATCH 00/16] vfs: atomic open v4 (part 1) David Howells
  2012-05-25 14:58   ` Miklos Szeredi
@ 2012-05-25 15:18   ` David Howells
  1 sibling, 0 replies; 27+ messages in thread
From: David Howells @ 2012-05-25 15:18 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: dhowells, viro, linux-fsdevel, linux-kernel, hch, torvalds

Miklos Szeredi <miklos@szeredi.hu> wrote:

> > For instance, in lookup_open(), you call security_path_mknod() prior to
> > calling vfs_create(), but you don't call it prior to calling atomic_open()
> > or in, say, nfs_atomic_open().
> 
> We call security_path_mknod() before ->atomic_open() in may_o_create().

Okay.

> > I'm also wondering if you're missing an audit_inode() call in the if
> > (created) path after the retry_lookup label.
> 
> There's no audit_inode() on the created dentry neither in the original
> code nor in the modified code.
> 
> But that may be a bug regardless, it's just independent of my changes.
> At least AFAICS.

Fair enough.

David

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 00/16] vfs: atomic open v4 (part 1)
  2012-05-24 15:52 ` David Howells
  2012-05-25 15:12   ` Miklos Szeredi
@ 2012-05-25 15:20   ` David Howells
  1 sibling, 0 replies; 27+ messages in thread
From: David Howells @ 2012-05-25 15:20 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: dhowells, viro, linux-fsdevel, linux-kernel, hch, torvalds

Miklos Szeredi <miklos@szeredi.hu> wrote:

> > Also, does it make sense to combine:
> >
> > 	if (!S_ISREG(nd->inode->i_mode))
> > 		will_truncate = 0;
> >
> > with:
> >
> > 	int will_truncate = open_flag & O_TRUNC;
> >
> > up at the top of the function.
> 
> We need to check nd->inode->i_mode *after* the lookup.  So top of the
> function is not a good place.

Good point.

David

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 16/16] nfs: don't open in ->d_revalidate
  2012-05-21 15:30 [PATCH 00/16] vfs: atomic open v5 " Miklos Szeredi
@ 2012-05-21 15:30 ` Miklos Szeredi
  0 siblings, 0 replies; 27+ messages in thread
From: Miklos Szeredi @ 2012-05-21 15:30 UTC (permalink / raw)
  To: viro
  Cc: linux-fsdevel, linux-kernel, hch, torvalds, mszeredi, Trond Myklebust

From: Miklos Szeredi <mszeredi@suse.cz>

NFSv4 can't do reliable opens in d_revalidate, since it cannot know whether a
mount needs to be followed or not.  It does check d_mountpoint() on the dentry,
which can result in a weird error if the VFS found that the mount does not in
fact need to be followed, e.g.:

  # mount --bind /mnt/nfs /mnt/nfs-clone
  # echo something > /mnt/nfs/tmp/bar
  # echo x > /tmp/file
  # mount --bind /tmp/file /mnt/nfs-clone/tmp/bar
  # cat  /mnt/nfs/tmp/bar
  cat: /mnt/nfs/tmp/bar: Not a directory

Which should, by any sane filesystem, result in "something" being printed.

So instead do the open in f_op->open() and in the unlikely case that the cached
dentry turned out to be invalid, drop the dentry and return EOPENSTALE to let
the VFS retry.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
CC: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c  |   56 +++-------------------------------------
 fs/nfs/file.c |   77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 78 insertions(+), 55 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 8789210..a9fe9a4 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1328,10 +1328,10 @@ out:
 }
 
 #ifdef CONFIG_NFS_V4
-static int nfs_open_revalidate(struct dentry *, struct nameidata *);
+static int nfs4_lookup_revalidate(struct dentry *, struct nameidata *);
 
 const struct dentry_operations nfs4_dentry_operations = {
-	.d_revalidate	= nfs_open_revalidate,
+	.d_revalidate	= nfs4_lookup_revalidate,
 	.d_delete	= nfs_dentry_delete,
 	.d_iput		= nfs_dentry_iput,
 	.d_automount	= nfs_d_automount,
@@ -1493,13 +1493,11 @@ no_open:
 	return nfs_lookup(dir, dentry, nd);
 }
 
-static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
+static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
 	struct dentry *parent = NULL;
 	struct inode *inode;
 	struct inode *dir;
-	struct nfs_open_context *ctx;
-	struct iattr attr;
 	int openflags, ret = 0;
 
 	if (nd->flags & LOOKUP_RCU)
@@ -1528,57 +1526,13 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
 	/* We cannot do exclusive creation on a positive dentry */
 	if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
 		goto no_open_dput;
-	/* We can't create new files here */
-	openflags &= ~(O_CREAT|O_EXCL);
-
-	ctx = create_nfs_open_context(dentry, openflags);
-	ret = PTR_ERR(ctx);
-	if (IS_ERR(ctx))
-		goto out;
 
-	attr.ia_valid = ATTR_OPEN;
-	if (openflags & O_TRUNC) {
-		attr.ia_valid |= ATTR_SIZE;
-		attr.ia_size = 0;
-		nfs_wb_all(inode);
-	}
-
-	/*
-	 * Note: we're not holding inode->i_mutex and so may be racing with
-	 * operations that change the directory. We therefore save the
-	 * change attribute *before* we do the RPC call.
-	 */
-	inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr);
-	if (IS_ERR(inode)) {
-		ret = PTR_ERR(inode);
-		switch (ret) {
-		case -EPERM:
-		case -EACCES:
-		case -EDQUOT:
-		case -ENOSPC:
-		case -EROFS:
-			goto out_put_ctx;
-		default:
-			goto out_drop;
-		}
-	}
-	iput(inode);
-	if (inode != dentry->d_inode)
-		goto out_drop;
+	/* Let f_op->open() actually open (and revalidate) the file */
+	ret = 1;
 
-	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
-	ret = nfs_intent_set_file(nd, ctx);
-	if (ret >= 0)
-		ret = 1;
 out:
 	dput(parent);
 	return ret;
-out_drop:
-	d_drop(dentry);
-	ret = 0;
-out_put_ctx:
-	put_nfs_open_context(ctx);
-	goto out;
 
 no_open_dput:
 	dput(parent);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index aa9b709..dc45ada 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -871,12 +871,81 @@ const struct file_operations nfs_file_operations = {
 static int
 nfs4_file_open(struct inode *inode, struct file *filp)
 {
+	struct nfs_open_context *ctx;
+	struct dentry *dentry = filp->f_path.dentry;
+	struct dentry *parent = NULL;
+	struct inode *dir;
+	unsigned openflags = filp->f_flags;
+	struct iattr attr;
+	int err;
+
+	BUG_ON(inode != dentry->d_inode);
 	/*
-	 * NFSv4 opens are handled in d_lookup and d_revalidate. If we get to
-	 * this point, then something is very wrong
+	 * If no cached dentry exists or if it's negative, NFSv4 handled the
+	 * opens in ->lookup() or ->create().
+	 *
+	 * We only get this far for a cached positive dentry.  We skipped
+	 * revalidation, so handle it here by dropping the dentry and returning
+	 * -EOPENSTALE.  The VFS will retry the lookup/create/open.
 	 */
-	dprintk("NFS: %s called! inode=%p filp=%p\n", __func__, inode, filp);
-	return -ENOTDIR;
+
+	dprintk("NFS: open file(%s/%s)\n",
+		dentry->d_parent->d_name.name,
+		dentry->d_name.name);
+
+	if ((openflags & O_ACCMODE) == 3)
+		openflags--;
+
+	/* We can't create new files here */
+	openflags &= ~(O_CREAT|O_EXCL);
+
+	parent = dget_parent(dentry);
+	dir = parent->d_inode;
+
+	ctx = alloc_nfs_open_context(filp->f_path.dentry, filp->f_mode);
+	err = PTR_ERR(ctx);
+	if (IS_ERR(ctx))
+		goto out;
+
+	attr.ia_valid = ATTR_OPEN;
+	if (openflags & O_TRUNC) {
+		attr.ia_valid |= ATTR_SIZE;
+		attr.ia_size = 0;
+		nfs_wb_all(inode);
+	}
+
+	inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		switch (err) {
+		case -EPERM:
+		case -EACCES:
+		case -EDQUOT:
+		case -ENOSPC:
+		case -EROFS:
+			goto out_put_ctx;
+		default:
+			goto out_drop;
+		}
+	}
+	iput(inode);
+	if (inode != dentry->d_inode)
+		goto out_drop;
+
+	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+	nfs_file_set_open_context(filp, ctx);
+	err = 0;
+
+out_put_ctx:
+	put_nfs_open_context(ctx);
+out:
+	dput(parent);
+	return err;
+
+out_drop:
+	d_drop(dentry);
+	err = -EOPENSTALE;
+	goto out_put_ctx;
 }
 
 const struct file_operations nfs4_file_operations = {
-- 
1.7.7


^ permalink raw reply related	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2012-05-25 15:20 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-04-25 12:44 [PATCH 00/16] vfs: atomic open v4 (part 1) Miklos Szeredi
2012-04-25 12:44 ` [PATCH 01/16] vfs: split do_lookup() Miklos Szeredi
2012-04-25 12:44 ` [PATCH 02/16] vfs: do_last(): make exit RCU safe Miklos Szeredi
2012-04-25 12:44 ` [PATCH 03/16] vfs: do_last(): inline walk_component() Miklos Szeredi
2012-04-25 12:44 ` [PATCH 04/16] vfs: do_last(): use inode variable Miklos Szeredi
2012-05-01  4:06   ` Nick Piggin
2012-05-07 14:28     ` Miklos Szeredi
2012-05-08 23:57       ` Nick Piggin
2012-04-25 12:44 ` [PATCH 05/16] vfs: make follow_link check RCU safe Miklos Szeredi
2012-04-25 12:44 ` [PATCH 06/16] vfs: do_last(): make ENOENT exit " Miklos Szeredi
2012-04-25 12:44 ` [PATCH 07/16] vfs: do_last(): check LOOKUP_DIRECTORY Miklos Szeredi
2012-04-25 12:44 ` [PATCH 08/16] vfs: do_last(): only return EISDIR for O_CREAT Miklos Szeredi
2012-04-25 12:44 ` [PATCH 09/16] vfs: do_last(): add audit_inode before open Miklos Szeredi
2012-04-25 12:44 ` [PATCH 10/16] vfs: do_last() common post lookup Miklos Szeredi
2012-04-25 12:44 ` [PATCH 11/16] vfs: split __dentry_open() Miklos Szeredi
2012-04-25 12:44 ` [PATCH 12/16] vfs: do_dentry_open(): don't put filp Miklos Szeredi
2012-04-25 12:44 ` [PATCH 13/16] vfs: nameidata_to_filp(): inline __dentry_open() Miklos Szeredi
2012-04-25 12:44 ` [PATCH 14/16] vfs: nameidata_to_filp(): don't throw away file on error Miklos Szeredi
2012-04-25 12:44 ` [PATCH 15/16] vfs: retry last component if opening stale dentry Miklos Szeredi
2012-04-25 12:44 ` [PATCH 16/16] nfs: don't open in ->d_revalidate Miklos Szeredi
2012-05-24 15:07 ` [PATCH 00/16] vfs: atomic open v4 (part 1) David Howells
2012-05-25 14:58   ` Miklos Szeredi
2012-05-25 15:18   ` David Howells
2012-05-24 15:52 ` David Howells
2012-05-25 15:12   ` Miklos Szeredi
2012-05-25 15:20   ` David Howells
2012-05-21 15:30 [PATCH 00/16] vfs: atomic open v5 " Miklos Szeredi
2012-05-21 15:30 ` [PATCH 16/16] nfs: don't open in ->d_revalidate Miklos Szeredi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).