linux-toolchains.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Al Viro <viro@zeniv.linux.org.uk>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Alexander Potapenko <glider@google.com>,
	Alexei Starovoitov <ast@kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Andrey Konovalov <andreyknvl@google.com>,
	Andy Lutomirski <luto@kernel.org>, Arnd Bergmann <arnd@arndb.de>,
	Borislav Petkov <bp@alien8.de>, Christoph Hellwig <hch@lst.de>,
	Christoph Lameter <cl@linux.com>,
	David Rientjes <rientjes@google.com>,
	Dmitry Vyukov <dvyukov@google.com>,
	Eric Dumazet <edumazet@google.com>,
	Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	Herbert Xu <herbert@gondor.apana.org.au>,
	Ilya Leoshkevich <iii@linux.ibm.com>,
	Ingo Molnar <mingo@redhat.com>, Jens Axboe <axboe@kernel.dk>,
	Joonsoo Kim <iamjoonsoo.kim@lge.com>,
	Kees Cook <keescook@chromium.org>, Marco Elver <elver@google.com>,
	Mark Rutland <mark.rutland@arm.com>,
	Matthew Wilcox <willy@infradead.org>,
	"Michael S. Tsirkin" <mst@redhat.com>,
	Pekka Enberg <penberg@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Petr Mladek <pmladek@suse.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Vasily Gorbik <gor@linux.ibm.com>,
	Vegard Nossum <vegard.nossum@oracle.com>,
	Vlastimil Babka <vbabka@suse.cz>,
	kasan-dev <kasan-dev@googlegroups.com>,
	Linux-MM <linux-mm@kvack.org>,
	linux-arch <linux-arch@vger.kernel.org>,
	Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	Evgenii Stepanov <eugenis@google.com>,
	Nathan Chancellor <nathan@kernel.org>,
	Nick Desaulniers <ndesaulniers@google.com>,
	Segher Boessenkool <segher@kernel.crashing.org>,
	Vitaly Buka <vitalybuka@google.com>,
	linux-toolchains <linux-toolchains@vger.kernel.org>
Subject: Re: [PATCH v4 43/45] namei: initialize parameters passed to step_into()
Date: Mon, 4 Jul 2022 03:52:57 +0100	[thread overview]
Message-ID: <YsJWCREA5xMfmmqx@ZenIV> (raw)
In-Reply-To: <CAHk-=wgbpot7nt966qvnSR25iea3ueO90RwC2DwHH=7ZyeZzvQ@mail.gmail.com>

On Sat, Jul 02, 2022 at 10:23:16AM -0700, Linus Torvalds wrote:

> Al - can you please take a quick look?

FWIW, trying to write a coherent documentation had its usual effect...
The thing is, we don't really need to fetch the inode that early.
All we really care about is that in RCU mode ->d_seq gets sampled
before we fetch ->d_inode *and* we don't treat "it looks negative"
as hard -ENOENT in case of ->d_seq mismatch.

Which can be bloody well left to step_into().  So we don't need
to pass it inode argument at all - just dentry and seq.  Makes
a bunch of functions simpler as well...

It does *not* deal with the "uninitialized" seq argument in
!RCU case; I'll handle that in the followup, but that's a separate
story, IMO (and very clearly a false positive).

Cumulative diff follows; splitup is in #work.namei.  Comments?

diff --git a/fs/namei.c b/fs/namei.c
index 1f28d3f463c3..7f4f61ade9e3 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1467,7 +1467,7 @@ EXPORT_SYMBOL(follow_down);
  * we meet a managed dentry that would need blocking.
  */
 static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
-			       struct inode **inode, unsigned *seqp)
+			       unsigned *seqp)
 {
 	struct dentry *dentry = path->dentry;
 	unsigned int flags = dentry->d_flags;
@@ -1497,13 +1497,6 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
 				dentry = path->dentry = mounted->mnt.mnt_root;
 				nd->state |= ND_JUMPED;
 				*seqp = read_seqcount_begin(&dentry->d_seq);
-				*inode = dentry->d_inode;
-				/*
-				 * We don't need to re-check ->d_seq after this
-				 * ->d_inode read - there will be an RCU delay
-				 * between mount hash removal and ->mnt_root
-				 * becoming unpinned.
-				 */
 				flags = dentry->d_flags;
 				continue;
 			}
@@ -1515,8 +1508,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
 }
 
 static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry,
-			  struct path *path, struct inode **inode,
-			  unsigned int *seqp)
+			  struct path *path, unsigned int *seqp)
 {
 	bool jumped;
 	int ret;
@@ -1525,9 +1517,7 @@ static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry,
 	path->dentry = dentry;
 	if (nd->flags & LOOKUP_RCU) {
 		unsigned int seq = *seqp;
-		if (unlikely(!*inode))
-			return -ENOENT;
-		if (likely(__follow_mount_rcu(nd, path, inode, seqp)))
+		if (likely(__follow_mount_rcu(nd, path, seqp)))
 			return 0;
 		if (!try_to_unlazy_next(nd, dentry, seq))
 			return -ECHILD;
@@ -1547,7 +1537,6 @@ static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry,
 		if (path->mnt != nd->path.mnt)
 			mntput(path->mnt);
 	} else {
-		*inode = d_backing_inode(path->dentry);
 		*seqp = 0; /* out of RCU mode, so the value doesn't matter */
 	}
 	return ret;
@@ -1607,9 +1596,7 @@ static struct dentry *__lookup_hash(const struct qstr *name,
 	return dentry;
 }
 
-static struct dentry *lookup_fast(struct nameidata *nd,
-				  struct inode **inode,
-			          unsigned *seqp)
+static struct dentry *lookup_fast(struct nameidata *nd, unsigned *seqp)
 {
 	struct dentry *dentry, *parent = nd->path.dentry;
 	int status = 1;
@@ -1628,22 +1615,11 @@ static struct dentry *lookup_fast(struct nameidata *nd,
 			return NULL;
 		}
 
-		/*
-		 * This sequence count validates that the inode matches
-		 * the dentry name information from lookup.
-		 */
-		*inode = d_backing_inode(dentry);
-		if (unlikely(read_seqcount_retry(&dentry->d_seq, seq)))
-			return ERR_PTR(-ECHILD);
-
-		/*
+	        /*
 		 * This sequence count validates that the parent had no
 		 * changes while we did the lookup of the dentry above.
-		 *
-		 * The memory barrier in read_seqcount_begin of child is
-		 *  enough, we can use __read_seqcount_retry here.
 		 */
-		if (unlikely(__read_seqcount_retry(&parent->d_seq, nd->seq)))
+		if (unlikely(read_seqcount_retry(&parent->d_seq, nd->seq)))
 			return ERR_PTR(-ECHILD);
 
 		*seqp = seq;
@@ -1838,13 +1814,21 @@ static const char *pick_link(struct nameidata *nd, struct path *link,
  * for the common case.
  */
 static const char *step_into(struct nameidata *nd, int flags,
-		     struct dentry *dentry, struct inode *inode, unsigned seq)
+		     struct dentry *dentry, unsigned seq)
 {
 	struct path path;
-	int err = handle_mounts(nd, dentry, &path, &inode, &seq);
+	struct inode *inode;
+	int err = handle_mounts(nd, dentry, &path, &seq);
 
 	if (err < 0)
 		return ERR_PTR(err);
+	inode = path.dentry->d_inode;
+	if (unlikely(!inode)) {
+		if ((nd->flags & LOOKUP_RCU) &&
+		    read_seqcount_retry(&path.dentry->d_seq, seq))
+			return ERR_PTR(-ECHILD);
+		return ERR_PTR(-ENOENT);
+	}
 	if (likely(!d_is_symlink(path.dentry)) ||
 	   ((flags & WALK_TRAILING) && !(nd->flags & LOOKUP_FOLLOW)) ||
 	   (flags & WALK_NOFOLLOW)) {
@@ -1870,9 +1854,7 @@ static const char *step_into(struct nameidata *nd, int flags,
 	return pick_link(nd, &path, inode, seq, flags);
 }
 
-static struct dentry *follow_dotdot_rcu(struct nameidata *nd,
-					struct inode **inodep,
-					unsigned *seqp)
+static struct dentry *follow_dotdot_rcu(struct nameidata *nd, unsigned *seqp)
 {
 	struct dentry *parent, *old;
 
@@ -1895,7 +1877,6 @@ static struct dentry *follow_dotdot_rcu(struct nameidata *nd,
 	}
 	old = nd->path.dentry;
 	parent = old->d_parent;
-	*inodep = parent->d_inode;
 	*seqp = read_seqcount_begin(&parent->d_seq);
 	if (unlikely(read_seqcount_retry(&old->d_seq, nd->seq)))
 		return ERR_PTR(-ECHILD);
@@ -1910,9 +1891,7 @@ static struct dentry *follow_dotdot_rcu(struct nameidata *nd,
 	return NULL;
 }
 
-static struct dentry *follow_dotdot(struct nameidata *nd,
-				 struct inode **inodep,
-				 unsigned *seqp)
+static struct dentry *follow_dotdot(struct nameidata *nd, unsigned *seqp)
 {
 	struct dentry *parent;
 
@@ -1937,7 +1916,6 @@ static struct dentry *follow_dotdot(struct nameidata *nd,
 		return ERR_PTR(-ENOENT);
 	}
 	*seqp = 0;
-	*inodep = parent->d_inode;
 	return parent;
 
 in_root:
@@ -1952,7 +1930,6 @@ static const char *handle_dots(struct nameidata *nd, int type)
 	if (type == LAST_DOTDOT) {
 		const char *error = NULL;
 		struct dentry *parent;
-		struct inode *inode;
 		unsigned seq;
 
 		if (!nd->root.mnt) {
@@ -1961,17 +1938,17 @@ static const char *handle_dots(struct nameidata *nd, int type)
 				return error;
 		}
 		if (nd->flags & LOOKUP_RCU)
-			parent = follow_dotdot_rcu(nd, &inode, &seq);
+			parent = follow_dotdot_rcu(nd, &seq);
 		else
-			parent = follow_dotdot(nd, &inode, &seq);
+			parent = follow_dotdot(nd, &seq);
 		if (IS_ERR(parent))
 			return ERR_CAST(parent);
 		if (unlikely(!parent))
 			error = step_into(nd, WALK_NOFOLLOW,
-					 nd->path.dentry, nd->inode, nd->seq);
+					 nd->path.dentry, nd->seq);
 		else
 			error = step_into(nd, WALK_NOFOLLOW,
-					 parent, inode, seq);
+					 parent, seq);
 		if (unlikely(error))
 			return error;
 
@@ -1995,7 +1972,6 @@ static const char *handle_dots(struct nameidata *nd, int type)
 static const char *walk_component(struct nameidata *nd, int flags)
 {
 	struct dentry *dentry;
-	struct inode *inode;
 	unsigned seq;
 	/*
 	 * "." and ".." are special - ".." especially so because it has
@@ -2007,7 +1983,7 @@ static const char *walk_component(struct nameidata *nd, int flags)
 			put_link(nd);
 		return handle_dots(nd, nd->last_type);
 	}
-	dentry = lookup_fast(nd, &inode, &seq);
+	dentry = lookup_fast(nd, &seq);
 	if (IS_ERR(dentry))
 		return ERR_CAST(dentry);
 	if (unlikely(!dentry)) {
@@ -2017,7 +1993,7 @@ static const char *walk_component(struct nameidata *nd, int flags)
 	}
 	if (!(flags & WALK_MORE) && nd->depth)
 		put_link(nd);
-	return step_into(nd, flags, dentry, inode, seq);
+	return step_into(nd, flags, dentry, seq);
 }
 
 /*
@@ -2473,8 +2449,7 @@ static int handle_lookup_down(struct nameidata *nd)
 {
 	if (!(nd->flags & LOOKUP_RCU))
 		dget(nd->path.dentry);
-	return PTR_ERR(step_into(nd, WALK_NOFOLLOW,
-			nd->path.dentry, nd->inode, nd->seq));
+	return PTR_ERR(step_into(nd, WALK_NOFOLLOW, nd->path.dentry, nd->seq));
 }
 
 /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
@@ -3394,7 +3369,6 @@ static const char *open_last_lookups(struct nameidata *nd,
 	int open_flag = op->open_flag;
 	bool got_write = false;
 	unsigned seq;
-	struct inode *inode;
 	struct dentry *dentry;
 	const char *res;
 
@@ -3410,7 +3384,7 @@ static const char *open_last_lookups(struct nameidata *nd,
 		if (nd->last.name[nd->last.len])
 			nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
 		/* we _can_ be in RCU mode here */
-		dentry = lookup_fast(nd, &inode, &seq);
+		dentry = lookup_fast(nd, &seq);
 		if (IS_ERR(dentry))
 			return ERR_CAST(dentry);
 		if (likely(dentry))
@@ -3464,7 +3438,7 @@ static const char *open_last_lookups(struct nameidata *nd,
 finish_lookup:
 	if (nd->depth)
 		put_link(nd);
-	res = step_into(nd, WALK_TRAILING, dentry, inode, seq);
+	res = step_into(nd, WALK_TRAILING, dentry, seq);
 	if (unlikely(res))
 		nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
 	return res;

  parent reply	other threads:[~2022-07-04  2:54 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20220701142310.2188015-1-glider@google.com>
2022-07-01 14:23 ` [PATCH v4 43/45] namei: initialize parameters passed to step_into() Alexander Potapenko
2022-07-02 17:23   ` Linus Torvalds
2022-07-03  3:59     ` Al Viro
2022-07-04  2:52     ` Al Viro [this message]
2022-07-04  8:20       ` Alexander Potapenko
2022-07-04 13:44         ` Al Viro
2022-07-04 13:55           ` Al Viro
2022-07-04 15:49           ` Alexander Potapenko
2022-07-04 16:03             ` Greg Kroah-Hartman
2022-07-04 16:33               ` Alexander Potapenko
2022-07-04 18:23             ` Segher Boessenkool
2022-07-04 16:00           ` Al Viro
2022-07-04 16:47             ` Alexander Potapenko
2022-07-04 17:36       ` Linus Torvalds
2022-07-04 19:02         ` Al Viro
2022-07-04 19:16           ` Linus Torvalds
2022-07-04 19:55             ` Al Viro
2022-07-04 20:24               ` Linus Torvalds
2022-07-04 20:46                 ` Al Viro
2022-07-04 20:51                   ` Linus Torvalds
2022-07-04 21:04                     ` Al Viro
2022-07-04 23:13                       ` [PATCH 1/7] __follow_mount_rcu(): verify that mount_lock remains unchanged Al Viro
2022-07-04 23:14                         ` [PATCH 2/7] follow_dotdot{,_rcu}(): change calling conventions Al Viro
2022-07-04 23:14                         ` [PATCH 3/7] namei: stash the sampled ->d_seq into nameidata Al Viro
2022-07-04 23:15                         ` [PATCH 4/7] step_into(): lose inode argument Al Viro
2022-07-04 23:15                         ` [PATCH 5/7] follow_dotdot{,_rcu}(): don't bother with inode Al Viro
2022-07-04 23:16                         ` [PATCH 6/7] lookup_fast(): " Al Viro
2022-07-04 23:17                         ` [PATCH 7/7] step_into(): move fetching ->d_inode past handle_mounts() Al Viro
2022-07-04 23:19                         ` [PATCH 1/7] __follow_mount_rcu(): verify that mount_lock remains unchanged Al Viro
2022-07-05  0:06                           ` Linus Torvalds
2022-07-05  3:48                             ` Al Viro
2022-07-04 20:47                 ` [PATCH v4 43/45] namei: initialize parameters passed to step_into() Linus Torvalds
2022-08-08 16:37   ` Alexander Potapenko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=YsJWCREA5xMfmmqx@ZenIV \
    --to=viro@zeniv.linux.org.uk \
    --cc=akpm@linux-foundation.org \
    --cc=andreyknvl@google.com \
    --cc=arnd@arndb.de \
    --cc=ast@kernel.org \
    --cc=axboe@kernel.dk \
    --cc=bp@alien8.de \
    --cc=cl@linux.com \
    --cc=dvyukov@google.com \
    --cc=edumazet@google.com \
    --cc=elver@google.com \
    --cc=eugenis@google.com \
    --cc=glider@google.com \
    --cc=gor@linux.ibm.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=hch@lst.de \
    --cc=herbert@gondor.apana.org.au \
    --cc=iamjoonsoo.kim@lge.com \
    --cc=iii@linux.ibm.com \
    --cc=kasan-dev@googlegroups.com \
    --cc=keescook@chromium.org \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-toolchains@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mingo@redhat.com \
    --cc=mst@redhat.com \
    --cc=nathan@kernel.org \
    --cc=ndesaulniers@google.com \
    --cc=penberg@kernel.org \
    --cc=peterz@infradead.org \
    --cc=pmladek@suse.com \
    --cc=rientjes@google.com \
    --cc=rostedt@goodmis.org \
    --cc=segher@kernel.crashing.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    --cc=vbabka@suse.cz \
    --cc=vegard.nossum@oracle.com \
    --cc=vitalybuka@google.com \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).