From: Nikolay Borisov <nborisov@suse.com>
To: NeilBrown <neilb@suse.com>, Al Viro <viro@zeniv.linux.org.uk>
Cc: linux-fsdevel@vger.kernel.org,
Linus Torvalds <torvalds@linux-foundation.org>,
linux-kernel@vger.kernel.org
Subject: Re: [PATCH 3/3] VFS: close race between getcwd() and d_move()
Date: Thu, 9 Nov 2017 13:41:24 +0200 [thread overview]
Message-ID: <076c5402-b2ff-2d29-2e38-2c0cb69e89b2@suse.com> (raw)
In-Reply-To: <151019772763.30101.16040338743875884111.stgit@noble>
On 9.11.2017 05:22, NeilBrown wrote:
> d_move() will call __d_drop() and then __d_rehash()
> on the dentry being moved. This creates a small window
> when the dentry appears to be unhashed. Many tests
> of d_unhashed() are made under ->d_lock and so are safe
> from racing with this window, but some aren't.
> In particular, getcwd() calls d_unlinked() (which calls
> d_unhashed()) without d_lock protection, so it can race.
>
> This race has been seen in practice with lustre, which uses d_move() as
> part of name lookup. See:
> https://jira.hpdd.intel.com/browse/LU-9735
> It could race with a regular rename(), and result in ENOENT instead
> of either the 'before' or 'after' name.
>
> The race can be demonstrated with a simple program which
> has two threads, one renaming a directory back and forth
> while another calls getcwd() within that directory: it should never
> fail, but does. See:
> https://patchwork.kernel.org/patch/9455345/
>
> We could fix this race by taking d_lock and rechecking when
> d_unhashed() reports true. Alternatively, we can remove the window,
> which is the approach this patch takes.
>
> When __d_drop and __d_rehash are used to move a dentry, an extra
> flag is passed which causes d_hash.pprev to not be cleared, and
> to not be tested.
>
> Signed-off-by: NeilBrown <neilb@suse.com>
> ---
> fs/dcache.c | 31 ++++++++++++++++++++-----------
> 1 file changed, 20 insertions(+), 11 deletions(-)
>
> diff --git a/fs/dcache.c b/fs/dcache.c
> index d5952306206b..3130d62f29c9 100644
> --- a/fs/dcache.c
> +++ b/fs/dcache.c
> @@ -471,8 +471,11 @@ static void dentry_lru_add(struct dentry *dentry)
> * reason (NFS timeouts or autofs deletes).
> *
> * __d_drop requires dentry->d_lock.
> + * ___d_drop takes an extra @moving argument.
> + * If true, d_hash.pprev is not cleared, so there is no transient d_unhashed()
> + * state.
> */
> -void __d_drop(struct dentry *dentry)
> +static void inline ___d_drop(struct dentry *dentry, bool moving)
> {
> if (!d_unhashed(dentry)) {
> struct hlist_bl_head *b;
> @@ -493,12 +496,18 @@ void __d_drop(struct dentry *dentry)
> } else
> hlist_bl_lock(b);
> __hlist_bl_del(&dentry->d_hash);
> - dentry->d_hash.pprev = NULL;
> + if (likely(!moving))
> + dentry->d_hash.pprev = NULL;
nit: isn't it a bit more explicit to write if (unlikely(moving))? I suspect
the end result is the same, however it's easy to miss the !. It's not a big
deal, but just wondering.
> hlist_bl_unlock(b);
> /* After this call, in-progress rcu-walk path lookup will fail. */
> write_seqcount_invalidate(&dentry->d_seq);
> }
> }
> +
> +void __d_drop(struct dentry *dentry)
> +{
> + ___d_drop(dentry, false);
> +}
> EXPORT_SYMBOL(__d_drop);
>
> void d_drop(struct dentry *dentry)
> @@ -2387,10 +2396,10 @@ void d_delete(struct dentry * dentry)
> }
> EXPORT_SYMBOL(d_delete);
>
> -static void __d_rehash(struct dentry *entry)
> +static void __d_rehash(struct dentry *entry, bool moving)
> {
> struct hlist_bl_head *b = d_hash(entry->d_name.hash);
> - BUG_ON(!d_unhashed(entry));
> + BUG_ON(!moving && !d_unhashed(entry));
> hlist_bl_lock(b);
> hlist_bl_add_head_rcu(&entry->d_hash, b);
> hlist_bl_unlock(b);
> @@ -2406,7 +2415,7 @@ static void __d_rehash(struct dentry *entry)
> void d_rehash(struct dentry * entry)
> {
> spin_lock(&entry->d_lock);
> - __d_rehash(entry);
> + __d_rehash(entry, false);
> spin_unlock(&entry->d_lock);
> }
> EXPORT_SYMBOL(d_rehash);
> @@ -2580,7 +2589,7 @@ static inline void __d_add(struct dentry *dentry, struct inode *inode)
> raw_write_seqcount_end(&dentry->d_seq);
> fsnotify_update_flags(dentry);
> }
> - __d_rehash(dentry);
> + __d_rehash(dentry, false);
> if (dir)
> end_dir_add(dir, n);
> spin_unlock(&dentry->d_lock);
> @@ -2642,7 +2651,7 @@ struct dentry *d_exact_alias(struct dentry *entry, struct inode *inode)
> alias = NULL;
> } else {
> __dget_dlock(alias);
> - __d_rehash(alias);
> + __d_rehash(alias, false);
> spin_unlock(&alias->d_lock);
> }
> spin_unlock(&inode->i_lock);
> @@ -2828,8 +2837,8 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
>
> /* unhash both */
> /* __d_drop does write_seqcount_barrier, but they're OK to nest. */
> - __d_drop(dentry);
> - __d_drop(target);
> + ___d_drop(dentry, true);
> + ___d_drop(target, exchange);
>
> /* Switch the names.. */
> if (exchange)
> @@ -2838,9 +2847,9 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
> copy_name(dentry, target);
>
> /* rehash in new place(s) */
> - __d_rehash(dentry);
> + __d_rehash(dentry, true);
> if (exchange)
> - __d_rehash(target);
> + __d_rehash(target, true);
>
> /* ... and switch them in the tree */
> if (IS_ROOT(dentry)) {
>
>
>
next prev parent reply other threads:[~2017-11-09 11:41 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-11-09 3:22 [PATCH 0/3] Three VFS patch resends NeilBrown
2017-11-09 3:22 ` [PATCH 1/3] VFS: use synchronize_rcu_expedited() in namespace_unlock() NeilBrown
2017-11-09 3:22 ` [PATCH 2/3] Improve fairness when locking the per-superblock s_anon list NeilBrown
2017-11-09 19:52 ` Linus Torvalds
2017-11-09 20:50 ` Al Viro
2017-11-09 23:09 ` NeilBrown
2017-11-09 23:19 ` Al Viro
2017-11-10 0:02 ` Linus Torvalds
2017-11-10 8:50 ` Christoph Hellwig
2017-11-09 3:22 ` [PATCH 3/3] VFS: close race between getcwd() and d_move() NeilBrown
2017-11-09 11:41 ` Nikolay Borisov [this message]
2017-11-09 13:08 ` Matthew Wilcox
2017-11-09 16:02 ` Nikolay Borisov
2017-11-09 20:23 ` Linus Torvalds
2017-11-09 22:14 ` NeilBrown
2017-11-10 1:40 ` Linus Torvalds
2017-11-10 4:45 ` NeilBrown
2017-11-10 19:52 ` Linus Torvalds
2017-11-10 20:53 ` Al Viro
2017-11-21 23:50 ` Al Viro
2017-11-22 1:31 ` NeilBrown
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=076c5402-b2ff-2d29-2e38-2c0cb69e89b2@suse.com \
--to=nborisov@suse.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=neilb@suse.com \
--cc=torvalds@linux-foundation.org \
--cc=viro@zeniv.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).