All of lore.kernel.org
 help / color / mirror / Atom feed
From: Nikolay Borisov <nborisov@suse.com>
To: NeilBrown <neilb@suse.com>, Al Viro <viro@zeniv.linux.org.uk>
Cc: linux-fsdevel@vger.kernel.org,
	Linus Torvalds <torvalds@linux-foundation.org>,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH 3/3] VFS: close race between getcwd() and d_move()
Date: Thu, 9 Nov 2017 13:41:24 +0200	[thread overview]
Message-ID: <076c5402-b2ff-2d29-2e38-2c0cb69e89b2@suse.com> (raw)
In-Reply-To: <151019772763.30101.16040338743875884111.stgit@noble>



On  9.11.2017 05:22, NeilBrown wrote:
> d_move() will call __d_drop() and then __d_rehash()
> on the dentry being moved.  This creates a small window
> when the dentry appears to be unhashed.  Many tests
> of d_unhashed() are made under ->d_lock and so are safe
> from racing with this window, but some aren't.
> In particular, getcwd() calls d_unlinked() (which calls
> d_unhashed()) without d_lock protection, so it can race.
> 
> This races has been seen in practice with lustre, which uses d_move() as
> part of name lookup.  See:
>    https://jira.hpdd.intel.com/browse/LU-9735
> It could race with a regular rename(), and result in ENOENT instead
> of either the 'before' or 'after' name.
> 
> The race can be demonstrated with a simple program which
> has two threads, one renaming a directory back and forth
> while another calls getcwd() within that directory: it should never
> fail, but does.  See:
>   https://patchwork.kernel.org/patch/9455345/
> 
> We could fix this race by taking d_lock and rechecking when
> d_unhashed() reports true.  Alternately when can remove the window,
> which is the approach this patch takes.
> 
> When __d_drop and __d_rehash are used to move a dentry, an extra
> flag is passed which causes d_hash.pprev to not be cleared, and
> to not be tested.
> 
> Signed-off-by: NeilBrown <neilb@suse.com>
> ---
>  fs/dcache.c |   31 ++++++++++++++++++++-----------
>  1 file changed, 20 insertions(+), 11 deletions(-)
> 
> diff --git a/fs/dcache.c b/fs/dcache.c
> index d5952306206b..3130d62f29c9 100644
> --- a/fs/dcache.c
> +++ b/fs/dcache.c
> @@ -471,8 +471,11 @@ static void dentry_lru_add(struct dentry *dentry)
>   * reason (NFS timeouts or autofs deletes).
>   *
>   * __d_drop requires dentry->d_lock.
> + * ___d_drop takes an extra @moving argument.
> + * If true, d_hash.pprev is not cleared, so there is no transient d_unhashed()
> + * state.
>   */
> -void __d_drop(struct dentry *dentry)
> +static void inline ___d_drop(struct dentry *dentry, bool moving)
>  {
>  	if (!d_unhashed(dentry)) {
>  		struct hlist_bl_head *b;
> @@ -493,12 +496,18 @@ void __d_drop(struct dentry *dentry)
>  		} else
>  			hlist_bl_lock(b);
>  		__hlist_bl_del(&dentry->d_hash);
> -		dentry->d_hash.pprev = NULL;
> +		if (likely(!moving))
> +			dentry->d_hash.pprev = NULL;

nit: isn't a bit more explicit if (unlikely(moving)). I suspect the end
result is the same, however it's easy to miss the !. It's not a big deal
but just wondering.

>  		hlist_bl_unlock(b);
>  		/* After this call, in-progress rcu-walk path lookup will fail. */
>  		write_seqcount_invalidate(&dentry->d_seq);
>  	}
>  }
> +
> +void __d_drop(struct dentry *dentry)
> +{
> +	___d_drop(dentry, false);
> +}
>  EXPORT_SYMBOL(__d_drop);
>  
>  void d_drop(struct dentry *dentry)
> @@ -2387,10 +2396,10 @@ void d_delete(struct dentry * dentry)
>  }
>  EXPORT_SYMBOL(d_delete);
>  
> -static void __d_rehash(struct dentry *entry)
> +static void __d_rehash(struct dentry *entry, bool moving)
>  {
>  	struct hlist_bl_head *b = d_hash(entry->d_name.hash);
> -	BUG_ON(!d_unhashed(entry));
> +	BUG_ON(!moving && !d_unhashed(entry));
>  	hlist_bl_lock(b);
>  	hlist_bl_add_head_rcu(&entry->d_hash, b);
>  	hlist_bl_unlock(b);
> @@ -2406,7 +2415,7 @@ static void __d_rehash(struct dentry *entry)
>  void d_rehash(struct dentry * entry)
>  {
>  	spin_lock(&entry->d_lock);
> -	__d_rehash(entry);
> +	__d_rehash(entry, false);
>  	spin_unlock(&entry->d_lock);
>  }
>  EXPORT_SYMBOL(d_rehash);
> @@ -2580,7 +2589,7 @@ static inline void __d_add(struct dentry *dentry, struct inode *inode)
>  		raw_write_seqcount_end(&dentry->d_seq);
>  		fsnotify_update_flags(dentry);
>  	}
> -	__d_rehash(dentry);
> +	__d_rehash(dentry, false);
>  	if (dir)
>  		end_dir_add(dir, n);
>  	spin_unlock(&dentry->d_lock);
> @@ -2642,7 +2651,7 @@ struct dentry *d_exact_alias(struct dentry *entry, struct inode *inode)
>  			alias = NULL;
>  		} else {
>  			__dget_dlock(alias);
> -			__d_rehash(alias);
> +			__d_rehash(alias, false);
>  			spin_unlock(&alias->d_lock);
>  		}
>  		spin_unlock(&inode->i_lock);
> @@ -2828,8 +2837,8 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
>  
>  	/* unhash both */
>  	/* __d_drop does write_seqcount_barrier, but they're OK to nest. */
> -	__d_drop(dentry);
> -	__d_drop(target);
> +	___d_drop(dentry, true);
> +	___d_drop(target, exchange);
>  
>  	/* Switch the names.. */
>  	if (exchange)
> @@ -2838,9 +2847,9 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
>  		copy_name(dentry, target);
>  
>  	/* rehash in new place(s) */
> -	__d_rehash(dentry);
> +	__d_rehash(dentry, true);
>  	if (exchange)
> -		__d_rehash(target);
> +		__d_rehash(target, true);
>  
>  	/* ... and switch them in the tree */
>  	if (IS_ROOT(dentry)) {
> 
> 
> 

  reply	other threads:[~2017-11-09 11:41 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-11-09  3:22 [PATCH 0/3] Three VFS patch resends NeilBrown
2017-11-09  3:22 ` [PATCH 1/3] VFS: use synchronize_rcu_expedited() in namespace_unlock() NeilBrown
2017-11-09  3:22 ` [PATCH 3/3] VFS: close race between getcwd() and d_move() NeilBrown
2017-11-09 11:41   ` Nikolay Borisov [this message]
2017-11-09 13:08     ` Matthew Wilcox
2017-11-09 16:02       ` Nikolay Borisov
2017-11-09 20:23   ` Linus Torvalds
2017-11-09 22:14     ` NeilBrown
2017-11-10  1:40       ` Linus Torvalds
2017-11-10  4:45         ` NeilBrown
2017-11-10 19:52           ` Linus Torvalds
2017-11-10 20:53           ` Al Viro
2017-11-21 23:50             ` Al Viro
2017-11-22  1:31               ` NeilBrown
2017-11-09  3:22 ` [PATCH 2/3] Improve fairness when locking the per-superblock s_anon list NeilBrown
2017-11-09 19:52   ` Linus Torvalds
2017-11-09 20:50     ` Al Viro
2017-11-09 23:09       ` NeilBrown
2017-11-09 23:19         ` Al Viro
2017-11-10  0:02       ` Linus Torvalds
2017-11-10  8:50     ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=076c5402-b2ff-2d29-2e38-2c0cb69e89b2@suse.com \
    --to=nborisov@suse.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=neilb@suse.com \
    --cc=torvalds@linux-foundation.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.