linux-kernel.vger.kernel.org archive mirror
* [PATCH] nfsd4: fix a deadlock on state owner replay mutex
@ 2019-06-27 18:30 黄乐
  2019-07-10  0:03 ` bfields
From: 黄乐 @ 2019-06-27 18:30 UTC (permalink / raw)
  To: bfields, jlayton, linux-nfs, linux-kernel

From: Huang Le <huangle1@jd.com>

In move_to_close_lru(), which is only called on the nfsd4 CLOSE path,
the code can wait for the stid's refcount to drop to 2 while holding
the state owner's replay mutex.  However, the other stid reference
holder (normally a parallel CLOSE op) that move_to_close_lru() is
waiting for may itself be trying to acquire the same replay mutex.

This patch fixes the issue by clearing the replay owner before waiting
and assigning it back afterwards.

Signed-off-by: Huang Le <huangle1@jd.com>
---

I guess we should cc this patch to the stable tree, since a malicious client
could craft parallel CLOSE ops to quickly put all nfsd tasks into D state.
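
Not part of the patch, just to make the interleaving concrete: below is a
minimal user-space sketch of the deadlock, with made-up names (rp_mutex,
sc_count, close1/close2) standing in for so_replay.rp_mutex, the stid
refcount and the two racing CLOSE ops; a pthread condvar plays the role of
close_wq.  Built with `cc -pthread`, it hangs the same way the two nfsd
threads do: close1 holds the mutex while waiting for the count to reach 2,
and close2 cannot drop its reference until it gets that mutex.

/* Illustration only, not kernel code. */
#include <pthread.h>
#include <unistd.h>

static pthread_mutex_t rp_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t cnt_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cnt_cond = PTHREAD_COND_INITIALIZER;
static int sc_count = 3;	/* hashed stateid + CLOSE #1 + CLOSE #2 */

static void *close1(void *arg)
{
	pthread_mutex_lock(&rp_mutex);		/* ~ nfsd4_cstate_assign_replay() */

	/* ~ move_to_close_lru(): wait for the count to drop to 2, mutex held */
	pthread_mutex_lock(&cnt_lock);
	while (sc_count != 2)
		pthread_cond_wait(&cnt_cond, &cnt_lock);
	pthread_mutex_unlock(&cnt_lock);

	pthread_mutex_unlock(&rp_mutex);
	return NULL;
}

static void *close2(void *arg)
{
	usleep(100 * 1000);			/* let close1 win the mutex */
	pthread_mutex_lock(&rp_mutex);		/* blocks forever */

	/* only after that would this CLOSE drop its extra reference */
	pthread_mutex_lock(&cnt_lock);
	sc_count--;
	pthread_cond_broadcast(&cnt_cond);
	pthread_mutex_unlock(&cnt_lock);

	pthread_mutex_unlock(&rp_mutex);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, close1, NULL);
	pthread_create(&b, NULL, close2, NULL);
	pthread_join(a, NULL);			/* never returns: close1 <-> close2 */
	pthread_join(b, NULL);
	return 0;
}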

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 618e660..5f6a48f 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3829,12 +3829,12 @@ static void nfs4_free_openowner(struct nfs4_stateowner *so)
  * them before returning however.
  */
 static void
-move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
+move_to_close_lru(struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *s,
+		struct net *net)
 {
 	struct nfs4_ol_stateid *last;
 	struct nfs4_openowner *oo = openowner(s->st_stateowner);
-	struct nfsd_net *nn = net_generic(s->st_stid.sc_client->net,
-						nfsd_net_id);
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
 	dprintk("NFSD: move_to_close_lru nfs4_openowner %p\n", oo);
 
@@ -3846,8 +3846,19 @@ static void nfs4_free_openowner(struct nfs4_stateowner *so)
 	 * Wait for the refcount to drop to 2. Since it has been unhashed,
 	 * there should be no danger of the refcount going back up again at
 	 * this point.
+	 *
+	 * Before waiting, clear cstate->replay_owner to release its
+	 * so_replay.rp_mutex, since another reference holder might be trying
+	 * to acquire the same mutex before it can drop its reference.  The
+	 * replay_owner can safely be assigned back once the waiters are done.
 	 */
-	wait_event(close_wq, refcount_read(&s->st_stid.sc_count) == 2);
+	if (refcount_read(&s->st_stid.sc_count) != 2) {
+		struct nfs4_stateowner *so = cstate->replay_owner;
+
+		nfsd4_cstate_clear_replay(cstate);
+		wait_event(close_wq, refcount_read(&s->st_stid.sc_count) == 2);
+		nfsd4_cstate_assign_replay(cstate, so);
+	}
 
 	release_all_access(s);
 	if (s->st_stid.sc_file) {
@@ -5531,7 +5542,8 @@ static inline void nfs4_stateid_downgrade(struct nfs4_ol_stateid *stp, u32 to_ac
 	return status;
 }
 
-static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
+static void nfsd4_close_open_stateid(struct nfsd4_compound_state *cstate,
+		struct nfs4_ol_stateid *s)
 {
 	struct nfs4_client *clp = s->st_stid.sc_client;
 	bool unhashed;
@@ -5549,7 +5561,7 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
 		spin_unlock(&clp->cl_lock);
 		free_ol_stateid_reaplist(&reaplist);
 		if (unhashed)
-			move_to_close_lru(s, clp->net);
+			move_to_close_lru(cstate, s, clp->net);
 	}
 }
 
@@ -5587,7 +5599,7 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
 	 */
 	nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
 
-	nfsd4_close_open_stateid(stp);
+	nfsd4_close_open_stateid(cstate, stp);
 	mutex_unlock(&stp->st_mutex);
 
 	/* v4.1+ suggests that we send a special stateid in here, since the


* Re: [PATCH] nfsd4: fix a deadlock on state owner replay mutex
  2019-06-27 18:30 [PATCH] nfsd4: fix a deadlock on state owner replay mutex 黄乐
@ 2019-07-10  0:03 ` bfields
  2019-07-10 18:43   ` 黄乐
From: bfields @ 2019-07-10  0:03 UTC (permalink / raw)
  To: 黄乐; +Cc: jlayton, linux-nfs, linux-kernel

On Thu, Jun 27, 2019 at 06:30:27PM +0000, 黄乐 wrote:
> From: Huang Le <huangle1@jd.com>
> 
> In move_to_close_lru(), which is only called on the nfsd4 CLOSE path,
> the code can wait for the stid's refcount to drop to 2 while holding
> the state owner's replay mutex.  However, the other stid reference
> holder (normally a parallel CLOSE op) that move_to_close_lru() is
> waiting for may itself be trying to acquire the same replay mutex.
> 
> This patch fixes the issue by clearing the replay owner before waiting
> and assigning it back afterwards.

I don't understand why that's safe.  Maybe it is, but I don't understand
yet.  If we take the mutex, bump the seqid, drop the mutex, someone else
comes in and bumps the seqid again, then we reacquire the mutex... what
happens?

--b.



* Re: [PATCH] nfsd4: fix a deadlock on state owner replay mutex
  2019-07-10  0:03 ` bfields
@ 2019-07-10 18:43   ` 黄乐
From: 黄乐 @ 2019-07-10 18:43 UTC (permalink / raw)
  To: bfields; +Cc: jlayton, linux-nfs, linux-kernel

It's safe because when we reach move_to_close_lru(), we know we are the
*logically* last call among all racing calls; that is actually why we do
the waiting here in the first place.  It is only because of the race that
we grabbed the rp_mutex first while carrying a *lower* seqid value, so
letting the other racing calls bump the seqid before we continue is
exactly what we want: it ensures that a possible future CLOSE replay sees
the correct seqid.
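
For what it's worth, here is the companion sketch of the fixed flow, in the
same made-up user-space model as in the patch mail (illustration only; in
the real code the seqid bump is done by nfs4_inc_and_copy_stateid(), and
the replay mutex is released and re-taken via nfsd4_cstate_clear_replay()
and nfsd4_cstate_assign_replay()).  close1 bumps the seqid first but
releases the mutex before waiting; close2 then takes the mutex, bumps the
seqid again and drops its reference; only then does close1 re-take the
mutex and finish, so the final state reflects the last CLOSE processed:

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t rp_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t cnt_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cnt_cond = PTHREAD_COND_INITIALIZER;
static int sc_count = 3;
static int seqid = 1;

static void *close1(void *arg)
{
	pthread_mutex_lock(&rp_mutex);
	seqid++;				/* logically-last CLOSE bumps first ... */

	pthread_mutex_unlock(&rp_mutex);	/* ~ nfsd4_cstate_clear_replay() */
	pthread_mutex_lock(&cnt_lock);
	while (sc_count != 2)			/* ~ wait_event() in move_to_close_lru() */
		pthread_cond_wait(&cnt_cond, &cnt_lock);
	pthread_mutex_unlock(&cnt_lock);
	pthread_mutex_lock(&rp_mutex);		/* ~ nfsd4_cstate_assign_replay() */

	printf("teardown done, final seqid %d\n", seqid);
	pthread_mutex_unlock(&rp_mutex);
	return NULL;
}

static void *close2(void *arg)
{
	usleep(100 * 1000);			/* lose the race for the mutex */
	pthread_mutex_lock(&rp_mutex);		/* succeeds now: close1 let go of it */
	seqid++;				/* ... and the racing CLOSE bumps after */

	pthread_mutex_lock(&cnt_lock);
	sc_count--;				/* drop the extra stid reference */
	pthread_cond_broadcast(&cnt_cond);
	pthread_mutex_unlock(&cnt_lock);

	pthread_mutex_unlock(&rp_mutex);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, close1, NULL);
	pthread_create(&b, NULL, close2, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}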


On Wed, Jul 10, 2019, bfields@fieldses.org wrote:
> I don't understand why that's safe.  Maybe it is, but I don't understand
> yet.  If we take the mutex, bump the seqid, drop the mutex, someone else
> comes in and bumps the seqid again, then we reacquire the mutex... what
> happens?
> 
> --b.

^ permalink raw reply	[flat|nested] 4+ messages in thread

