All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RFC 0/2] Fix "sleep while locked" in RELEASE_LOCKOWNER
@ 2022-05-11 21:52 Chuck Lever
  2022-05-11 21:52 ` [PATCH RFC 1/2] NFSD: nfsd4_release_lockowner() should drop clp->cl_lock sooner Chuck Lever
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Chuck Lever @ 2022-05-11 21:52 UTC (permalink / raw)
  To: linux-nfs; +Cc: trondmy, bfields, jlayton, dai.ngo

This short series passes the usual tests on NFSv4.0. We still do not
have a reproducer for the splat, though, so it's not known if the
issue has been fully addressed.

Because this is a long-standing issue and we do not have a
reproducer, I'm inclined to be conservative and push this in v5.19
rather than in v5.18-rc.

Thoughts, questions, and virtual rotten fruit welcome.

---

Chuck Lever (2):
      NFSD: nfsd4_release_lockowner() should drop clp->cl_lock sooner
      NFSD: nfsd_file_put() can sleep


 fs/nfsd/filecache.c |  2 ++
 fs/nfsd/nfs4state.c | 56 ++++++++++++++++++++-------------------------
 2 files changed, 27 insertions(+), 31 deletions(-)

--
Chuck Lever


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH RFC 1/2] NFSD: nfsd4_release_lockowner() should drop clp->cl_lock sooner
  2022-05-11 21:52 [PATCH RFC 0/2] Fix "sleep while locked" in RELEASE_LOCKOWNER Chuck Lever
@ 2022-05-11 21:52 ` Chuck Lever
  2022-05-11 22:04   ` Trond Myklebust
  2022-05-12 10:07   ` Jeff Layton
  2022-05-11 21:52 ` [PATCH RFC 2/2] NFSD: nfsd_file_put() can sleep Chuck Lever
       [not found] ` <PH0PR14MB549335582C7A4D4F4D2269B2AAD99@PH0PR14MB5493.namprd14.prod.outlook.com>
  2 siblings, 2 replies; 11+ messages in thread
From: Chuck Lever @ 2022-05-11 21:52 UTC (permalink / raw)
  To: linux-nfs; +Cc: trondmy, bfields, jlayton, dai.ngo

nfsd4_release_lockowner() mustn't hold clp->cl_lock when
check_for_locks() invokes nfsd_file_put(), which can sleep.

Reported-by: Dai Ngo <dai.ngo@oracle.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Cc: <stable@vger.kernel.org>
---
 fs/nfsd/nfs4state.c |   56 +++++++++++++++++++++++----------------------------
 1 file changed, 25 insertions(+), 31 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 234e852fcdfa..e2eb6d031643 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -6611,8 +6611,8 @@ nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny)
 		deny->ld_type = NFS4_WRITE_LT;
 }
 
-static struct nfs4_lockowner *
-find_lockowner_str_locked(struct nfs4_client *clp, struct xdr_netobj *owner)
+static struct nfs4_stateowner *
+find_stateowner_str_locked(struct nfs4_client *clp, struct xdr_netobj *owner)
 {
 	unsigned int strhashval = ownerstr_hashval(owner);
 	struct nfs4_stateowner *so;
@@ -6624,11 +6624,22 @@ find_lockowner_str_locked(struct nfs4_client *clp, struct xdr_netobj *owner)
 		if (so->so_is_open_owner)
 			continue;
 		if (same_owner_str(so, owner))
-			return lockowner(nfs4_get_stateowner(so));
+			return nfs4_get_stateowner(so);
 	}
 	return NULL;
 }
 
+static struct nfs4_lockowner *
+find_lockowner_str_locked(struct nfs4_client *clp, struct xdr_netobj *owner)
+{
+	struct nfs4_stateowner *so;
+
+	so = find_stateowner_str_locked(clp, owner);
+	if (!so)
+		return NULL;
+	return lockowner(so);
+}
+
 static struct nfs4_lockowner *
 find_lockowner_str(struct nfs4_client *clp, struct xdr_netobj *owner)
 {
@@ -7305,10 +7316,8 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
 	struct nfsd4_release_lockowner *rlockowner = &u->release_lockowner;
 	clientid_t *clid = &rlockowner->rl_clientid;
 	struct nfs4_stateowner *sop;
-	struct nfs4_lockowner *lo = NULL;
+	struct nfs4_lockowner *lo;
 	struct nfs4_ol_stateid *stp;
-	struct xdr_netobj *owner = &rlockowner->rl_owner;
-	unsigned int hashval = ownerstr_hashval(owner);
 	__be32 status;
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 	struct nfs4_client *clp;
@@ -7322,32 +7331,18 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
 		return status;
 
 	clp = cstate->clp;
-	/* Find the matching lock stateowner */
 	spin_lock(&clp->cl_lock);
-	list_for_each_entry(sop, &clp->cl_ownerstr_hashtbl[hashval],
-			    so_strhash) {
-
-		if (sop->so_is_open_owner || !same_owner_str(sop, owner))
-			continue;
-
-		/* see if there are still any locks associated with it */
-		lo = lockowner(sop);
-		list_for_each_entry(stp, &sop->so_stateids, st_perstateowner) {
-			if (check_for_locks(stp->st_stid.sc_file, lo)) {
-				status = nfserr_locks_held;
-				spin_unlock(&clp->cl_lock);
-				return status;
-			}
-		}
+	sop = find_stateowner_str_locked(clp, &rlockowner->rl_owner);
+	spin_unlock(&clp->cl_lock);
+	if (!sop)
+		return nfs_ok;
 
-		nfs4_get_stateowner(sop);
-		break;
-	}
-	if (!lo) {
-		spin_unlock(&clp->cl_lock);
-		return status;
-	}
+	lo = lockowner(sop);
+	list_for_each_entry(stp, &sop->so_stateids, st_perstateowner)
+		if (check_for_locks(stp->st_stid.sc_file, lo))
+			return nfserr_locks_held;
 
+	spin_lock(&clp->cl_lock);
 	unhash_lockowner_locked(lo);
 	while (!list_empty(&lo->lo_owner.so_stateids)) {
 		stp = list_first_entry(&lo->lo_owner.so_stateids,
@@ -7360,8 +7355,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
 	free_ol_stateid_reaplist(&reaplist);
 	remove_blocked_locks(lo);
 	nfs4_put_stateowner(&lo->lo_owner);
-
-	return status;
+	return nfs_ok;
 }
 
 static inline struct nfs4_client_reclaim *



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH RFC 2/2] NFSD: nfsd_file_put() can sleep
  2022-05-11 21:52 [PATCH RFC 0/2] Fix "sleep while locked" in RELEASE_LOCKOWNER Chuck Lever
  2022-05-11 21:52 ` [PATCH RFC 1/2] NFSD: nfsd4_release_lockowner() should drop clp->cl_lock sooner Chuck Lever
@ 2022-05-11 21:52 ` Chuck Lever
  2022-05-12 10:08   ` Jeff Layton
       [not found] ` <PH0PR14MB549335582C7A4D4F4D2269B2AAD99@PH0PR14MB5493.namprd14.prod.outlook.com>
  2 siblings, 1 reply; 11+ messages in thread
From: Chuck Lever @ 2022-05-11 21:52 UTC (permalink / raw)
  To: linux-nfs; +Cc: trondmy, bfields, jlayton, dai.ngo

Ensure the lockdep infrastructure can catch calls to nfsd_file_put()
when spinlocks are held.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/filecache.c |    2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index 8f7ed5dbb003..60a5d82e59b3 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -301,6 +301,8 @@ nfsd_file_put_noref(struct nfsd_file *nf)
 void
 nfsd_file_put(struct nfsd_file *nf)
 {
+	might_sleep();
+
 	set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
 	if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) {
 		nfsd_file_flush(nf);



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH RFC 1/2] NFSD: nfsd4_release_lockowner() should drop clp->cl_lock sooner
  2022-05-11 21:52 ` [PATCH RFC 1/2] NFSD: nfsd4_release_lockowner() should drop clp->cl_lock sooner Chuck Lever
@ 2022-05-11 22:04   ` Trond Myklebust
  2022-05-12 14:03     ` Chuck Lever III
  2022-05-12 10:07   ` Jeff Layton
  1 sibling, 1 reply; 11+ messages in thread
From: Trond Myklebust @ 2022-05-11 22:04 UTC (permalink / raw)
  To: linux-nfs, chuck.lever; +Cc: bfields, jlayton, dai.ngo

On Wed, 2022-05-11 at 17:52 -0400, Chuck Lever wrote:
> nfsd4_release_lockowner() mustn't hold clp->cl_lock when
> check_for_locks() invokes nfsd_file_put(), which can sleep.
> 
> Reported-by: Dai Ngo <dai.ngo@oracle.com>
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> Cc: <stable@vger.kernel.org>
> ---
>  fs/nfsd/nfs4state.c |   56 +++++++++++++++++++++++------------------
> ----------
>  1 file changed, 25 insertions(+), 31 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 234e852fcdfa..e2eb6d031643 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -6611,8 +6611,8 @@ nfs4_set_lock_denied(struct file_lock *fl,
> struct nfsd4_lock_denied *deny)
>                 deny->ld_type = NFS4_WRITE_LT;
>  }
>  
> -static struct nfs4_lockowner *
> -find_lockowner_str_locked(struct nfs4_client *clp, struct xdr_netobj
> *owner)
> +static struct nfs4_stateowner *
> +find_stateowner_str_locked(struct nfs4_client *clp, struct
> xdr_netobj *owner)
>  {
>         unsigned int strhashval = ownerstr_hashval(owner);
>         struct nfs4_stateowner *so;
> @@ -6624,11 +6624,22 @@ find_lockowner_str_locked(struct nfs4_client
> *clp, struct xdr_netobj *owner)
>                 if (so->so_is_open_owner)
>                         continue;
>                 if (same_owner_str(so, owner))
> -                       return lockowner(nfs4_get_stateowner(so));
> +                       return nfs4_get_stateowner(so);

Hmm... If nfs4_get_stateowner() can fail here, don't you want to
continue searching the list when it does?

>         }
>         return NULL;
>  }
>  
> +static struct nfs4_lockowner *
> +find_lockowner_str_locked(struct nfs4_client *clp, struct xdr_netobj
> *owner)
> +{
> +       struct nfs4_stateowner *so;
> +
> +       so = find_stateowner_str_locked(clp, owner);
> +       if (!so)
> +               return NULL;
> +       return lockowner(so);
> +}
> +
>  static struct nfs4_lockowner *
>  find_lockowner_str(struct nfs4_client *clp, struct xdr_netobj
> *owner)
>  {
> @@ -7305,10 +7316,8 @@ nfsd4_release_lockowner(struct svc_rqst
> *rqstp,
>         struct nfsd4_release_lockowner *rlockowner = &u-
> >release_lockowner;
>         clientid_t *clid = &rlockowner->rl_clientid;
>         struct nfs4_stateowner *sop;
> -       struct nfs4_lockowner *lo = NULL;
> +       struct nfs4_lockowner *lo;
>         struct nfs4_ol_stateid *stp;
> -       struct xdr_netobj *owner = &rlockowner->rl_owner;
> -       unsigned int hashval = ownerstr_hashval(owner);
>         __be32 status;
>         struct nfsd_net *nn = net_generic(SVC_NET(rqstp),
> nfsd_net_id);
>         struct nfs4_client *clp;
> @@ -7322,32 +7331,18 @@ nfsd4_release_lockowner(struct svc_rqst
> *rqstp,
>                 return status;
>  
>         clp = cstate->clp;
> -       /* Find the matching lock stateowner */
>         spin_lock(&clp->cl_lock);
> -       list_for_each_entry(sop, &clp->cl_ownerstr_hashtbl[hashval],
> -                           so_strhash) {
> -
> -               if (sop->so_is_open_owner || !same_owner_str(sop,
> owner))
> -                       continue;
> -
> -               /* see if there are still any locks associated with
> it */
> -               lo = lockowner(sop);
> -               list_for_each_entry(stp, &sop->so_stateids,
> st_perstateowner) {
> -                       if (check_for_locks(stp->st_stid.sc_file,
> lo)) {
> -                               status = nfserr_locks_held;
> -                               spin_unlock(&clp->cl_lock);
> -                               return status;
> -                       }
> -               }
> +       sop = find_stateowner_str_locked(clp, &rlockowner->rl_owner);
> +       spin_unlock(&clp->cl_lock);
> +       if (!sop)
> +               return nfs_ok;
>  
> -               nfs4_get_stateowner(sop);
> -               break;
> -       }
> -       if (!lo) {
> -               spin_unlock(&clp->cl_lock);
> -               return status;
> -       }
> +       lo = lockowner(sop);
> +       list_for_each_entry(stp, &sop->so_stateids, st_perstateowner)
> +               if (check_for_locks(stp->st_stid.sc_file, lo))
> +                       return nfserr_locks_held;

Isn't this line now leaking the reference to sop?

>  
> +       spin_lock(&clp->cl_lock);
>         unhash_lockowner_locked(lo);
>         while (!list_empty(&lo->lo_owner.so_stateids)) {
>                 stp = list_first_entry(&lo->lo_owner.so_stateids,
> @@ -7360,8 +7355,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
>         free_ol_stateid_reaplist(&reaplist);
>         remove_blocked_locks(lo);
>         nfs4_put_stateowner(&lo->lo_owner);
> -
> -       return status;
> +       return nfs_ok;
>  }
>  
>  static inline struct nfs4_client_reclaim *
> 
> 

-- 
Trond Myklebust
Linux NFS client maintainer, Hammerspace
trond.myklebust@hammerspace.com



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH RFC 1/2] NFSD: nfsd4_release_lockowner() should drop clp->cl_lock sooner
  2022-05-11 21:52 ` [PATCH RFC 1/2] NFSD: nfsd4_release_lockowner() should drop clp->cl_lock sooner Chuck Lever
  2022-05-11 22:04   ` Trond Myklebust
@ 2022-05-12 10:07   ` Jeff Layton
  2022-05-12 14:10     ` Chuck Lever III
  1 sibling, 1 reply; 11+ messages in thread
From: Jeff Layton @ 2022-05-12 10:07 UTC (permalink / raw)
  To: Chuck Lever, linux-nfs; +Cc: trondmy, bfields, dai.ngo

On Wed, 2022-05-11 at 17:52 -0400, Chuck Lever wrote:
> nfsd4_release_lockowner() mustn't hold clp->cl_lock when
> check_for_locks() invokes nfsd_file_put(), which can sleep.
> 
> Reported-by: Dai Ngo <dai.ngo@oracle.com>
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> Cc: <stable@vger.kernel.org>
> ---
>  fs/nfsd/nfs4state.c |   56 +++++++++++++++++++++++----------------------------
>  1 file changed, 25 insertions(+), 31 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 234e852fcdfa..e2eb6d031643 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -6611,8 +6611,8 @@ nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny)
>  		deny->ld_type = NFS4_WRITE_LT;
>  }
>  
> -static struct nfs4_lockowner *
> -find_lockowner_str_locked(struct nfs4_client *clp, struct xdr_netobj *owner)
> +static struct nfs4_stateowner *
> +find_stateowner_str_locked(struct nfs4_client *clp, struct xdr_netobj *owner)
>  {
>  	unsigned int strhashval = ownerstr_hashval(owner);
>  	struct nfs4_stateowner *so;
> @@ -6624,11 +6624,22 @@ find_lockowner_str_locked(struct nfs4_client *clp, struct xdr_netobj *owner)
>  		if (so->so_is_open_owner)
>  			continue;
>  		if (same_owner_str(so, owner))
> -			return lockowner(nfs4_get_stateowner(so));
> +			return nfs4_get_stateowner(so);
>  	}
>  	return NULL;
>  }
>  
> +static struct nfs4_lockowner *
> +find_lockowner_str_locked(struct nfs4_client *clp, struct xdr_netobj *owner)
> +{
> +	struct nfs4_stateowner *so;
> +
> +	so = find_stateowner_str_locked(clp, owner);
> +	if (!so)
> +		return NULL;
> +	return lockowner(so);
> +}
> +
>  static struct nfs4_lockowner *
>  find_lockowner_str(struct nfs4_client *clp, struct xdr_netobj *owner)
>  {
> @@ -7305,10 +7316,8 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
>  	struct nfsd4_release_lockowner *rlockowner = &u->release_lockowner;
>  	clientid_t *clid = &rlockowner->rl_clientid;
>  	struct nfs4_stateowner *sop;
> -	struct nfs4_lockowner *lo = NULL;
> +	struct nfs4_lockowner *lo;
>  	struct nfs4_ol_stateid *stp;
> -	struct xdr_netobj *owner = &rlockowner->rl_owner;
> -	unsigned int hashval = ownerstr_hashval(owner);
>  	__be32 status;
>  	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
>  	struct nfs4_client *clp;
> @@ -7322,32 +7331,18 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
>  		return status;
>  
>  	clp = cstate->clp;
> -	/* Find the matching lock stateowner */
>  	spin_lock(&clp->cl_lock);
> -	list_for_each_entry(sop, &clp->cl_ownerstr_hashtbl[hashval],
> -			    so_strhash) {
> -
> -		if (sop->so_is_open_owner || !same_owner_str(sop, owner))
> -			continue;
> -
> -		/* see if there are still any locks associated with it */
> -		lo = lockowner(sop);
> -		list_for_each_entry(stp, &sop->so_stateids, st_perstateowner) {
> -			if (check_for_locks(stp->st_stid.sc_file, lo)) {
> -				status = nfserr_locks_held;
> -				spin_unlock(&clp->cl_lock);
> -				return status;
> -			}
> -		}
> +	sop = find_stateowner_str_locked(clp, &rlockowner->rl_owner);
> +	spin_unlock(&clp->cl_lock);
> +	if (!sop)
> +		return nfs_ok;
>  
> -		nfs4_get_stateowner(sop);
> -		break;
> -	}
> -	if (!lo) {
> -		spin_unlock(&clp->cl_lock);
> -		return status;
> -	}
> +	lo = lockowner(sop);
> +	list_for_each_entry(stp, &sop->so_stateids, st_perstateowner)
> +		if (check_for_locks(stp->st_stid.sc_file, lo))
> +			return nfserr_locks_held;
>  

It has been a while since I was in this code, but is it safe to walk the
above list without holding the cl_lock?

> +	spin_lock(&clp->cl_lock);
>  	unhash_lockowner_locked(lo);
>  	while (!list_empty(&lo->lo_owner.so_stateids)) {
>  		stp = list_first_entry(&lo->lo_owner.so_stateids,
> @@ -7360,8 +7355,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
>  	free_ol_stateid_reaplist(&reaplist);
>  	remove_blocked_locks(lo);
>  	nfs4_put_stateowner(&lo->lo_owner);
> -
> -	return status;
> +	return nfs_ok;
>  }
>  
>  static inline struct nfs4_client_reclaim *
> 
> 

-- 
Jeff Layton <jlayton@redhat.com>


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH RFC 2/2] NFSD: nfsd_file_put() can sleep
  2022-05-11 21:52 ` [PATCH RFC 2/2] NFSD: nfsd_file_put() can sleep Chuck Lever
@ 2022-05-12 10:08   ` Jeff Layton
  0 siblings, 0 replies; 11+ messages in thread
From: Jeff Layton @ 2022-05-12 10:08 UTC (permalink / raw)
  To: Chuck Lever, linux-nfs; +Cc: trondmy, bfields, dai.ngo

On Wed, 2022-05-11 at 17:52 -0400, Chuck Lever wrote:
> Ensure the lockdep infrastructure can catch calls to nfsd_file_put()
> when spinlocks are held.
> 
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
>  fs/nfsd/filecache.c |    2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
> index 8f7ed5dbb003..60a5d82e59b3 100644
> --- a/fs/nfsd/filecache.c
> +++ b/fs/nfsd/filecache.c
> @@ -301,6 +301,8 @@ nfsd_file_put_noref(struct nfsd_file *nf)
>  void
>  nfsd_file_put(struct nfsd_file *nf)
>  {
> +	might_sleep();
> +
>  	set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
>  	if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) {
>  		nfsd_file_flush(nf);
> 
> 

Reviewed-by: Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH RFC 1/2] NFSD: nfsd4_release_lockowner() should drop clp->cl_lock sooner
  2022-05-11 22:04   ` Trond Myklebust
@ 2022-05-12 14:03     ` Chuck Lever III
  0 siblings, 0 replies; 11+ messages in thread
From: Chuck Lever III @ 2022-05-12 14:03 UTC (permalink / raw)
  To: Trond Myklebust
  Cc: Linux NFS Mailing List, Bruce Fields, Jeff Layton, Dai Ngo



> On May 11, 2022, at 6:04 PM, Trond Myklebust <trondmy@hammerspace.com> wrote:
> 
> On Wed, 2022-05-11 at 17:52 -0400, Chuck Lever wrote:
>> nfsd4_release_lockowner() mustn't hold clp->cl_lock when
>> check_for_locks() invokes nfsd_file_put(), which can sleep.
>> 
>> Reported-by: Dai Ngo <dai.ngo@oracle.com>
>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
>> Cc: <stable@vger.kernel.org>
>> ---
>>  fs/nfsd/nfs4state.c |   56 +++++++++++++++++++++++------------------
>> ----------
>>  1 file changed, 25 insertions(+), 31 deletions(-)
>> 
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index 234e852fcdfa..e2eb6d031643 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -6611,8 +6611,8 @@ nfs4_set_lock_denied(struct file_lock *fl,
>> struct nfsd4_lock_denied *deny)
>>                 deny->ld_type = NFS4_WRITE_LT;
>>  }
>>  
>> -static struct nfs4_lockowner *
>> -find_lockowner_str_locked(struct nfs4_client *clp, struct xdr_netobj
>> *owner)
>> +static struct nfs4_stateowner *
>> +find_stateowner_str_locked(struct nfs4_client *clp, struct
>> xdr_netobj *owner)
>>  {
>>         unsigned int strhashval = ownerstr_hashval(owner);
>>         struct nfs4_stateowner *so;
>> @@ -6624,11 +6624,22 @@ find_lockowner_str_locked(struct nfs4_client
>> *clp, struct xdr_netobj *owner)
>>                 if (so->so_is_open_owner)
>>                         continue;
>>                 if (same_owner_str(so, owner))
>> -                       return lockowner(nfs4_get_stateowner(so));
>> +                       return nfs4_get_stateowner(so);
> 
> Hmm... If nfs4_get_stateowner() can fail here, don't you want to
> continue searching the list when it does?

You've lost me on this.

 494 static inline struct nfs4_stateowner *
 495 nfs4_get_stateowner(struct nfs4_stateowner *sop)
 496 {
 497         atomic_inc(&sop->so_count);
 498         return sop;
 499 }
 500 

How would nfs4_get_stateowner() fail?


>>         }
>>         return NULL;
>>  }
>>  
>> +static struct nfs4_lockowner *
>> +find_lockowner_str_locked(struct nfs4_client *clp, struct xdr_netobj
>> *owner)
>> +{
>> +       struct nfs4_stateowner *so;
>> +
>> +       so = find_stateowner_str_locked(clp, owner);
>> +       if (!so)
>> +               return NULL;
>> +       return lockowner(so);
>> +}
>> +
>>  static struct nfs4_lockowner *
>>  find_lockowner_str(struct nfs4_client *clp, struct xdr_netobj
>> *owner)
>>  {
>> @@ -7305,10 +7316,8 @@ nfsd4_release_lockowner(struct svc_rqst
>> *rqstp,
>>         struct nfsd4_release_lockowner *rlockowner = &u-
>>> release_lockowner;
>>         clientid_t *clid = &rlockowner->rl_clientid;
>>         struct nfs4_stateowner *sop;
>> -       struct nfs4_lockowner *lo = NULL;
>> +       struct nfs4_lockowner *lo;
>>         struct nfs4_ol_stateid *stp;
>> -       struct xdr_netobj *owner = &rlockowner->rl_owner;
>> -       unsigned int hashval = ownerstr_hashval(owner);
>>         __be32 status;
>>         struct nfsd_net *nn = net_generic(SVC_NET(rqstp),
>> nfsd_net_id);
>>         struct nfs4_client *clp;
>> @@ -7322,32 +7331,18 @@ nfsd4_release_lockowner(struct svc_rqst
>> *rqstp,
>>                 return status;
>>  
>>         clp = cstate->clp;
>> -       /* Find the matching lock stateowner */
>>         spin_lock(&clp->cl_lock);
>> -       list_for_each_entry(sop, &clp->cl_ownerstr_hashtbl[hashval],
>> -                           so_strhash) {
>> -
>> -               if (sop->so_is_open_owner || !same_owner_str(sop,
>> owner))
>> -                       continue;
>> -
>> -               /* see if there are still any locks associated with
>> it */
>> -               lo = lockowner(sop);
>> -               list_for_each_entry(stp, &sop->so_stateids,
>> st_perstateowner) {
>> -                       if (check_for_locks(stp->st_stid.sc_file,
>> lo)) {
>> -                               status = nfserr_locks_held;
>> -                               spin_unlock(&clp->cl_lock);
>> -                               return status;
>> -                       }
>> -               }
>> +       sop = find_stateowner_str_locked(clp, &rlockowner->rl_owner);
>> +       spin_unlock(&clp->cl_lock);
>> +       if (!sop)
>> +               return nfs_ok;
>>  
>> -               nfs4_get_stateowner(sop);
>> -               break;
>> -       }
>> -       if (!lo) {
>> -               spin_unlock(&clp->cl_lock);
>> -               return status;
>> -       }
>> +       lo = lockowner(sop);
>> +       list_for_each_entry(stp, &sop->so_stateids, st_perstateowner)
>> +               if (check_for_locks(stp->st_stid.sc_file, lo))
>> +                       return nfserr_locks_held;
> 
> Isn't this line now leaking the reference to sop?

Indeed. Fixed.


>>  
>> +       spin_lock(&clp->cl_lock);
>>         unhash_lockowner_locked(lo);
>>         while (!list_empty(&lo->lo_owner.so_stateids)) {
>>                 stp = list_first_entry(&lo->lo_owner.so_stateids,
>> @@ -7360,8 +7355,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
>>         free_ol_stateid_reaplist(&reaplist);
>>         remove_blocked_locks(lo);
>>         nfs4_put_stateowner(&lo->lo_owner);
>> -
>> -       return status;
>> +       return nfs_ok;
>>  }
>>  
>>  static inline struct nfs4_client_reclaim *
>> 
>> 
> 
> -- 
> Trond Myklebust
> Linux NFS client maintainer, Hammerspace
> trond.myklebust@hammerspace.com
> 
> 

--
Chuck Lever




^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH RFC 1/2] NFSD: nfsd4_release_lockowner() should drop clp->cl_lock sooner
  2022-05-12 10:07   ` Jeff Layton
@ 2022-05-12 14:10     ` Chuck Lever III
  0 siblings, 0 replies; 11+ messages in thread
From: Chuck Lever III @ 2022-05-12 14:10 UTC (permalink / raw)
  To: Jeff Layton; +Cc: Linux NFS Mailing List, trondmy, Bruce Fields, Dai Ngo



> On May 12, 2022, at 6:07 AM, Jeff Layton <jlayton@redhat.com> wrote:
> 
> On Wed, 2022-05-11 at 17:52 -0400, Chuck Lever wrote:
>> nfsd4_release_lockowner() mustn't hold clp->cl_lock when
>> check_for_locks() invokes nfsd_file_put(), which can sleep.
>> 
>> Reported-by: Dai Ngo <dai.ngo@oracle.com>
>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
>> Cc: <stable@vger.kernel.org>
>> ---
>> fs/nfsd/nfs4state.c |   56 +++++++++++++++++++++++----------------------------
>> 1 file changed, 25 insertions(+), 31 deletions(-)
>> 
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index 234e852fcdfa..e2eb6d031643 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -6611,8 +6611,8 @@ nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny)
>> 		deny->ld_type = NFS4_WRITE_LT;
>> }
>> 
>> -static struct nfs4_lockowner *
>> -find_lockowner_str_locked(struct nfs4_client *clp, struct xdr_netobj *owner)
>> +static struct nfs4_stateowner *
>> +find_stateowner_str_locked(struct nfs4_client *clp, struct xdr_netobj *owner)
>> {
>> 	unsigned int strhashval = ownerstr_hashval(owner);
>> 	struct nfs4_stateowner *so;
>> @@ -6624,11 +6624,22 @@ find_lockowner_str_locked(struct nfs4_client *clp, struct xdr_netobj *owner)
>> 		if (so->so_is_open_owner)
>> 			continue;
>> 		if (same_owner_str(so, owner))
>> -			return lockowner(nfs4_get_stateowner(so));
>> +			return nfs4_get_stateowner(so);
>> 	}
>> 	return NULL;
>> }
>> 
>> +static struct nfs4_lockowner *
>> +find_lockowner_str_locked(struct nfs4_client *clp, struct xdr_netobj *owner)
>> +{
>> +	struct nfs4_stateowner *so;
>> +
>> +	so = find_stateowner_str_locked(clp, owner);
>> +	if (!so)
>> +		return NULL;
>> +	return lockowner(so);
>> +}
>> +
>> static struct nfs4_lockowner *
>> find_lockowner_str(struct nfs4_client *clp, struct xdr_netobj *owner)
>> {
>> @@ -7305,10 +7316,8 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
>> 	struct nfsd4_release_lockowner *rlockowner = &u->release_lockowner;
>> 	clientid_t *clid = &rlockowner->rl_clientid;
>> 	struct nfs4_stateowner *sop;
>> -	struct nfs4_lockowner *lo = NULL;
>> +	struct nfs4_lockowner *lo;
>> 	struct nfs4_ol_stateid *stp;
>> -	struct xdr_netobj *owner = &rlockowner->rl_owner;
>> -	unsigned int hashval = ownerstr_hashval(owner);
>> 	__be32 status;
>> 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
>> 	struct nfs4_client *clp;
>> @@ -7322,32 +7331,18 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
>> 		return status;
>> 
>> 	clp = cstate->clp;
>> -	/* Find the matching lock stateowner */
>> 	spin_lock(&clp->cl_lock);
>> -	list_for_each_entry(sop, &clp->cl_ownerstr_hashtbl[hashval],
>> -			    so_strhash) {
>> -
>> -		if (sop->so_is_open_owner || !same_owner_str(sop, owner))
>> -			continue;
>> -
>> -		/* see if there are still any locks associated with it */
>> -		lo = lockowner(sop);
>> -		list_for_each_entry(stp, &sop->so_stateids, st_perstateowner) {
>> -			if (check_for_locks(stp->st_stid.sc_file, lo)) {
>> -				status = nfserr_locks_held;
>> -				spin_unlock(&clp->cl_lock);
>> -				return status;
>> -			}
>> -		}
>> +	sop = find_stateowner_str_locked(clp, &rlockowner->rl_owner);
>> +	spin_unlock(&clp->cl_lock);
>> +	if (!sop)
>> +		return nfs_ok;
>> 
>> -		nfs4_get_stateowner(sop);
>> -		break;
>> -	}
>> -	if (!lo) {
>> -		spin_unlock(&clp->cl_lock);
>> -		return status;
>> -	}
>> +	lo = lockowner(sop);
>> +	list_for_each_entry(stp, &sop->so_stateids, st_perstateowner)
>> +		if (check_for_locks(stp->st_stid.sc_file, lo))
>> +			return nfserr_locks_held;
>> 
> 
> It has been a while since I was in this code, but is it safe to walk the
> above list without holding the cl_lock?

Shoot.

I thought holding a reference on the stateowner would be enough,
but looks like everyone else holds cl_lock. More chin scratching
and caffeine needed.


>> +	spin_lock(&clp->cl_lock);
>> 	unhash_lockowner_locked(lo);
>> 	while (!list_empty(&lo->lo_owner.so_stateids)) {
>> 		stp = list_first_entry(&lo->lo_owner.so_stateids,
>> @@ -7360,8 +7355,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
>> 	free_ol_stateid_reaplist(&reaplist);
>> 	remove_blocked_locks(lo);
>> 	nfs4_put_stateowner(&lo->lo_owner);
>> -
>> -	return status;
>> +	return nfs_ok;
>> }
>> 
>> static inline struct nfs4_client_reclaim *
>> 
>> 
> 
> -- 
> Jeff Layton <jlayton@redhat.com>
> 

--
Chuck Lever




^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH RFC 0/2] Fix "sleep while locked" in RELEASE_LOCKOWNER
       [not found] ` <PH0PR14MB549335582C7A4D4F4D2269B2AAD99@PH0PR14MB5493.namprd14.prod.outlook.com>
@ 2022-05-26 23:39   ` Chuck Lever III
  2022-05-27  0:21     ` Charles Hedrick
  0 siblings, 1 reply; 11+ messages in thread
From: Chuck Lever III @ 2022-05-26 23:39 UTC (permalink / raw)
  To: Charles Hedrick
  Cc: Linux NFS Mailing List, trondmy, Bruce Fields, Jeff Layton, Dai Ngo



> On May 26, 2022, at 3:17 PM, Charles Hedrick <hedrick@rutgers.edu> wrote:
> 
> We are still stuck on NFS 3 because NFS 4 lock operations hang. Typically with thunderbird, firefox, etc. I had hoped that Ubuntu 22 would fix this, given the patch 
> 
> UNRPC: Don't call connect() more than once on a TCP socket
> 
> If this is part of the problem, that would mean we couldn't use NFS 4 until Ubuntu 24, i.e. summer of 2025, given delays in release and deployment.
> 
> Unfortunately I can't reproduce our problem. It doesn't show up until we're halfway into our semester and loads start getting heavier.
> 
> You say this is a long-standing issue. So are problems with NFS 4 locking (and also NFS 4 delegation). If you have a patch for both of these issues that we could put into 5.4.0, I might be willing to test it, assuming the patches are safe. We probably wouldn't know it has really fixed things for at least 6 months.

Charles, this mailing list is an upstream Linux forum. There honestly isn't anything we can do about Ubuntu backporting policies, and we can't help much at all with Linux kernels as old as v5.4 unless there are known fixes in later kernels. It's up to you to find those fixes, test them, and then convince the stable kernel folks and your distribution to include the fix in their kernel. The folks on linux-nfs@ are little more than process observers in those communities.

The RELEASE_LOCKOWNER lock inversion issue has been around forever, but it was exposed recently by a performance regression fix in v5.18-rc3. After that point, a client can leverage the existing lock inversion bug to provoke a deadlock on the server using normal NFSv4 operations. That makes the RELEASE_LOCKOWNER issue a potential denial-of-service in the latest kernels, which is priority one in my book.

I stand by my statement to Linus in this morning's pull request: I currently know of no other high priority bugs in v5.18's NFS server (I'm not talking about the NFS client) under active investigation except for the one I mentioned in the PR. If you know of /specific/ reports of significant incorrect behavior in the latest upstream Linux NFS client or server, please post links to them here, or better yet, file bugs and help the assignees to troubleshoot the problems.


--
Chuck Lever




^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH RFC 0/2] Fix "sleep while locked" in RELEASE_LOCKOWNER
  2022-05-26 23:39   ` [PATCH RFC 0/2] Fix "sleep while locked" in RELEASE_LOCKOWNER Chuck Lever III
@ 2022-05-27  0:21     ` Charles Hedrick
  2022-05-27 15:32       ` Chuck Lever III
  0 siblings, 1 reply; 11+ messages in thread
From: Charles Hedrick @ 2022-05-27  0:21 UTC (permalink / raw)
  To: Chuck Lever III
  Cc: Linux NFS Mailing List, trondmy, Bruce Fields, Jeff Layton, Dai Ngo

I was hoping it was reasonable to ask for backporting to the latest LTS. It would be nice if you could make the statement you did about that. At that point I agree that it’s Ubuntu’s problem.

> On May 26, 2022, at 7:39 PM, Chuck Lever III <chuck.lever@oracle.com> wrote:
> 
> 
> 
>> On May 26, 2022, at 3:17 PM, Charles Hedrick <hedrick@rutgers.edu> wrote:
>> 
>> We are still stuck on NFS 3 because NFS 4 lock operations hang. Typically with thunderbird, firefox, etc. I had hoped that Ubuntu 22 would fix this, given the patch 
>> 
>> UNRPC: Don't call connect() more than once on a TCP socket
>> 
>> If this is part of the problem, that would mean we couldn't use NFS 4 until Ubuntu 24, i.e. summer of 2025, given delays in release and deployment.
>> 
>> Unfortunately I can't reproduce our problem. It doesn't show up until we're halfway into our semester and loads start getting heavier.
>> 
>> You say this is a long-standing issue. So are problems with NFS 4 locking (and also NFS 4 delegation). If you have a patch for both of these issues that we could put into 5.4.0, I might be willing to test it, assuming the patches are safe. We probably wouldn't know it has really fixed things for at least 6 months.
> 
> Charles, this mailing list is an upstream Linux forum. There honestly isn't anything we can do about Ubuntu backporting policies, and we can't help much at all with Linux kernels as old as v5.4 unless there are known fixes in later kernels. It's up to you to find those fixes, test them, and then convince the stable kernel folks and your distribution to include the fix in their kernel. The folks on linux-nfs@ are little more than process observers in those communities.
> 
> The RELEASE_LOCKOWNER lock inversion issue has been around forever, but it was exposed recently by a performance regression fix in v5.18-rc3. After that point, a client can leverage the existing lock inversion bug to provoke a deadlock on the server using normal NFSv4 operations. That makes the RELEASE_LOCKOWNER issue a potential denial-of-service in the latest kernels, which is priority one in my book.
> 
> I stand by my statement to Linus in this morning's pull request: I currently know of no other high priority bugs in v5.18's NFS server (I'm not talking about the NFS client) under active investigation except for the one I mentioned in the PR. If you know of /specific/ reports of significant incorrect behavior in the latest upstream Linux NFS client or server, please post links to them here, or better yet, file bugs and help the assignees to troubleshoot the problems.
> 
> 
> --
> Chuck Lever
> 
> 
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH RFC 0/2] Fix "sleep while locked" in RELEASE_LOCKOWNER
  2022-05-27  0:21     ` Charles Hedrick
@ 2022-05-27 15:32       ` Chuck Lever III
  0 siblings, 0 replies; 11+ messages in thread
From: Chuck Lever III @ 2022-05-27 15:32 UTC (permalink / raw)
  To: Charles Hedrick
  Cc: Linux NFS Mailing List, trondmy, Bruce Fields, Jeff Layton, Dai Ngo



> On May 26, 2022, at 8:21 PM, Charles Hedrick <hedrick@rutgers.edu> wrote:
> 
> I was hoping it was reasonable to ask for backporting to the latest LTS.

It is reasonable for you to make that request of stable@, cc: linux-nfs@.
(And you don't have to hijack someone else's thread to do that ;-)

But do note that upstream commit 89f42494f92f "SUNRPC: Don't call connect()
more than once on a TCP socket" has already been backported to v5.4.196
as commit 975a0f14d5cd. v5.4.196 was released just two days ago.

And, fwiw, 89f42494f92f had a Fixes: tag in it. The stable kernels have
some automation that look for that and apply such commits automatically.
Sometimes it takes a while, though.

Good luck.


> It would be nice if you could make the statement you did about that. At that point I agree that it’s Ubuntu’s problem.
> 
>> On May 26, 2022, at 7:39 PM, Chuck Lever III <chuck.lever@oracle.com> wrote:
>> 
>> 
>> 
>>> On May 26, 2022, at 3:17 PM, Charles Hedrick <hedrick@rutgers.edu> wrote:
>>> 
>>> We are still stuck on NFS 3 because NFS 4 lock operations hang. Typically with thunderbird, firefox, etc. I had hoped that Ubuntu 22 would fix this, given the patch 
>>> 
>>> UNRPC: Don't call connect() more than once on a TCP socket
>>> 
>>> If this is part of the problem, that would mean we couldn't use NFS 4 until Ubuntu 24, i.e. summer of 2025, given delays in release and deployment.
>>> 
>>> Unfortunately I can't reproduce our problem. It doesn't show up until we're halfway into our semester and loads start getting heavier.
>>> 
>>> You say this is a long-standing issue. So are problems with NFS 4 locking (and also NFS 4 delegation). If you have a patch for both of these issues that we could put into 5.4.0, I might be willing to test it, assuming the patches are safe. We probably wouldn't know it has really fixed things for at least 6 months.
>> 
>> Charles, this mailing list is an upstream Linux forum. There honestly isn't anything we can do about Ubuntu backporting policies, and we can't help much at all with Linux kernels as old as v5.4 unless there are known fixes in later kernels. It's up to you to find those fixes, test them, and then convince the stable kernel folks and your distribution to include the fix in their kernel. The folks on linux-nfs@ are little more than process observers in those communities.
>> 
>> The RELEASE_LOCKOWNER lock inversion issue has been around forever, but it was exposed recently by a performance regression fix in v5.18-rc3. After that point, a client can leverage the existing lock inversion bug to provoke a deadlock on the server using normal NFSv4 operations. That makes the RELEASE_LOCKOWNER issue a potential denial-of-service in the latest kernels, which is priority one in my book.
>> 
>> I stand by my statement to Linus in this morning's pull request: I currently know of no other high priority bugs in v5.18's NFS server (I'm not talking about the NFS client) under active investigation except for the one I mentioned in the PR. If you know of /specific/ reports of significant incorrect behavior in the latest upstream Linux NFS client or server, please post links to them here, or better yet, file bugs and help the assignees to troubleshoot the problems.
>> 
>> 
>> --
>> Chuck Lever
>> 
>> 
>> 

--
Chuck Lever




^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2022-05-27 15:32 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-11 21:52 [PATCH RFC 0/2] Fix "sleep while locked" in RELEASE_LOCKOWNER Chuck Lever
2022-05-11 21:52 ` [PATCH RFC 1/2] NFSD: nfsd4_release_lockowner() should drop clp->cl_lock sooner Chuck Lever
2022-05-11 22:04   ` Trond Myklebust
2022-05-12 14:03     ` Chuck Lever III
2022-05-12 10:07   ` Jeff Layton
2022-05-12 14:10     ` Chuck Lever III
2022-05-11 21:52 ` [PATCH RFC 2/2] NFSD: nfsd_file_put() can sleep Chuck Lever
2022-05-12 10:08   ` Jeff Layton
     [not found] ` <PH0PR14MB549335582C7A4D4F4D2269B2AAD99@PH0PR14MB5493.namprd14.prod.outlook.com>
2022-05-26 23:39   ` [PATCH RFC 0/2] Fix "sleep while locked" in RELEASE_LOCKOWNER Chuck Lever III
2022-05-27  0:21     ` Charles Hedrick
2022-05-27 15:32       ` Chuck Lever III

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.