All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] SUNRPC/xprtrdma: Fix reconnection locking
@ 2021-07-26 12:03 trondmy
  2021-07-26 14:17 ` Chuck Lever III
  2021-07-27 15:57 ` Chuck Lever III
  0 siblings, 2 replies; 3+ messages in thread
From: trondmy @ 2021-07-26 12:03 UTC (permalink / raw)
  To: linux-nfs; +Cc: Chuck Lever

From: Trond Myklebust <trond.myklebust@hammerspace.com>

The xprtrdma client code currently relies on the task that initiated the
connect to hold the XPRT_LOCK for the duration of the connection
attempt. If the task is woken early, due to some other event, then that
lock could get released early.
Avoid races by using the same mechanism that the socket code uses of
transferring lock ownership to the RDMA connect worker itself. That
frees us to call rpcrdma_xprt_disconnect() directly since we're now
guaranteed exclusion w.r.t. other callers.

Fixes: 4cf44be6f1e8 ("xprtrdma: Fix recursion into rpcrdma_xprt_disconnect()")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 net/sunrpc/xprt.c               |  2 ++
 net/sunrpc/xprtrdma/transport.c | 11 +++++------
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index aae5a328b15b..b88ac8132054 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -877,6 +877,7 @@ bool xprt_lock_connect(struct rpc_xprt *xprt,
 	spin_unlock(&xprt->transport_lock);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(xprt_lock_connect);
 
 void xprt_unlock_connect(struct rpc_xprt *xprt, void *cookie)
 {
@@ -893,6 +894,7 @@ void xprt_unlock_connect(struct rpc_xprt *xprt, void *cookie)
 	spin_unlock(&xprt->transport_lock);
 	wake_up_bit(&xprt->state, XPRT_LOCKED);
 }
+EXPORT_SYMBOL_GPL(xprt_unlock_connect);
 
 /**
  * xprt_connect - schedule a transport connect operation
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 9c2ffc67c0fd..975aef16ad34 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -250,12 +250,9 @@ xprt_rdma_connect_worker(struct work_struct *work)
 					   xprt->stat.connect_start;
 		xprt_set_connected(xprt);
 		rc = -EAGAIN;
-	} else {
-		/* Force a call to xprt_rdma_close to clean up */
-		spin_lock(&xprt->transport_lock);
-		set_bit(XPRT_CLOSE_WAIT, &xprt->state);
-		spin_unlock(&xprt->transport_lock);
-	}
+	} else
+		rpcrdma_xprt_disconnect(r_xprt);
+	xprt_unlock_connect(xprt, r_xprt);
 	xprt_wake_pending_tasks(xprt, rc);
 }
 
@@ -489,6 +486,8 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
 	struct rpcrdma_ep *ep = r_xprt->rx_ep;
 	unsigned long delay;
 
+	WARN_ON_ONCE(!xprt_lock_connect(xprt, task, r_xprt));
+
 	delay = 0;
 	if (ep && ep->re_connect_status != 0) {
 		delay = xprt_reconnect_delay(xprt);
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] SUNRPC/xprtrdma: Fix reconnection locking
  2021-07-26 12:03 [PATCH] SUNRPC/xprtrdma: Fix reconnection locking trondmy
@ 2021-07-26 14:17 ` Chuck Lever III
  2021-07-27 15:57 ` Chuck Lever III
  1 sibling, 0 replies; 3+ messages in thread
From: Chuck Lever III @ 2021-07-26 14:17 UTC (permalink / raw)
  To: trondmy; +Cc: Linux NFS Mailing List



> On Jul 26, 2021, at 8:03 AM, trondmy@kernel.org wrote:
> 
> From: Trond Myklebust <trond.myklebust@hammerspace.com>
> 
> The xprtrdma client code currently relies on the task that initiated the
> connect to hold the XPRT_LOCK for the duration of the connection
> attempt. If the task is woken early, due to some other event, then that
> lock could get released early.
> Avoid races by using the same mechanism that the socket code uses of
> transferring lock ownership to the RDMA connect worker itself. That
> frees us to call rpcrdma_xprt_disconnect() directly since we're now
> guaranteed exclusion w.r.t. other callers.
> 
> Fixes: 4cf44be6f1e8 ("xprtrdma: Fix recursion into rpcrdma_xprt_disconnect()")
> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>

I would like to test this change with disconnect injection
before it is merged.


> ---
> net/sunrpc/xprt.c               |  2 ++
> net/sunrpc/xprtrdma/transport.c | 11 +++++------
> 2 files changed, 7 insertions(+), 6 deletions(-)
> 
> diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
> index aae5a328b15b..b88ac8132054 100644
> --- a/net/sunrpc/xprt.c
> +++ b/net/sunrpc/xprt.c
> @@ -877,6 +877,7 @@ bool xprt_lock_connect(struct rpc_xprt *xprt,
> 	spin_unlock(&xprt->transport_lock);
> 	return ret;
> }
> +EXPORT_SYMBOL_GPL(xprt_lock_connect);
> 
> void xprt_unlock_connect(struct rpc_xprt *xprt, void *cookie)
> {
> @@ -893,6 +894,7 @@ void xprt_unlock_connect(struct rpc_xprt *xprt, void *cookie)
> 	spin_unlock(&xprt->transport_lock);
> 	wake_up_bit(&xprt->state, XPRT_LOCKED);
> }
> +EXPORT_SYMBOL_GPL(xprt_unlock_connect);
> 
> /**
>  * xprt_connect - schedule a transport connect operation
> diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
> index 9c2ffc67c0fd..975aef16ad34 100644
> --- a/net/sunrpc/xprtrdma/transport.c
> +++ b/net/sunrpc/xprtrdma/transport.c
> @@ -250,12 +250,9 @@ xprt_rdma_connect_worker(struct work_struct *work)
> 					   xprt->stat.connect_start;
> 		xprt_set_connected(xprt);
> 		rc = -EAGAIN;
> -	} else {
> -		/* Force a call to xprt_rdma_close to clean up */
> -		spin_lock(&xprt->transport_lock);
> -		set_bit(XPRT_CLOSE_WAIT, &xprt->state);
> -		spin_unlock(&xprt->transport_lock);
> -	}
> +	} else
> +		rpcrdma_xprt_disconnect(r_xprt);
> +	xprt_unlock_connect(xprt, r_xprt);
> 	xprt_wake_pending_tasks(xprt, rc);
> }
> 
> @@ -489,6 +486,8 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
> 	struct rpcrdma_ep *ep = r_xprt->rx_ep;
> 	unsigned long delay;
> 
> +	WARN_ON_ONCE(!xprt_lock_connect(xprt, task, r_xprt));
> +
> 	delay = 0;
> 	if (ep && ep->re_connect_status != 0) {
> 		delay = xprt_reconnect_delay(xprt);
> -- 
> 2.31.1
> 

--
Chuck Lever




^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] SUNRPC/xprtrdma: Fix reconnection locking
  2021-07-26 12:03 [PATCH] SUNRPC/xprtrdma: Fix reconnection locking trondmy
  2021-07-26 14:17 ` Chuck Lever III
@ 2021-07-27 15:57 ` Chuck Lever III
  1 sibling, 0 replies; 3+ messages in thread
From: Chuck Lever III @ 2021-07-27 15:57 UTC (permalink / raw)
  To: trondmy; +Cc: Linux NFS Mailing List



> On Jul 26, 2021, at 8:03 AM, trondmy@kernel.org wrote:
> 
> From: Trond Myklebust <trond.myklebust@hammerspace.com>
> 
> The xprtrdma client code currently relies on the task that initiated the
> connect to hold the XPRT_LOCK for the duration of the connection
> attempt. If the task is woken early, due to some other event, then that
> lock could get released early.
> Avoid races by using the same mechanism that the socket code uses of
> transferring lock ownership to the RDMA connect worker itself. That
> frees us to call rpcrdma_xprt_disconnect() directly since we're now
> guaranteed exclusion w.r.t. other callers.
> 
> Fixes: 4cf44be6f1e8 ("xprtrdma: Fix recursion into rpcrdma_xprt_disconnect()")
> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>

First, thanks for the clean up. I had been wondering about that,
but I haven't encountered any problems so far with the code as
it was.

Second, after applying this patch and enabling KASAN, I ran
several disconnect injection passes with an intensive software
development workload. About 500 disconnects were injected over
the test runs. I did not encounter any issues.

I inspected a trace capture of a couple injected disconnects
and did not see anything unexpected.

I don't have a server-side disconnect injection rig. I probably
should build one.

Tested-by: Chuck Lever <chuck.lever@oracle.com>

> ---
> net/sunrpc/xprt.c               |  2 ++
> net/sunrpc/xprtrdma/transport.c | 11 +++++------
> 2 files changed, 7 insertions(+), 6 deletions(-)
> 
> diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
> index aae5a328b15b..b88ac8132054 100644
> --- a/net/sunrpc/xprt.c
> +++ b/net/sunrpc/xprt.c
> @@ -877,6 +877,7 @@ bool xprt_lock_connect(struct rpc_xprt *xprt,
> 	spin_unlock(&xprt->transport_lock);
> 	return ret;
> }
> +EXPORT_SYMBOL_GPL(xprt_lock_connect);
> 
> void xprt_unlock_connect(struct rpc_xprt *xprt, void *cookie)
> {
> @@ -893,6 +894,7 @@ void xprt_unlock_connect(struct rpc_xprt *xprt, void *cookie)
> 	spin_unlock(&xprt->transport_lock);
> 	wake_up_bit(&xprt->state, XPRT_LOCKED);
> }
> +EXPORT_SYMBOL_GPL(xprt_unlock_connect);
> 
> /**
>  * xprt_connect - schedule a transport connect operation
> diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
> index 9c2ffc67c0fd..975aef16ad34 100644
> --- a/net/sunrpc/xprtrdma/transport.c
> +++ b/net/sunrpc/xprtrdma/transport.c
> @@ -250,12 +250,9 @@ xprt_rdma_connect_worker(struct work_struct *work)
> 					   xprt->stat.connect_start;
> 		xprt_set_connected(xprt);
> 		rc = -EAGAIN;
> -	} else {
> -		/* Force a call to xprt_rdma_close to clean up */
> -		spin_lock(&xprt->transport_lock);
> -		set_bit(XPRT_CLOSE_WAIT, &xprt->state);
> -		spin_unlock(&xprt->transport_lock);
> -	}
> +	} else
> +		rpcrdma_xprt_disconnect(r_xprt);
> +	xprt_unlock_connect(xprt, r_xprt);
> 	xprt_wake_pending_tasks(xprt, rc);
> }
> 
> @@ -489,6 +486,8 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
> 	struct rpcrdma_ep *ep = r_xprt->rx_ep;
> 	unsigned long delay;
> 
> +	WARN_ON_ONCE(!xprt_lock_connect(xprt, task, r_xprt));
> +
> 	delay = 0;
> 	if (ep && ep->re_connect_status != 0) {
> 		delay = xprt_reconnect_delay(xprt);
> -- 
> 2.31.1
> 

--
Chuck Lever




^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-07-27 15:57 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-26 12:03 [PATCH] SUNRPC/xprtrdma: Fix reconnection locking trondmy
2021-07-26 14:17 ` Chuck Lever III
2021-07-27 15:57 ` Chuck Lever III

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.