linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Chuck Lever <chuck.lever@oracle.com>
To: linux-rdma@vger.kernel.org, linux-nfs@vger.kernel.org
Subject: [PATCH v1 14/22] xprtrdma: Simplify RPC wake-ups on connect
Date: Mon, 10 Sep 2018 11:10:10 -0400	[thread overview]
Message-ID: <20180910151010.10564.73968.stgit@manet.1015granger.net> (raw)
In-Reply-To: <20180910150040.10564.97487.stgit@manet.1015granger.net>

Currently, when a connection is established, rpcrdma_conn_upcall
invokes rpcrdma_conn_func and then
wake_up_all(&ep->rep_connect_wait). The former wakes waiting RPCs,
but the connect worker is not done yet, and that leads to races,
double wakes, and difficulty understanding how this logic is
supposed to work.

Instead, collect all the "connection established" logic in the
connect worker (xprt_rdma_connect_worker). A disconnect worker is
retained to handle provider upcalls safely.

Fixes: 254f91e2fa1f ("xprtrdma: RPC/RDMA must invoke ... ")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 net/sunrpc/xprtrdma/transport.c |   54 +++++++++++++--------------------------
 net/sunrpc/xprtrdma/verbs.c     |   42 ++++++++++++++++++++++++------
 net/sunrpc/xprtrdma/xprt_rdma.h |    4 +--
 3 files changed, 52 insertions(+), 48 deletions(-)

diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 289d13c..d7c4255 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -225,51 +225,35 @@
 		}
 }
 
-void
-rpcrdma_conn_func(struct rpcrdma_ep *ep)
-{
-	schedule_delayed_work(&ep->rep_connect_worker, 0);
-}
-
-void
-rpcrdma_connect_worker(struct work_struct *work)
+/**
+ * xprt_rdma_connect_worker - establish connection in the background
+ * @work: worker thread context
+ *
+ * Requester holds the xprt's send lock to prevent activity on this
+ * transport while a fresh connection is being established. RPC tasks
+ * sleep on the xprt's pending queue waiting for connect to complete.
+ */
+static void
+xprt_rdma_connect_worker(struct work_struct *work)
 {
-	struct rpcrdma_ep *ep =
-		container_of(work, struct rpcrdma_ep, rep_connect_worker.work);
-	struct rpcrdma_xprt *r_xprt =
-		container_of(ep, struct rpcrdma_xprt, rx_ep);
+	struct rpcrdma_xprt *r_xprt = container_of(work, struct rpcrdma_xprt,
+						   rx_connect_worker.work);
 	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
+	int rc;
 
-	spin_lock_bh(&xprt->transport_lock);
-	if (ep->rep_connected > 0) {
+	rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia);
+	xprt_clear_connecting(xprt);
+	if (r_xprt->rx_ep.rep_connected > 0) {
 		if (!xprt_test_and_set_connected(xprt)) {
 			xprt->stat.connect_count++;
 			xprt->stat.connect_time += (long)jiffies -
 						   xprt->stat.connect_start;
-			xprt_wake_pending_tasks(xprt, 0);
+			xprt_wake_pending_tasks(xprt, -EAGAIN);
 		}
 	} else {
 		if (xprt_test_and_clear_connected(xprt))
-			xprt_wake_pending_tasks(xprt, -ENOTCONN);
+			xprt_wake_pending_tasks(xprt, rc);
 	}
-	spin_unlock_bh(&xprt->transport_lock);
-}
-
-static void
-xprt_rdma_connect_worker(struct work_struct *work)
-{
-	struct rpcrdma_xprt *r_xprt = container_of(work, struct rpcrdma_xprt,
-						   rx_connect_worker.work);
-	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
-	int rc = 0;
-
-	xprt_clear_connected(xprt);
-
-	rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia);
-	if (rc)
-		xprt_wake_pending_tasks(xprt, rc);
-
-	xprt_clear_connecting(xprt);
 }
 
 static void
@@ -302,8 +286,6 @@
 
 	cancel_delayed_work_sync(&r_xprt->rx_connect_worker);
 
-	xprt_clear_connected(xprt);
-
 	rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia);
 	rpcrdma_buffer_destroy(&r_xprt->rx_buf);
 	rpcrdma_ia_close(&r_xprt->rx_ia);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index c60172f..abbd3cd 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -108,6 +108,25 @@
 	}
 }
 
+/**
+ * rpcrdma_disconnect_worker - Force a disconnect
+ * @work: endpoint to be disconnected
+ *
+ * Provider callbacks can possibly run in an IRQ context. This function
+ * is invoked in a worker thread to guarantee that disconnect wake-up
+ * calls are always done in process context.
+ */
+static void
+rpcrdma_disconnect_worker(struct work_struct *work)
+{
+	struct rpcrdma_ep *ep = container_of(work, struct rpcrdma_ep,
+					     rep_disconnect_worker.work);
+	struct rpcrdma_xprt *r_xprt =
+		container_of(ep, struct rpcrdma_xprt, rx_ep);
+
+	xprt_force_disconnect(&r_xprt->rx_xprt);
+}
+
 static void
 rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
 {
@@ -121,7 +140,7 @@
 
 	if (ep->rep_connected == 1) {
 		ep->rep_connected = -EIO;
-		rpcrdma_conn_func(ep);
+		schedule_delayed_work(&ep->rep_disconnect_worker, 0);
 		wake_up_all(&ep->rep_connect_wait);
 	}
 }
@@ -271,13 +290,14 @@
 		++xprt->connect_cookie;
 		ep->rep_connected = 1;
 		rpcrdma_update_connect_private(r_xprt, &event->param.conn);
-		goto connected;
+		wake_up_all(&ep->rep_connect_wait);
+		break;
 	case RDMA_CM_EVENT_CONNECT_ERROR:
 		ep->rep_connected = -ENOTCONN;
-		goto connected;
+		goto disconnected;
 	case RDMA_CM_EVENT_UNREACHABLE:
 		ep->rep_connected = -ENETUNREACH;
-		goto connected;
+		goto disconnected;
 	case RDMA_CM_EVENT_REJECTED:
 		dprintk("rpcrdma: connection to %s:%s rejected: %s\n",
 			rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt),
@@ -285,12 +305,12 @@
 		ep->rep_connected = -ECONNREFUSED;
 		if (event->status == IB_CM_REJ_STALE_CONN)
 			ep->rep_connected = -EAGAIN;
-		goto connected;
+		goto disconnected;
 	case RDMA_CM_EVENT_DISCONNECTED:
 		++xprt->connect_cookie;
 		ep->rep_connected = -ECONNABORTED;
-connected:
-		rpcrdma_conn_func(ep);
+disconnected:
+		xprt_force_disconnect(xprt);
 		wake_up_all(&ep->rep_connect_wait);
 		break;
 	default:
@@ -550,7 +570,8 @@
 				   cdata->max_requests >> 2);
 	ep->rep_send_count = ep->rep_send_batch;
 	init_waitqueue_head(&ep->rep_connect_wait);
-	INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
+	INIT_DELAYED_WORK(&ep->rep_disconnect_worker,
+			  rpcrdma_disconnect_worker);
 
 	sendcq = ib_alloc_cq(ia->ri_device, NULL,
 			     ep->rep_attr.cap.max_send_wr + 1,
@@ -623,7 +644,7 @@
 void
 rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
 {
-	cancel_delayed_work_sync(&ep->rep_connect_worker);
+	cancel_delayed_work_sync(&ep->rep_disconnect_worker);
 
 	if (ia->ri_id && ia->ri_id->qp) {
 		rpcrdma_ep_disconnect(ep, ia);
@@ -736,6 +757,7 @@
 {
 	struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
 						   rx_ia);
+	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
 	int rc;
 
 retry:
@@ -762,6 +784,8 @@
 	}
 
 	ep->rep_connected = 0;
+	xprt_clear_connected(xprt);
+
 	rpcrdma_post_recvs(r_xprt, true);
 
 	rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 5e19bb59..9413a20 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -101,7 +101,7 @@ struct rpcrdma_ep {
 	wait_queue_head_t 	rep_connect_wait;
 	struct rpcrdma_connect_private	rep_cm_private;
 	struct rdma_conn_param	rep_remote_cma;
-	struct delayed_work	rep_connect_worker;
+	struct delayed_work	rep_disconnect_worker;
 };
 
 /* Pre-allocate extra Work Requests for handling backward receives
@@ -556,7 +556,6 @@ int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *,
 				struct rpcrdma_create_data_internal *);
 void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *);
 int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *);
-void rpcrdma_conn_func(struct rpcrdma_ep *ep);
 void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
 
 int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
@@ -655,7 +654,6 @@ static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
 extern unsigned int xprt_rdma_max_inline_read;
 void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap);
 void xprt_rdma_free_addresses(struct rpc_xprt *xprt);
-void rpcrdma_connect_worker(struct work_struct *work);
 void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq);
 int xprt_rdma_init(void);
 void xprt_rdma_cleanup(void);

  parent reply	other threads:[~2018-09-10 20:04 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-10 15:08 [PATCH v1 00/22] NFS/RDMA client patches for v4.20 Chuck Lever
2018-09-10 15:09 ` [PATCH v1 01/22] xprtrdma: Reset credit grant properly after a disconnect Chuck Lever
2018-09-10 15:09 ` [PATCH v1 02/22] xprtrdma: Create more MRs at a time Chuck Lever
2018-09-10 15:09 ` [PATCH v1 03/22] xprtrdma: Explicitly resetting MRs is no longer necessary Chuck Lever
2018-09-10 15:09 ` [PATCH v1 04/22] xprtrdma: Name MR trace events consistently Chuck Lever
2018-09-10 15:09 ` [PATCH v1 05/22] xprtrdma: Refactor chunk encoding Chuck Lever
2018-09-10 15:09 ` [PATCH v1 06/22] xprtrdma: Refactor chunktype handling Chuck Lever
2018-09-10 15:09 ` [PATCH v1 07/22] xprtrdma: Support Write+Reply Replies Chuck Lever
2018-09-10 15:09 ` [PATCH v1 08/22] sunrpc: Fix connect metrics Chuck Lever
2018-09-12 18:41   ` Anna Schumaker
2018-09-10 15:09 ` [PATCH v1 09/22] sunrpc: Report connect_time in seconds Chuck Lever
2018-09-10 15:09 ` [PATCH v1 10/22] xprtrdma: Rename rpcrdma_conn_upcall Chuck Lever
2018-09-10 15:09 ` [PATCH v1 11/22] xprtrdma: Conventional variable names in rpcrdma_conn_upcall Chuck Lever
2018-09-10 15:09 ` [PATCH v1 12/22] xprtrdma: Eliminate "connstate" variable from rpcrdma_conn_upcall() Chuck Lever
2018-09-10 15:10 ` [PATCH v1 13/22] xprtrdma: Re-organize the switch() in rpcrdma_conn_upcall Chuck Lever
2018-09-10 15:10 ` Chuck Lever [this message]
2018-09-10 15:10 ` [PATCH v1 15/22] xprtrdma: Rename rpcrdma_qp_async_error_upcall Chuck Lever
2018-09-10 15:10 ` [PATCH v1 16/22] xprtrdma: Remove memory address of "ep" from an error message Chuck Lever
2018-09-10 15:10 ` [PATCH v1 17/22] svcrdma: Don't disable BH's in backchannel Chuck Lever
2018-09-10 15:10 ` [PATCH v1 18/22] xprtrdma: Move rb_flags initialization Chuck Lever
2018-09-10 15:10 ` [PATCH v1 19/22] xprtrdma: Report when there were zero posted Receives Chuck Lever
2018-09-10 15:10 ` [PATCH v1 20/22] xprtrdma: Add documenting comments Chuck Lever
2018-09-10 15:10 ` [PATCH v1 21/22] xprtrdma: Clean up xprt_rdma_disconnect_inject Chuck Lever
2018-09-10 15:10 ` [PATCH v1 22/22] xprtrdma: Squelch a sparse warning Chuck Lever

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180910151010.10564.73968.stgit@manet.1015granger.net \
    --to=chuck.lever@oracle.com \
    --cc=linux-nfs@vger.kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).