From: Chuck Lever <chuck.lever@oracle.com>
To: anna.schumaker@netapp.com
Cc: linux-rdma@vger.kernel.org, linux-nfs@vger.kernel.org
Subject: [PATCH v5 04/30] xprtrdma: Replace rpcrdma_receive_wq with a per-xprt workqueue
Date: Wed, 19 Dec 2018 10:58:29 -0500
Message-ID: <20181219155829.11602.21903.stgit@manet.1015granger.net>
In-Reply-To: <20181219155152.11602.18605.stgit@manet.1015granger.net>

To address a connection-close ordering problem, we need the ability
to drain the RPC completions running on rpcrdma_receive_wq for just
one transport. Give each transport its own RPC completion workqueue,
and drain that workqueue when disconnecting the transport.
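
For reference, the lifecycle of a per-transport completion workqueue
follows the usual kernel pattern, roughly sketched below. The names
my_xprt, my_setup, and my_teardown are illustrative only and appear
nowhere in this patch; only the workqueue calls mirror what the diff
does.

	#include <linux/workqueue.h>

	struct my_xprt {
		struct workqueue_struct *completion_wq;
	};

	static int my_setup(struct my_xprt *x, const char *addr)
	{
		/* One named workqueue per transport instance.
		 * WQ_MEM_RECLAIM guarantees forward progress under
		 * memory pressure, which RPC completions that service
		 * writeback require.
		 */
		x->completion_wq = alloc_workqueue("rpcrdma-%s",
						   WQ_MEM_RECLAIM | WQ_HIGHPRI,
						   0, addr);
		return x->completion_wq ? 0 : -ENOMEM;
	}

	static void my_teardown(struct my_xprt *x)
	{
		/* drain_workqueue() returns only once the queue is
		 * truly empty, including work items queued while the
		 * drain is in progress, so no deferred Reply handler
		 * for this transport can still be running afterwards.
		 */
		if (x->completion_wq) {
			drain_workqueue(x->completion_wq);
			destroy_workqueue(x->completion_wq);
			x->completion_wq = NULL;
		}
	}

drain_workqueue() is preferred over flush_workqueue() here because it
also waits for work queued during the drain; afterwards the send queue
is drained to catch Local Invalidate requests that Reply processing
may have posted.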

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 net/sunrpc/xprtrdma/rpc_rdma.c  |    2 +
 net/sunrpc/xprtrdma/transport.c |   17 +++-------
 net/sunrpc/xprtrdma/verbs.c     |   69 +++++++++++++++++++++------------------
 net/sunrpc/xprtrdma/xprt_rdma.h |    6 +--
 4 files changed, 46 insertions(+), 48 deletions(-)

diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index dc23977..5738c9f 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -1356,7 +1356,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
 	clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
 
 	trace_xprtrdma_reply(rqst->rq_task, rep, req, credits);
-	queue_work(rpcrdma_receive_wq, &rep->rr_work);
+	queue_work(buf->rb_completion_wq, &rep->rr_work);
 	return;
 
 out_badversion:
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index ae2a838..91c476a 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -444,10 +444,14 @@
 	struct rpcrdma_ep *ep = &r_xprt->rx_ep;
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 
+	might_sleep();
+
 	dprintk("RPC:       %s: closing xprt %p\n", __func__, xprt);
 
+	/* Prevent marshaling and sending of new requests */
+	xprt_clear_connected(xprt);
+
 	if (test_and_clear_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags)) {
-		xprt_clear_connected(xprt);
 		rpcrdma_ia_remove(ia);
 		return;
 	}
@@ -858,8 +862,6 @@ void xprt_rdma_cleanup(void)
 		dprintk("RPC:       %s: xprt_unregister returned %i\n",
 			__func__, rc);
 
-	rpcrdma_destroy_wq();
-
 	rc = xprt_unregister_transport(&xprt_rdma_bc);
 	if (rc)
 		dprintk("RPC:       %s: xprt_unregister(bc) returned %i\n",
@@ -870,20 +872,13 @@ int xprt_rdma_init(void)
 {
 	int rc;
 
-	rc = rpcrdma_alloc_wq();
-	if (rc)
-		return rc;
-
 	rc = xprt_register_transport(&xprt_rdma);
-	if (rc) {
-		rpcrdma_destroy_wq();
+	if (rc)
 		return rc;
-	}
 
 	rc = xprt_register_transport(&xprt_rdma_bc);
 	if (rc) {
 		xprt_unregister_transport(&xprt_rdma);
-		rpcrdma_destroy_wq();
 		return rc;
 	}
 
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index e4461e7..cff3a5d 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -80,33 +80,23 @@
 static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
 static void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp);
 
-struct workqueue_struct *rpcrdma_receive_wq __read_mostly;
-
-int
-rpcrdma_alloc_wq(void)
+/* Wait for outstanding transport work to finish.
+ */
+static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt)
 {
-	struct workqueue_struct *recv_wq;
-
-	recv_wq = alloc_workqueue("xprtrdma_receive",
-				  WQ_MEM_RECLAIM | WQ_HIGHPRI,
-				  0);
-	if (!recv_wq)
-		return -ENOMEM;
-
-	rpcrdma_receive_wq = recv_wq;
-	return 0;
-}
+	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 
-void
-rpcrdma_destroy_wq(void)
-{
-	struct workqueue_struct *wq;
+	/* Flush Receives, then wait for deferred Reply work
+	 * to complete.
+	 */
+	ib_drain_qp(ia->ri_id->qp);
+	drain_workqueue(buf->rb_completion_wq);
 
-	if (rpcrdma_receive_wq) {
-		wq = rpcrdma_receive_wq;
-		rpcrdma_receive_wq = NULL;
-		destroy_workqueue(wq);
-	}
+	/* Deferred Reply processing might have scheduled
+	 * local invalidations.
+	 */
+	ib_drain_sq(ia->ri_id->qp);
 }
 
 /**
@@ -483,7 +473,7 @@
 	 *   connection is already gone.
 	 */
 	if (ia->ri_id->qp) {
-		ib_drain_qp(ia->ri_id->qp);
+		rpcrdma_xprt_drain(r_xprt);
 		rdma_destroy_qp(ia->ri_id);
 		ia->ri_id->qp = NULL;
 	}
@@ -825,8 +815,10 @@
 	return rc;
 }
 
-/*
- * rpcrdma_ep_disconnect
+/**
+ * rpcrdma_ep_disconnect - Disconnect underlying transport
+ * @ep: endpoint to disconnect
+ * @ia: associated interface adapter
  *
  * This is separate from destroy to facilitate the ability
  * to reconnect without recreating the endpoint.
@@ -837,19 +829,20 @@
 void
 rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
 {
+	struct rpcrdma_xprt *r_xprt = container_of(ep, struct rpcrdma_xprt,
+						   rx_ep);
 	int rc;
 
+	/* returns without wait if ID is not connected */
 	rc = rdma_disconnect(ia->ri_id);
 	if (!rc)
-		/* returns without wait if not connected */
 		wait_event_interruptible(ep->rep_connect_wait,
 							ep->rep_connected != 1);
 	else
 		ep->rep_connected = rc;
-	trace_xprtrdma_disconnect(container_of(ep, struct rpcrdma_xprt,
-					       rx_ep), rc);
+	trace_xprtrdma_disconnect(r_xprt, rc);
 
-	ib_drain_qp(ia->ri_id->qp);
+	rpcrdma_xprt_drain(r_xprt);
 }
 
 /* Fixed-size circular FIFO queue. This implementation is wait-free and
@@ -1183,6 +1176,15 @@ struct rpcrdma_req *
 	if (rc)
 		goto out;
 
+	buf->rb_completion_wq = alloc_workqueue("rpcrdma-%s",
+						WQ_MEM_RECLAIM | WQ_HIGHPRI,
+						0,
+			r_xprt->rx_xprt.address_strings[RPC_DISPLAY_ADDR]);
+	if (!buf->rb_completion_wq) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
 	return 0;
 out:
 	rpcrdma_buffer_destroy(buf);
@@ -1241,6 +1241,11 @@ struct rpcrdma_req *
 {
 	cancel_delayed_work_sync(&buf->rb_refresh_worker);
 
+	if (buf->rb_completion_wq) {
+		destroy_workqueue(buf->rb_completion_wq);
+		buf->rb_completion_wq = NULL;
+	}
+
 	rpcrdma_sendctxs_destroy(buf);
 
 	while (!list_empty(&buf->rb_recv_bufs)) {
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 788124c..3f198cd 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -412,6 +412,7 @@ struct rpcrdma_buffer {
 
 	u32			rb_bc_max_requests;
 
+	struct workqueue_struct *rb_completion_wq;
 	struct delayed_work	rb_refresh_worker;
 };
 #define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
@@ -547,8 +548,6 @@ struct rpcrdma_xprt {
 bool frwr_is_supported(struct rpcrdma_ia *);
 bool fmr_is_supported(struct rpcrdma_ia *);
 
-extern struct workqueue_struct *rpcrdma_receive_wq;
-
 /*
  * Endpoint calls - xprtrdma/verbs.c
  */
@@ -603,9 +602,6 @@ struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction,
 	return __rpcrdma_dma_map_regbuf(ia, rb);
 }
 
-int rpcrdma_alloc_wq(void);
-void rpcrdma_destroy_wq(void);
-
 /*
  * Wrappers for chunk registration, shared by read/write chunk code.
  */


Thread overview: 31+ messages
2018-12-19 15:58 [PATCH v5 00/30] NFS/RDMA client for next (4.21) Chuck Lever
2018-12-19 15:58 ` [PATCH v5 01/30] xprtrdma: Yet another double DMA-unmap Chuck Lever
2018-12-19 15:58 ` [PATCH v5 02/30] xprtrdma: Ensure MRs are DMA-unmapped when posting LOCAL_INV fails Chuck Lever
2018-12-19 15:58 ` [PATCH v5 03/30] xprtrdma: Refactor Receive accounting Chuck Lever
2018-12-19 15:58 ` [PATCH v5 04/30] xprtrdma: Replace rpcrdma_receive_wq with a per-xprt workqueue Chuck Lever [this message]
2018-12-19 15:58 ` [PATCH v5 05/30] xprtrdma: No qp_event disconnect Chuck Lever
2018-12-19 15:58 ` [PATCH v5 06/30] xprtrdma: Don't wake pending tasks until disconnect is done Chuck Lever
2018-12-19 15:58 ` [PATCH v5 07/30] xprtrdma: Fix ri_max_segs and the result of ro_maxpages Chuck Lever
2018-12-19 15:58 ` [PATCH v5 08/30] xprtrdma: Reduce max_frwr_depth Chuck Lever
2018-12-19 15:58 ` [PATCH v5 09/30] xprtrdma: Remove support for FMR memory registration Chuck Lever
2018-12-19 15:59 ` [PATCH v5 10/30] xprtrdma: Remove rpcrdma_memreg_ops Chuck Lever
2018-12-19 15:59 ` [PATCH v5 11/30] xprtrdma: Plant XID in on-the-wire RDMA offset (FRWR) Chuck Lever
2018-12-19 15:59 ` [PATCH v5 12/30] NFS: Make "port=" mount option optional for RDMA mounts Chuck Lever
2018-12-19 15:59 ` [PATCH v5 13/30] xprtrdma: Recognize XDRBUF_SPARSE_PAGES Chuck Lever
2018-12-19 15:59 ` [PATCH v5 14/30] xprtrdma: Remove request_module from backchannel Chuck Lever
2018-12-19 15:59 ` [PATCH v5 15/30] xprtrdma: Expose transport header errors Chuck Lever
2018-12-19 15:59 ` [PATCH v5 16/30] xprtrdma: Simplify locking that protects the rl_allreqs list Chuck Lever
2018-12-19 15:59 ` [PATCH v5 17/30] xprtrdma: Cull dprintk() call sites Chuck Lever
2018-12-19 15:59 ` [PATCH v5 18/30] xprtrdma: Remove unused fields from rpcrdma_ia Chuck Lever
2018-12-19 15:59 ` [PATCH v5 19/30] xprtrdma: Clean up of xprtrdma chunk trace points Chuck Lever
2018-12-19 15:59 ` [PATCH v5 20/30] xprtrdma: Relocate the xprtrdma_mr_map " Chuck Lever
2018-12-19 16:00 ` [PATCH v5 21/30] xprtrdma: Add trace points for calls to transport switch methods Chuck Lever
2018-12-19 16:00 ` [PATCH v5 22/30] xprtrdma: Trace mapping, alloc, and dereg failures Chuck Lever
2018-12-19 16:00 ` [PATCH v5 23/30] NFS: Fix NFSv4 symbolic trace point output Chuck Lever
2018-12-19 16:00 ` [PATCH v5 24/30] SUNRPC: Simplify defining common RPC trace events Chuck Lever
2018-12-19 16:00 ` [PATCH v5 25/30] SUNRPC: Fix some kernel doc complaints Chuck Lever
2018-12-19 16:00 ` [PATCH v5 26/30] xprtrdma: Update comments in frwr_op_send Chuck Lever
2018-12-19 16:00 ` [PATCH v5 27/30] xprtrdma: Replace outdated comment for rpcrdma_ep_post Chuck Lever
2018-12-19 16:00 ` [PATCH v5 28/30] xprtrdma: Add documenting comment for rpcrdma_buffer_destroy Chuck Lever
2018-12-19 16:00 ` [PATCH v5 29/30] xprtrdma: Clarify comments in rpcrdma_ia_remove Chuck Lever
2018-12-19 16:00 ` [PATCH v5 30/30] xprtrdma: Don't leak freed MRs Chuck Lever
