linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/3] Three additional fixes for v5.4
@ 2019-08-26 17:12 Chuck Lever
  2019-08-26 17:12 ` [PATCH 1/3] xprtrdma: Recycle MRs after disconnect Chuck Lever
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Chuck Lever @ 2019-08-26 17:12 UTC (permalink / raw)
  To: anna.schumaker; +Cc: linux-rdma, linux-nfs

Hi Anna-

Please review these, and if they are acceptable, include them in
your v5.4 NFS/RDMA linux-next branch. Thanks!

---

Chuck Lever (3):
      xprtrdma: Recycle MRs after disconnect
      xprtrdma: Clear xprt->reestablish_timeout on close
      xprtrdma: Send Queue size grows after a reconnect


 net/sunrpc/xprtrdma/frwr_ops.c  |   35 +++++++++++++++++++++++++++--------
 net/sunrpc/xprtrdma/rpc_rdma.c  |   10 +++++++---
 net/sunrpc/xprtrdma/transport.c |    3 +--
 net/sunrpc/xprtrdma/verbs.c     |   28 ++++++++++++++++------------
 net/sunrpc/xprtrdma/xprt_rdma.h |    1 +
 5 files changed, 52 insertions(+), 25 deletions(-)

--
Chuck Lever

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 1/3] xprtrdma: Recycle MRs after disconnect
  2019-08-26 17:12 [PATCH 0/3] Three additional fixes for v5.4 Chuck Lever
@ 2019-08-26 17:12 ` Chuck Lever
  2019-08-26 17:12 ` [PATCH 2/3] xprtrdma: Clear xprt->reestablish_timeout on close Chuck Lever
  2019-08-26 17:12 ` [PATCH 3/3] xprtrdma: Send Queue size grows after a reconnect Chuck Lever
  2 siblings, 0 replies; 4+ messages in thread
From: Chuck Lever @ 2019-08-26 17:12 UTC (permalink / raw)
  To: anna.schumaker; +Cc: linux-rdma, linux-nfs

The optimization done in "xprtrdma: Simplify rpcrdma_mr_pop" was a
bit too optimistic. MRs left over after a reconnect still need to
be recycled, not added back to the free list, since they could be
in flight or actually fully registered.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 net/sunrpc/xprtrdma/frwr_ops.c  |   35 +++++++++++++++++++++++++++--------
 net/sunrpc/xprtrdma/rpc_rdma.c  |    2 +-
 net/sunrpc/xprtrdma/xprt_rdma.h |    1 +
 3 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 368cdf3..30065a2 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -88,15 +88,8 @@ void frwr_release_mr(struct rpcrdma_mr *mr)
 	kfree(mr);
 }
 
-/* MRs are dynamically allocated, so simply clean up and release the MR.
- * A replacement MR will subsequently be allocated on demand.
- */
-static void
-frwr_mr_recycle_worker(struct work_struct *work)
+static void frwr_mr_recycle(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
 {
-	struct rpcrdma_mr *mr = container_of(work, struct rpcrdma_mr, mr_recycle);
-	struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
-
 	trace_xprtrdma_mr_recycle(mr);
 
 	if (mr->mr_dir != DMA_NONE) {
@@ -114,6 +107,32 @@ void frwr_release_mr(struct rpcrdma_mr *mr)
 	frwr_release_mr(mr);
 }
 
+/* MRs are dynamically allocated, so simply clean up and release the MR.
+ * A replacement MR will subsequently be allocated on demand.
+ */
+static void
+frwr_mr_recycle_worker(struct work_struct *work)
+{
+	struct rpcrdma_mr *mr = container_of(work, struct rpcrdma_mr,
+					     mr_recycle);
+
+	frwr_mr_recycle(mr->mr_xprt, mr);
+}
+
+/* frwr_recycle - Discard MRs
+ * @req: request to reset
+ *
+ * Used after a reconnect. These MRs could be in flight, we can't
+ * tell. Safe thing to do is release them.
+ */
+void frwr_recycle(struct rpcrdma_req *req)
+{
+	struct rpcrdma_mr *mr;
+
+	while ((mr = rpcrdma_mr_pop(&req->rl_registered)))
+		frwr_mr_recycle(mr->mr_xprt, mr);
+}
+
 /* frwr_reset - Place MRs back on the free list
  * @req: request to reset
  *
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 67e1684..19dd29a 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -867,7 +867,7 @@ static bool rpcrdma_prepare_msg_sges(struct rpcrdma_xprt *r_xprt,
 	 * chunks. Very likely the connection has been replaced,
 	 * so these registrations are invalid and unusable.
 	 */
-	frwr_reset(req);
+	frwr_recycle(req);
 
 	/* This implementation supports the following combinations
 	 * of chunk lists in one RPC-over-RDMA Call message:
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index bd1befa..65e6b0e 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -542,6 +542,7 @@ static inline bool rpcrdma_regbuf_dma_map(struct rpcrdma_xprt *r_xprt,
 /* Memory registration calls xprtrdma/frwr_ops.c
  */
 bool frwr_is_supported(struct ib_device *device);
+void frwr_recycle(struct rpcrdma_req *req);
 void frwr_reset(struct rpcrdma_req *req);
 int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep);
 int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr);


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/3] xprtrdma: Clear xprt->reestablish_timeout on close
  2019-08-26 17:12 [PATCH 0/3] Three additional fixes for v5.4 Chuck Lever
  2019-08-26 17:12 ` [PATCH 1/3] xprtrdma: Recycle MRs after disconnect Chuck Lever
@ 2019-08-26 17:12 ` Chuck Lever
  2019-08-26 17:12 ` [PATCH 3/3] xprtrdma: Send Queue size grows after a reconnect Chuck Lever
  2 siblings, 0 replies; 4+ messages in thread
From: Chuck Lever @ 2019-08-26 17:12 UTC (permalink / raw)
  To: anna.schumaker; +Cc: linux-rdma, linux-nfs

Ensure that the re-establishment delay does not grow exponentially
on each good reconnect. This probably should have been part of
commit 675dd90ad093 ("xprtrdma: Modernize ops->connect").

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 net/sunrpc/xprtrdma/rpc_rdma.c  |    8 ++++++--
 net/sunrpc/xprtrdma/transport.c |    3 +--
 net/sunrpc/xprtrdma/verbs.c     |    2 ++
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 19dd29a..b86b5fd 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -1261,8 +1261,6 @@ void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
 	struct rpc_rqst *rqst = rep->rr_rqst;
 	int status;
 
-	xprt->reestablish_timeout = 0;
-
 	switch (rep->rr_proc) {
 	case rdma_msg:
 		status = rpcrdma_decode_msg(r_xprt, rep, rqst);
@@ -1321,6 +1319,12 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
 	u32 credits;
 	__be32 *p;
 
+	/* Any data means we had a useful conversation, so
+	 * then we don't need to delay the next reconnect.
+	 */
+	if (xprt->reestablish_timeout)
+		xprt->reestablish_timeout = 0;
+
 	/* Fixed transport header fields */
 	xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf,
 			rep->rr_hdrbuf.head[0].iov_base, NULL);
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 993b96f..160558b 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -423,8 +423,6 @@ void xprt_rdma_close(struct rpc_xprt *xprt)
 
 	if (ep->rep_connected == -ENODEV)
 		return;
-	if (ep->rep_connected > 0)
-		xprt->reestablish_timeout = 0;
 	rpcrdma_ep_disconnect(ep, ia);
 
 	/* Prepare @xprt for the next connection by reinitializing
@@ -434,6 +432,7 @@ void xprt_rdma_close(struct rpc_xprt *xprt)
 	xprt->cwnd = RPC_CWNDSHIFT;
 
 out:
+	xprt->reestablish_timeout = 0;
 	++xprt->connect_cookie;
 	xprt_disconnect_done(xprt);
 }
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index ac2abf4..1dadc9e 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -731,6 +731,8 @@ void rpcrdma_ep_destroy(struct rpcrdma_xprt *r_xprt)
 	if (rc)
 		goto out;
 
+	if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO)
+		xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
 	wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);
 	if (ep->rep_connected <= 0) {
 		if (ep->rep_connected == -EAGAIN)


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 3/3] xprtrdma: Send Queue size grows after a reconnect
  2019-08-26 17:12 [PATCH 0/3] Three additional fixes for v5.4 Chuck Lever
  2019-08-26 17:12 ` [PATCH 1/3] xprtrdma: Recycle MRs after disconnect Chuck Lever
  2019-08-26 17:12 ` [PATCH 2/3] xprtrdma: Clear xprt->reestablish_timeout on close Chuck Lever
@ 2019-08-26 17:12 ` Chuck Lever
  2 siblings, 0 replies; 4+ messages in thread
From: Chuck Lever @ 2019-08-26 17:12 UTC (permalink / raw)
  To: anna.schumaker; +Cc: linux-rdma, linux-nfs

Eli Dorfman reports that after a series of idle disconnects, an
RPC/RDMA transport becomes unusable (rdma_create_qp returns
-ENOMEM). Problem was tracked down to increasing Send Queue size
after each reconnect.

The rdma_create_qp() API does not promise to leave its @qp_init_attr
parameter unaltered. In fact, some drivers do modify one or more of
its fields. Thus our calls to rdma_create_qp must use a fresh copy
of ib_qp_init_attr each time.

This fix is appropriate for kernels dating back to late 2007, though
it will have to be adapted, as the connect code has changed over the
years.

Reported-by: Eli Dorfman <eli@vastdata.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 net/sunrpc/xprtrdma/verbs.c |   26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 1dadc9e..7969457 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -606,10 +606,10 @@ void rpcrdma_ep_destroy(struct rpcrdma_xprt *r_xprt)
  * Unlike a normal reconnection, a fresh PD and a new set
  * of MRs and buffers is needed.
  */
-static int
-rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
-			 struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
+static int rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
+				    struct ib_qp_init_attr *qp_init_attr)
 {
+	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 	int rc, err;
 
 	trace_xprtrdma_reinsert(r_xprt);
@@ -626,7 +626,7 @@ void rpcrdma_ep_destroy(struct rpcrdma_xprt *r_xprt)
 	}
 
 	rc = -ENETUNREACH;
-	err = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
+	err = rdma_create_qp(ia->ri_id, ia->ri_pd, qp_init_attr);
 	if (err) {
 		pr_err("rpcrdma: rdma_create_qp returned %d\n", err);
 		goto out3;
@@ -643,16 +643,16 @@ void rpcrdma_ep_destroy(struct rpcrdma_xprt *r_xprt)
 	return rc;
 }
 
-static int
-rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep,
-		     struct rpcrdma_ia *ia)
+static int rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt,
+				struct ib_qp_init_attr *qp_init_attr)
 {
+	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 	struct rdma_cm_id *id, *old;
 	int err, rc;
 
 	trace_xprtrdma_reconnect(r_xprt);
 
-	rpcrdma_ep_disconnect(ep, ia);
+	rpcrdma_ep_disconnect(&r_xprt->rx_ep, ia);
 
 	rc = -EHOSTUNREACH;
 	id = rpcrdma_create_id(r_xprt, ia);
@@ -674,7 +674,7 @@ void rpcrdma_ep_destroy(struct rpcrdma_xprt *r_xprt)
 		goto out_destroy;
 	}
 
-	err = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
+	err = rdma_create_qp(id, ia->ri_pd, qp_init_attr);
 	if (err)
 		goto out_destroy;
 
@@ -699,25 +699,27 @@ void rpcrdma_ep_destroy(struct rpcrdma_xprt *r_xprt)
 	struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
 						   rx_ia);
 	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
+	struct ib_qp_init_attr qp_init_attr;
 	int rc;
 
 retry:
+	memcpy(&qp_init_attr, &ep->rep_attr, sizeof(qp_init_attr));
 	switch (ep->rep_connected) {
 	case 0:
 		dprintk("RPC:       %s: connecting...\n", __func__);
-		rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
+		rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &qp_init_attr);
 		if (rc) {
 			rc = -ENETUNREACH;
 			goto out_noupdate;
 		}
 		break;
 	case -ENODEV:
-		rc = rpcrdma_ep_recreate_xprt(r_xprt, ep, ia);
+		rc = rpcrdma_ep_recreate_xprt(r_xprt, &qp_init_attr);
 		if (rc)
 			goto out_noupdate;
 		break;
 	default:
-		rc = rpcrdma_ep_reconnect(r_xprt, ep, ia);
+		rc = rpcrdma_ep_reconnect(r_xprt, &qp_init_attr);
 		if (rc)
 			goto out;
 	}


^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2019-08-26 17:13 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-26 17:12 [PATCH 0/3] Three additional fixes for v5.4 Chuck Lever
2019-08-26 17:12 ` [PATCH 1/3] xprtrdma: Recycle MRs after disconnect Chuck Lever
2019-08-26 17:12 ` [PATCH 2/3] xprtrdma: Clear xprt->reestablish_timeout on close Chuck Lever
2019-08-26 17:12 ` [PATCH 3/3] xprtrdma: Send Queue size grows after a reconnect Chuck Lever

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).