All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
To: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-nfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Subject: [PATCH v1 4/5] xprtrdma: Reduce required number of send SGEs
Date: Fri, 13 Jan 2017 12:43:22 -0500	[thread overview]
Message-ID: <20170113174322.32692.66126.stgit@manet.1015granger.net> (raw)
In-Reply-To: <20170113173023.32692.30661.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>

The MAX_SEND_SGES check introduced in commit 655fec6987be
("xprtrdma: Use gathered Send for large inline messages") fails
for devices that have a small max_sge.

Instead of checking for a large fixed maximum number of SGEs,
check for a minimum small number. RPC-over-RDMA will switch to
using a Read chunk if an xdr_buf has more pages than can fit in
the device's max_sge limit. This is better than failing
altogether to mount the server.

This fix supports devices that have as few as three send SGEs
available.

Reported-by: Selvin Xavier <selvin.xavier-dY08KVG/lbpWk0Htik3J/w@public.gmane.org>
Reported-by: Devesh Sharma <devesh.sharma-dY08KVG/lbpWk0Htik3J/w@public.gmane.org>
Reported-by: Honggang Li <honli-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
Reported-by: Ram Amrani <Ram.Amrani-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
Fixes: 655fec6987be ("xprtrdma: Use gathered Send for large ...")
Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
---
 net/sunrpc/xprtrdma/rpc_rdma.c  |   23 ++++++++++++++++++++++-
 net/sunrpc/xprtrdma/verbs.c     |   13 +++++++------
 net/sunrpc/xprtrdma/xprt_rdma.h |    1 +
 3 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 4909758..ab699f9 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -126,13 +126,34 @@ void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt)
  * plus the RPC call fit under the transport's inline limit. If the
  * combined call message size exceeds that limit, the client must use
  * the read chunk list for this operation.
+ *
+ * A read chunk is also required if sending the RPC call inline would
+ * exceed this device's max_sge limit.
  */
 static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
 				struct rpc_rqst *rqst)
 {
+	struct xdr_buf *xdr = &rqst->rq_snd_buf;
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+	unsigned int count, remaining, offset;
+
+	if (xdr->len > ia->ri_max_inline_write)
+		return false;
+
+	if (xdr->page_len) {
+		remaining = xdr->page_len;
+		offset = xdr->page_base & ~PAGE_MASK;
+		count = 0;
+		while (remaining) {
+			remaining -= min_t(unsigned int,
+					   PAGE_SIZE - offset, remaining);
+			offset = 0;
+			if (++count > ia->ri_max_sgeno)
+				return false;
+		}
+	}
 
-	return rqst->rq_snd_buf.len <= ia->ri_max_inline_write;
+	return true;
 }
 
 /* The client can't know how large the actual reply will be. Thus it
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 12e8242..5dcdd0b 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -488,18 +488,19 @@ static void rpcrdma_destroy_id(struct rdma_cm_id *id)
  */
 int
 rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
-				struct rpcrdma_create_data_internal *cdata)
+		  struct rpcrdma_create_data_internal *cdata)
 {
 	struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private;
+	unsigned int max_qp_wr, max_sge;
 	struct ib_cq *sendcq, *recvcq;
-	unsigned int max_qp_wr;
 	int rc;
 
-	if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_SEND_SGES) {
-		dprintk("RPC:       %s: insufficient sge's available\n",
-			__func__);
+	max_sge = min(ia->ri_device->attrs.max_sge, RPCRDMA_MAX_SEND_SGES);
+	if (max_sge < 3) {
+		pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge);
 		return -ENOMEM;
 	}
+	ia->ri_max_sgeno = max_sge - 3;
 
 	if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) {
 		dprintk("RPC:       %s: insufficient wqe's available\n",
@@ -524,7 +525,7 @@ static void rpcrdma_destroy_id(struct rdma_cm_id *id)
 	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
 	ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
 	ep->rep_attr.cap.max_recv_wr += 1;	/* drain cqe */
-	ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_SEND_SGES;
+	ep->rep_attr.cap.max_send_sge = max_sge;
 	ep->rep_attr.cap.max_recv_sge = 1;
 	ep->rep_attr.cap.max_inline_data = 0;
 	ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index f495df0c..c134d0b 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -74,6 +74,7 @@ struct rpcrdma_ia {
 	unsigned int		ri_max_frmr_depth;
 	unsigned int		ri_max_inline_write;
 	unsigned int		ri_max_inline_read;
+	unsigned int		ri_max_sgeno;
 	bool			ri_reminv_expected;
 	bool			ri_implicit_padding;
 	enum ib_mr_type		ri_mrtype;

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

WARNING: multiple messages have this Message-ID (diff)
From: Chuck Lever <chuck.lever@oracle.com>
To: linux-rdma@vger.kernel.org, linux-nfs@vger.kernel.org
Subject: [PATCH v1 4/5] xprtrdma: Reduce required number of send SGEs
Date: Fri, 13 Jan 2017 12:43:22 -0500	[thread overview]
Message-ID: <20170113174322.32692.66126.stgit@manet.1015granger.net> (raw)
In-Reply-To: <20170113173023.32692.30661.stgit@manet.1015granger.net>

The MAX_SEND_SGES check introduced in commit 655fec6987be
("xprtrdma: Use gathered Send for large inline messages") fails
for devices that have a small max_sge.

Instead of checking for a large fixed maximum number of SGEs,
check for a minimum small number. RPC-over-RDMA will switch to
using a Read chunk if an xdr_buf has more pages than can fit in
the device's max_sge limit. This is better than failing
altogether to mount the server.

This fix supports devices that have as few as three send SGEs
available.

Reported-by: Selvin Xavier <selvin.xavier@broadcom.com>
Reported-by: Devesh Sharma <devesh.sharma@broadcom.com>
Reported-by: Honggang Li <honli@redhat.com>
Reported-by: Ram Amrani <Ram.Amrani@cavium.com>
Fixes: 655fec6987be ("xprtrdma: Use gathered Send for large ...")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 net/sunrpc/xprtrdma/rpc_rdma.c  |   23 ++++++++++++++++++++++-
 net/sunrpc/xprtrdma/verbs.c     |   13 +++++++------
 net/sunrpc/xprtrdma/xprt_rdma.h |    1 +
 3 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 4909758..ab699f9 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -126,13 +126,34 @@ void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt)
  * plus the RPC call fit under the transport's inline limit. If the
  * combined call message size exceeds that limit, the client must use
  * the read chunk list for this operation.
+ *
+ * A read chunk is also required if sending the RPC call inline would
+ * exceed this device's max_sge limit.
  */
 static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
 				struct rpc_rqst *rqst)
 {
+	struct xdr_buf *xdr = &rqst->rq_snd_buf;
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+	unsigned int count, remaining, offset;
+
+	if (xdr->len > ia->ri_max_inline_write)
+		return false;
+
+	if (xdr->page_len) {
+		remaining = xdr->page_len;
+		offset = xdr->page_base & ~PAGE_MASK;
+		count = 0;
+		while (remaining) {
+			remaining -= min_t(unsigned int,
+					   PAGE_SIZE - offset, remaining);
+			offset = 0;
+			if (++count > ia->ri_max_sgeno)
+				return false;
+		}
+	}
 
-	return rqst->rq_snd_buf.len <= ia->ri_max_inline_write;
+	return true;
 }
 
 /* The client can't know how large the actual reply will be. Thus it
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 12e8242..5dcdd0b 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -488,18 +488,19 @@ static void rpcrdma_destroy_id(struct rdma_cm_id *id)
  */
 int
 rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
-				struct rpcrdma_create_data_internal *cdata)
+		  struct rpcrdma_create_data_internal *cdata)
 {
 	struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private;
+	unsigned int max_qp_wr, max_sge;
 	struct ib_cq *sendcq, *recvcq;
-	unsigned int max_qp_wr;
 	int rc;
 
-	if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_SEND_SGES) {
-		dprintk("RPC:       %s: insufficient sge's available\n",
-			__func__);
+	max_sge = min(ia->ri_device->attrs.max_sge, RPCRDMA_MAX_SEND_SGES);
+	if (max_sge < 3) {
+		pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge);
 		return -ENOMEM;
 	}
+	ia->ri_max_sgeno = max_sge - 3;
 
 	if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) {
 		dprintk("RPC:       %s: insufficient wqe's available\n",
@@ -524,7 +525,7 @@ static void rpcrdma_destroy_id(struct rdma_cm_id *id)
 	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
 	ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
 	ep->rep_attr.cap.max_recv_wr += 1;	/* drain cqe */
-	ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_SEND_SGES;
+	ep->rep_attr.cap.max_send_sge = max_sge;
 	ep->rep_attr.cap.max_recv_sge = 1;
 	ep->rep_attr.cap.max_inline_data = 0;
 	ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index f495df0c..c134d0b 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -74,6 +74,7 @@ struct rpcrdma_ia {
 	unsigned int		ri_max_frmr_depth;
 	unsigned int		ri_max_inline_write;
 	unsigned int		ri_max_inline_read;
+	unsigned int		ri_max_sgeno;
 	bool			ri_reminv_expected;
 	bool			ri_implicit_padding;
 	enum ib_mr_type		ri_mrtype;


  parent reply	other threads:[~2017-01-13 17:43 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-01-13 17:42 [PATCH v1 0/5] Fix "support large inline thresholds" Chuck Lever
2017-01-13 17:42 ` Chuck Lever
     [not found] ` <20170113173023.32692.30661.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2017-01-13 17:42   ` [PATCH v1 1/5] xprtrdma: Fix Read chunk padding Chuck Lever
2017-01-13 17:42     ` Chuck Lever
2017-01-13 17:43   ` [PATCH v1 2/5] xprtrdma: Per-connection pad optimization Chuck Lever
2017-01-13 17:43     ` Chuck Lever
2017-01-13 17:43   ` [PATCH v1 3/5] xprtrdma: Disable pad optimization by default Chuck Lever
2017-01-13 17:43     ` Chuck Lever
2017-01-13 17:43   ` Chuck Lever [this message]
2017-01-13 17:43     ` [PATCH v1 4/5] xprtrdma: Reduce required number of send SGEs Chuck Lever
     [not found]     ` <20170113174322.32692.66126.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2017-01-13 18:01       ` Parav Pandit
2017-01-13 18:01         ` Parav Pandit
     [not found]         ` <VI1PR0502MB30080923A8B244BBAD059358D1780-o1MPJYiShExKsLr+rGaxW8DSnupUy6xnnBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
2017-01-13 18:30           ` Chuck Lever
2017-01-13 18:30             ` Chuck Lever
     [not found]             ` <706404D7-A179-4E54-A2C7-534FCC1B5745-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2017-01-13 19:14               ` Parav Pandit
2017-01-13 19:14                 ` Parav Pandit
2017-01-13 17:43   ` [PATCH v1 5/5] xprtrdma: Shrink send SGEs array Chuck Lever
2017-01-13 17:43     ` Chuck Lever
2017-01-20 17:30   ` [PATCH v1 0/5] Fix "support large inline thresholds" Steve Wise
2017-01-20 17:30     ` Steve Wise
2017-01-20 18:17     ` Chuck Lever
2017-01-20 18:17       ` Chuck Lever

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170113174322.32692.66126.stgit@manet.1015granger.net \
    --to=chuck.lever-qhclzuegtsvqt0dzr+alfa@public.gmane.org \
    --cc=linux-nfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.