All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH V2] xprtrdma: mind the device's max fast register page list depth
@ 2014-04-12 14:40 ` Steve Wise
  0 siblings, 0 replies; 4+ messages in thread
From: Steve Wise @ 2014-04-12 14:40 UTC (permalink / raw)
  To: trond.myklebust-7I+n7zu2hftEKMMhf/gKZA
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	linux-nfs-u79uwXL29TY76Z2rM5mHXA,
	chuck.lever-QHcLZuEGTsvQT0dZR+AlfA

Some RDMA devices don't support a fast register page list depth of
at least RPCRDMA_MAX_DATA_SEGS.  So xprtrdma needs to chunk its fast
register regions according to the minimum of the device's maximum
supported depth and RPCRDMA_MAX_DATA_SEGS.

Changes since V1:

- removed useless BUG_ON()
- removed changes to rpcrdma_register_external() that were unnecessary

Signed-off-by: Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
---

 net/sunrpc/xprtrdma/rpc_rdma.c  |    4 ---
 net/sunrpc/xprtrdma/verbs.c     |   47 +++++++++++++++++++++++++++++----------
 net/sunrpc/xprtrdma/xprt_rdma.h |    1 +
 3 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 96ead52..400aa1b 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -248,10 +248,6 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
 	/* success. all failures return above */
 	req->rl_nchunks = nchunks;
 
-	BUG_ON(nchunks == 0);
-	BUG_ON((r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
-	       && (nchunks > 3));
-
 	/*
 	 * finish off header. If write, marshal discrim and nchunks.
 	 */
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 93726560..55fb09a 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -539,6 +539,11 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
 				__func__);
 			memreg = RPCRDMA_REGISTER;
 #endif
+		} else {
+			/* Mind the ia limit on FRMR page list depth */
+			ia->ri_max_frmr_depth = min_t(unsigned int,
+				RPCRDMA_MAX_DATA_SEGS,
+				devattr.max_fast_reg_page_list_len);
 		}
 		break;
 	}
@@ -659,24 +664,42 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
 	ep->rep_attr.srq = NULL;
 	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
 	switch (ia->ri_memreg_strategy) {
-	case RPCRDMA_FRMR:
+	case RPCRDMA_FRMR: {
+		int depth = 7;
+
 		/* Add room for frmr register and invalidate WRs.
 		 * 1. FRMR reg WR for head
 		 * 2. FRMR invalidate WR for head
-		 * 3. FRMR reg WR for pagelist
-		 * 4. FRMR invalidate WR for pagelist
+		 * 3. N FRMR reg WRs for pagelist
+		 * 4. N FRMR invalidate WRs for pagelist
 		 * 5. FRMR reg WR for tail
 		 * 6. FRMR invalidate WR for tail
 		 * 7. The RDMA_SEND WR
 		 */
-		ep->rep_attr.cap.max_send_wr *= 7;
+
+		/* Calculate N if the device max FRMR depth is smaller than
+		 * RPCRDMA_MAX_DATA_SEGS.
+		 */
+		if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
+			int delta = RPCRDMA_MAX_DATA_SEGS -
+				    ia->ri_max_frmr_depth;
+
+			do {
+				depth += 2; /* FRMR reg + invalidate */
+				delta -= ia->ri_max_frmr_depth;
+			} while (delta > 0);
+
+		}
+		ep->rep_attr.cap.max_send_wr *= depth;
 		if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
-			cdata->max_requests = devattr.max_qp_wr / 7;
+			cdata->max_requests = devattr.max_qp_wr / depth;
 			if (!cdata->max_requests)
 				return -EINVAL;
-			ep->rep_attr.cap.max_send_wr = cdata->max_requests * 7;
+			ep->rep_attr.cap.max_send_wr = cdata->max_requests *
+						       depth;
 		}
 		break;
+	}
 	case RPCRDMA_MEMWINDOWS_ASYNC:
 	case RPCRDMA_MEMWINDOWS:
 		/* Add room for mw_binds+unbinds - overkill! */
@@ -1043,16 +1066,16 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
 	case RPCRDMA_FRMR:
 		for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
 			r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
-							 RPCRDMA_MAX_SEGS);
+						ia->ri_max_frmr_depth);
 			if (IS_ERR(r->r.frmr.fr_mr)) {
 				rc = PTR_ERR(r->r.frmr.fr_mr);
 				dprintk("RPC:       %s: ib_alloc_fast_reg_mr"
 					" failed %i\n", __func__, rc);
 				goto out;
 			}
-			r->r.frmr.fr_pgl =
-				ib_alloc_fast_reg_page_list(ia->ri_id->device,
-							    RPCRDMA_MAX_SEGS);
+			r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
+						ia->ri_id->device,
+						ia->ri_max_frmr_depth);
 			if (IS_ERR(r->r.frmr.fr_pgl)) {
 				rc = PTR_ERR(r->r.frmr.fr_pgl);
 				dprintk("RPC:       %s: "
@@ -1498,8 +1521,8 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
 	seg1->mr_offset -= pageoff;	/* start of page */
 	seg1->mr_len += pageoff;
 	len = -pageoff;
-	if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
-		*nsegs = RPCRDMA_MAX_DATA_SEGS;
+	if (*nsegs > ia->ri_max_frmr_depth)
+		*nsegs = ia->ri_max_frmr_depth;
 	for (page_no = i = 0; i < *nsegs;) {
 		rpcrdma_map_one(ia, seg, writing);
 		pa = seg->mr_dma;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index cc1445d..98340a3 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -66,6 +66,7 @@ struct rpcrdma_ia {
 	struct completion	ri_done;
 	int			ri_async_rc;
 	enum rpcrdma_memreg	ri_memreg_strategy;
+	unsigned int		ri_max_frmr_depth;
 };
 
 /*

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH V2] xprtrdma: mind the device's max fast register page list depth
@ 2014-04-12 14:40 ` Steve Wise
  0 siblings, 0 replies; 4+ messages in thread
From: Steve Wise @ 2014-04-12 14:40 UTC (permalink / raw)
  To: trond.myklebust; +Cc: linux-rdma, linux-nfs, chuck.lever

Some RDMA devices don't support a fast register page list depth of
at least RPCRDMA_MAX_DATA_SEGS.  So xprtrdma needs to chunk its fast
register regions according to the minimum of the device's maximum
supported depth and RPCRDMA_MAX_DATA_SEGS.

Changes since V1:

- removed useless BUG_ON()
- removed changes to rpcrdma_register_external() that were unnecessary

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
---

 net/sunrpc/xprtrdma/rpc_rdma.c  |    4 ---
 net/sunrpc/xprtrdma/verbs.c     |   47 +++++++++++++++++++++++++++++----------
 net/sunrpc/xprtrdma/xprt_rdma.h |    1 +
 3 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 96ead52..400aa1b 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -248,10 +248,6 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
 	/* success. all failures return above */
 	req->rl_nchunks = nchunks;
 
-	BUG_ON(nchunks == 0);
-	BUG_ON((r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
-	       && (nchunks > 3));
-
 	/*
 	 * finish off header. If write, marshal discrim and nchunks.
 	 */
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 93726560..55fb09a 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -539,6 +539,11 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
 				__func__);
 			memreg = RPCRDMA_REGISTER;
 #endif
+		} else {
+			/* Mind the ia limit on FRMR page list depth */
+			ia->ri_max_frmr_depth = min_t(unsigned int,
+				RPCRDMA_MAX_DATA_SEGS,
+				devattr.max_fast_reg_page_list_len);
 		}
 		break;
 	}
@@ -659,24 +664,42 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
 	ep->rep_attr.srq = NULL;
 	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
 	switch (ia->ri_memreg_strategy) {
-	case RPCRDMA_FRMR:
+	case RPCRDMA_FRMR: {
+		int depth = 7;
+
 		/* Add room for frmr register and invalidate WRs.
 		 * 1. FRMR reg WR for head
 		 * 2. FRMR invalidate WR for head
-		 * 3. FRMR reg WR for pagelist
-		 * 4. FRMR invalidate WR for pagelist
+		 * 3. N FRMR reg WRs for pagelist
+		 * 4. N FRMR invalidate WRs for pagelist
 		 * 5. FRMR reg WR for tail
 		 * 6. FRMR invalidate WR for tail
 		 * 7. The RDMA_SEND WR
 		 */
-		ep->rep_attr.cap.max_send_wr *= 7;
+
+		/* Calculate N if the device max FRMR depth is smaller than
+		 * RPCRDMA_MAX_DATA_SEGS.
+		 */
+		if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
+			int delta = RPCRDMA_MAX_DATA_SEGS -
+				    ia->ri_max_frmr_depth;
+
+			do {
+				depth += 2; /* FRMR reg + invalidate */
+				delta -= ia->ri_max_frmr_depth;
+			} while (delta > 0);
+
+		}
+		ep->rep_attr.cap.max_send_wr *= depth;
 		if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
-			cdata->max_requests = devattr.max_qp_wr / 7;
+			cdata->max_requests = devattr.max_qp_wr / depth;
 			if (!cdata->max_requests)
 				return -EINVAL;
-			ep->rep_attr.cap.max_send_wr = cdata->max_requests * 7;
+			ep->rep_attr.cap.max_send_wr = cdata->max_requests *
+						       depth;
 		}
 		break;
+	}
 	case RPCRDMA_MEMWINDOWS_ASYNC:
 	case RPCRDMA_MEMWINDOWS:
 		/* Add room for mw_binds+unbinds - overkill! */
@@ -1043,16 +1066,16 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
 	case RPCRDMA_FRMR:
 		for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
 			r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
-							 RPCRDMA_MAX_SEGS);
+						ia->ri_max_frmr_depth);
 			if (IS_ERR(r->r.frmr.fr_mr)) {
 				rc = PTR_ERR(r->r.frmr.fr_mr);
 				dprintk("RPC:       %s: ib_alloc_fast_reg_mr"
 					" failed %i\n", __func__, rc);
 				goto out;
 			}
-			r->r.frmr.fr_pgl =
-				ib_alloc_fast_reg_page_list(ia->ri_id->device,
-							    RPCRDMA_MAX_SEGS);
+			r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
+						ia->ri_id->device,
+						ia->ri_max_frmr_depth);
 			if (IS_ERR(r->r.frmr.fr_pgl)) {
 				rc = PTR_ERR(r->r.frmr.fr_pgl);
 				dprintk("RPC:       %s: "
@@ -1498,8 +1521,8 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
 	seg1->mr_offset -= pageoff;	/* start of page */
 	seg1->mr_len += pageoff;
 	len = -pageoff;
-	if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
-		*nsegs = RPCRDMA_MAX_DATA_SEGS;
+	if (*nsegs > ia->ri_max_frmr_depth)
+		*nsegs = ia->ri_max_frmr_depth;
 	for (page_no = i = 0; i < *nsegs;) {
 		rpcrdma_map_one(ia, seg, writing);
 		pa = seg->mr_dma;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index cc1445d..98340a3 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -66,6 +66,7 @@ struct rpcrdma_ia {
 	struct completion	ri_done;
 	int			ri_async_rc;
 	enum rpcrdma_memreg	ri_memreg_strategy;
+	unsigned int		ri_max_frmr_depth;
 };
 
 /*


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH V2] xprtrdma: mind the device's max fast register page list depth
  2014-04-12 14:40 ` Steve Wise
@ 2014-04-14 14:31     ` Chuck Lever
  -1 siblings, 0 replies; 4+ messages in thread
From: Chuck Lever @ 2014-04-14 14:31 UTC (permalink / raw)
  To: Steve Wise
  Cc: Trond Myklebust, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	Linux NFS Mailing List


On Apr 12, 2014, at 10:40 AM, Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org> wrote:

> Some rdma devices don't support a fast register page list depth of
> at least RPCRDMA_MAX_DATA_SEGS.  So xprtrdma needs to chunk its fast
> register regions according to the minimum of the device max supported
> depth or RPCRDMA_MAX_DATA_SEGS.
> 
> Changes since V1:
> 
> - removed useless BUG_ON()
> - removed changes to rpcrdma_register_external() that were unnecessary
> 
> Signed-off-by: Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>

Reviewed-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>

> ---
> 
> net/sunrpc/xprtrdma/rpc_rdma.c  |    4 ---
> net/sunrpc/xprtrdma/verbs.c     |   47 +++++++++++++++++++++++++++++----------
> net/sunrpc/xprtrdma/xprt_rdma.h |    1 +
> 3 files changed, 36 insertions(+), 16 deletions(-)
> 
> diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
> index 96ead52..400aa1b 100644
> --- a/net/sunrpc/xprtrdma/rpc_rdma.c
> +++ b/net/sunrpc/xprtrdma/rpc_rdma.c
> @@ -248,10 +248,6 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
> 	/* success. all failures return above */
> 	req->rl_nchunks = nchunks;
> 
> -	BUG_ON(nchunks == 0);
> -	BUG_ON((r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
> -	       && (nchunks > 3));
> -
> 	/*
> 	 * finish off header. If write, marshal discrim and nchunks.
> 	 */
> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
> index 93726560..55fb09a 100644
> --- a/net/sunrpc/xprtrdma/verbs.c
> +++ b/net/sunrpc/xprtrdma/verbs.c
> @@ -539,6 +539,11 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
> 				__func__);
> 			memreg = RPCRDMA_REGISTER;
> #endif
> +		} else {
> +			/* Mind the ia limit on FRMR page list depth */
> +			ia->ri_max_frmr_depth = min_t(unsigned int,
> +				RPCRDMA_MAX_DATA_SEGS,
> +				devattr.max_fast_reg_page_list_len);
> 		}
> 		break;
> 	}
> @@ -659,24 +664,42 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
> 	ep->rep_attr.srq = NULL;
> 	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
> 	switch (ia->ri_memreg_strategy) {
> -	case RPCRDMA_FRMR:
> +	case RPCRDMA_FRMR: {
> +		int depth = 7;
> +
> 		/* Add room for frmr register and invalidate WRs.
> 		 * 1. FRMR reg WR for head
> 		 * 2. FRMR invalidate WR for head
> -		 * 3. FRMR reg WR for pagelist
> -		 * 4. FRMR invalidate WR for pagelist
> +		 * 3. N FRMR reg WRs for pagelist
> +		 * 4. N FRMR invalidate WRs for pagelist
> 		 * 5. FRMR reg WR for tail
> 		 * 6. FRMR invalidate WR for tail
> 		 * 7. The RDMA_SEND WR
> 		 */
> -		ep->rep_attr.cap.max_send_wr *= 7;
> +
> +		/* Calculate N if the device max FRMR depth is smaller than
> +		 * RPCRDMA_MAX_DATA_SEGS.
> +		 */
> +		if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
> +			int delta = RPCRDMA_MAX_DATA_SEGS -
> +				    ia->ri_max_frmr_depth;
> +
> +			do {
> +				depth += 2; /* FRMR reg + invalidate */
> +				delta -= ia->ri_max_frmr_depth;
> +			} while (delta > 0);
> +
> +		}
> +		ep->rep_attr.cap.max_send_wr *= depth;
> 		if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
> -			cdata->max_requests = devattr.max_qp_wr / 7;
> +			cdata->max_requests = devattr.max_qp_wr / depth;
> 			if (!cdata->max_requests)
> 				return -EINVAL;
> -			ep->rep_attr.cap.max_send_wr = cdata->max_requests * 7;
> +			ep->rep_attr.cap.max_send_wr = cdata->max_requests *
> +						       depth;
> 		}
> 		break;
> +	}
> 	case RPCRDMA_MEMWINDOWS_ASYNC:
> 	case RPCRDMA_MEMWINDOWS:
> 		/* Add room for mw_binds+unbinds - overkill! */
> @@ -1043,16 +1066,16 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
> 	case RPCRDMA_FRMR:
> 		for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
> 			r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
> -							 RPCRDMA_MAX_SEGS);
> +						ia->ri_max_frmr_depth);
> 			if (IS_ERR(r->r.frmr.fr_mr)) {
> 				rc = PTR_ERR(r->r.frmr.fr_mr);
> 				dprintk("RPC:       %s: ib_alloc_fast_reg_mr"
> 					" failed %i\n", __func__, rc);
> 				goto out;
> 			}
> -			r->r.frmr.fr_pgl =
> -				ib_alloc_fast_reg_page_list(ia->ri_id->device,
> -							    RPCRDMA_MAX_SEGS);
> +			r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
> +						ia->ri_id->device,
> +						ia->ri_max_frmr_depth);
> 			if (IS_ERR(r->r.frmr.fr_pgl)) {
> 				rc = PTR_ERR(r->r.frmr.fr_pgl);
> 				dprintk("RPC:       %s: "
> @@ -1498,8 +1521,8 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
> 	seg1->mr_offset -= pageoff;	/* start of page */
> 	seg1->mr_len += pageoff;
> 	len = -pageoff;
> -	if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
> -		*nsegs = RPCRDMA_MAX_DATA_SEGS;
> +	if (*nsegs > ia->ri_max_frmr_depth)
> +		*nsegs = ia->ri_max_frmr_depth;
> 	for (page_no = i = 0; i < *nsegs;) {
> 		rpcrdma_map_one(ia, seg, writing);
> 		pa = seg->mr_dma;
> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
> index cc1445d..98340a3 100644
> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
> @@ -66,6 +66,7 @@ struct rpcrdma_ia {
> 	struct completion	ri_done;
> 	int			ri_async_rc;
> 	enum rpcrdma_memreg	ri_memreg_strategy;
> +	unsigned int		ri_max_frmr_depth;
> };
> 
> /*
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com



--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH V2] xprtrdma: mind the device's max fast register page list depth
@ 2014-04-14 14:31     ` Chuck Lever
  0 siblings, 0 replies; 4+ messages in thread
From: Chuck Lever @ 2014-04-14 14:31 UTC (permalink / raw)
  To: Steve Wise; +Cc: Trond Myklebust, linux-rdma, Linux NFS Mailing List


On Apr 12, 2014, at 10:40 AM, Steve Wise <swise@opengridcomputing.com> wrote:

> Some rdma devices don't support a fast register page list depth of
> at least RPCRDMA_MAX_DATA_SEGS.  So xprtrdma needs to chunk its fast
> register regions according to the minimum of the device max supported
> depth or RPCRDMA_MAX_DATA_SEGS.
> 
> Changes since V1:
> 
> - removed useless BUG_ON()
> - removed changes to rpcrdma_register_external() that were unnecessary
> 
> Signed-off-by: Steve Wise <swise@opengridcomputing.com>

Reviewed-by: Chuck Lever <chuck.lever@oracle.com>

> ---
> 
> net/sunrpc/xprtrdma/rpc_rdma.c  |    4 ---
> net/sunrpc/xprtrdma/verbs.c     |   47 +++++++++++++++++++++++++++++----------
> net/sunrpc/xprtrdma/xprt_rdma.h |    1 +
> 3 files changed, 36 insertions(+), 16 deletions(-)
> 
> diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
> index 96ead52..400aa1b 100644
> --- a/net/sunrpc/xprtrdma/rpc_rdma.c
> +++ b/net/sunrpc/xprtrdma/rpc_rdma.c
> @@ -248,10 +248,6 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
> 	/* success. all failures return above */
> 	req->rl_nchunks = nchunks;
> 
> -	BUG_ON(nchunks == 0);
> -	BUG_ON((r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
> -	       && (nchunks > 3));
> -
> 	/*
> 	 * finish off header. If write, marshal discrim and nchunks.
> 	 */
> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
> index 93726560..55fb09a 100644
> --- a/net/sunrpc/xprtrdma/verbs.c
> +++ b/net/sunrpc/xprtrdma/verbs.c
> @@ -539,6 +539,11 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
> 				__func__);
> 			memreg = RPCRDMA_REGISTER;
> #endif
> +		} else {
> +			/* Mind the ia limit on FRMR page list depth */
> +			ia->ri_max_frmr_depth = min_t(unsigned int,
> +				RPCRDMA_MAX_DATA_SEGS,
> +				devattr.max_fast_reg_page_list_len);
> 		}
> 		break;
> 	}
> @@ -659,24 +664,42 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
> 	ep->rep_attr.srq = NULL;
> 	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
> 	switch (ia->ri_memreg_strategy) {
> -	case RPCRDMA_FRMR:
> +	case RPCRDMA_FRMR: {
> +		int depth = 7;
> +
> 		/* Add room for frmr register and invalidate WRs.
> 		 * 1. FRMR reg WR for head
> 		 * 2. FRMR invalidate WR for head
> -		 * 3. FRMR reg WR for pagelist
> -		 * 4. FRMR invalidate WR for pagelist
> +		 * 3. N FRMR reg WRs for pagelist
> +		 * 4. N FRMR invalidate WRs for pagelist
> 		 * 5. FRMR reg WR for tail
> 		 * 6. FRMR invalidate WR for tail
> 		 * 7. The RDMA_SEND WR
> 		 */
> -		ep->rep_attr.cap.max_send_wr *= 7;
> +
> +		/* Calculate N if the device max FRMR depth is smaller than
> +		 * RPCRDMA_MAX_DATA_SEGS.
> +		 */
> +		if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
> +			int delta = RPCRDMA_MAX_DATA_SEGS -
> +				    ia->ri_max_frmr_depth;
> +
> +			do {
> +				depth += 2; /* FRMR reg + invalidate */
> +				delta -= ia->ri_max_frmr_depth;
> +			} while (delta > 0);
> +
> +		}
> +		ep->rep_attr.cap.max_send_wr *= depth;
> 		if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
> -			cdata->max_requests = devattr.max_qp_wr / 7;
> +			cdata->max_requests = devattr.max_qp_wr / depth;
> 			if (!cdata->max_requests)
> 				return -EINVAL;
> -			ep->rep_attr.cap.max_send_wr = cdata->max_requests * 7;
> +			ep->rep_attr.cap.max_send_wr = cdata->max_requests *
> +						       depth;
> 		}
> 		break;
> +	}
> 	case RPCRDMA_MEMWINDOWS_ASYNC:
> 	case RPCRDMA_MEMWINDOWS:
> 		/* Add room for mw_binds+unbinds - overkill! */
> @@ -1043,16 +1066,16 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
> 	case RPCRDMA_FRMR:
> 		for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
> 			r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
> -							 RPCRDMA_MAX_SEGS);
> +						ia->ri_max_frmr_depth);
> 			if (IS_ERR(r->r.frmr.fr_mr)) {
> 				rc = PTR_ERR(r->r.frmr.fr_mr);
> 				dprintk("RPC:       %s: ib_alloc_fast_reg_mr"
> 					" failed %i\n", __func__, rc);
> 				goto out;
> 			}
> -			r->r.frmr.fr_pgl =
> -				ib_alloc_fast_reg_page_list(ia->ri_id->device,
> -							    RPCRDMA_MAX_SEGS);
> +			r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
> +						ia->ri_id->device,
> +						ia->ri_max_frmr_depth);
> 			if (IS_ERR(r->r.frmr.fr_pgl)) {
> 				rc = PTR_ERR(r->r.frmr.fr_pgl);
> 				dprintk("RPC:       %s: "
> @@ -1498,8 +1521,8 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
> 	seg1->mr_offset -= pageoff;	/* start of page */
> 	seg1->mr_len += pageoff;
> 	len = -pageoff;
> -	if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
> -		*nsegs = RPCRDMA_MAX_DATA_SEGS;
> +	if (*nsegs > ia->ri_max_frmr_depth)
> +		*nsegs = ia->ri_max_frmr_depth;
> 	for (page_no = i = 0; i < *nsegs;) {
> 		rpcrdma_map_one(ia, seg, writing);
> 		pa = seg->mr_dma;
> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
> index cc1445d..98340a3 100644
> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
> @@ -66,6 +66,7 @@ struct rpcrdma_ia {
> 	struct completion	ri_done;
> 	int			ri_async_rc;
> 	enum rpcrdma_memreg	ri_memreg_strategy;
> +	unsigned int		ri_max_frmr_depth;
> };
> 
> /*
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com




^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2014-04-14 14:31 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-04-12 14:40 [PATCH V2] xprtrdma: mind the device's max fast register page list depth Steve Wise
2014-04-12 14:40 ` Steve Wise
     [not found] ` <20140412144008.4553.33418.stgit-T4OLL4TyM9aNDNWfRnPdfg@public.gmane.org>
2014-04-14 14:31   ` Chuck Lever
2014-04-14 14:31     ` Chuck Lever

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.