* [PATCH V2] xprtrdma: mind the device's max fast register page list depth
@ 2014-04-12 14:40 ` Steve Wise
0 siblings, 0 replies; 4+ messages in thread
From: Steve Wise @ 2014-04-12 14:40 UTC (permalink / raw)
To: trond.myklebust-7I+n7zu2hftEKMMhf/gKZA
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA,
linux-nfs-u79uwXL29TY76Z2rM5mHXA,
chuck.lever-QHcLZuEGTsvQT0dZR+AlfA
Some RDMA devices don't support a fast register page list depth of
at least RPCRDMA_MAX_DATA_SEGS. So xprtrdma needs to chunk its fast
register regions according to the minimum of the device's maximum
supported depth and RPCRDMA_MAX_DATA_SEGS.
Changes since V1:
- removed useless BUG_ON()
- removed changes to rpcrdma_register_external() that were unnecessary
Signed-off-by: Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
---
net/sunrpc/xprtrdma/rpc_rdma.c | 4 ---
net/sunrpc/xprtrdma/verbs.c | 47 +++++++++++++++++++++++++++++----------
net/sunrpc/xprtrdma/xprt_rdma.h | 1 +
3 files changed, 36 insertions(+), 16 deletions(-)
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 96ead52..400aa1b 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -248,10 +248,6 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
/* success. all failures return above */
req->rl_nchunks = nchunks;
- BUG_ON(nchunks == 0);
- BUG_ON((r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
- && (nchunks > 3));
-
/*
* finish off header. If write, marshal discrim and nchunks.
*/
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 93726560..55fb09a 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -539,6 +539,11 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
__func__);
memreg = RPCRDMA_REGISTER;
#endif
+ } else {
+ /* Mind the ia limit on FRMR page list depth */
+ ia->ri_max_frmr_depth = min_t(unsigned int,
+ RPCRDMA_MAX_DATA_SEGS,
+ devattr.max_fast_reg_page_list_len);
}
break;
}
@@ -659,24 +664,42 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep->rep_attr.srq = NULL;
ep->rep_attr.cap.max_send_wr = cdata->max_requests;
switch (ia->ri_memreg_strategy) {
- case RPCRDMA_FRMR:
+ case RPCRDMA_FRMR: {
+ int depth = 7;
+
/* Add room for frmr register and invalidate WRs.
* 1. FRMR reg WR for head
* 2. FRMR invalidate WR for head
- * 3. FRMR reg WR for pagelist
- * 4. FRMR invalidate WR for pagelist
+ * 3. N FRMR reg WRs for pagelist
+ * 4. N FRMR invalidate WRs for pagelist
* 5. FRMR reg WR for tail
* 6. FRMR invalidate WR for tail
* 7. The RDMA_SEND WR
*/
- ep->rep_attr.cap.max_send_wr *= 7;
+
+ /* Calculate N if the device max FRMR depth is smaller than
+ * RPCRDMA_MAX_DATA_SEGS.
+ */
+ if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
+ int delta = RPCRDMA_MAX_DATA_SEGS -
+ ia->ri_max_frmr_depth;
+
+ do {
+ depth += 2; /* FRMR reg + invalidate */
+ delta -= ia->ri_max_frmr_depth;
+ } while (delta > 0);
+
+ }
+ ep->rep_attr.cap.max_send_wr *= depth;
if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
- cdata->max_requests = devattr.max_qp_wr / 7;
+ cdata->max_requests = devattr.max_qp_wr / depth;
if (!cdata->max_requests)
return -EINVAL;
- ep->rep_attr.cap.max_send_wr = cdata->max_requests * 7;
+ ep->rep_attr.cap.max_send_wr = cdata->max_requests *
+ depth;
}
break;
+ }
case RPCRDMA_MEMWINDOWS_ASYNC:
case RPCRDMA_MEMWINDOWS:
/* Add room for mw_binds+unbinds - overkill! */
@@ -1043,16 +1066,16 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
case RPCRDMA_FRMR:
for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
- RPCRDMA_MAX_SEGS);
+ ia->ri_max_frmr_depth);
if (IS_ERR(r->r.frmr.fr_mr)) {
rc = PTR_ERR(r->r.frmr.fr_mr);
dprintk("RPC: %s: ib_alloc_fast_reg_mr"
" failed %i\n", __func__, rc);
goto out;
}
- r->r.frmr.fr_pgl =
- ib_alloc_fast_reg_page_list(ia->ri_id->device,
- RPCRDMA_MAX_SEGS);
+ r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
+ ia->ri_id->device,
+ ia->ri_max_frmr_depth);
if (IS_ERR(r->r.frmr.fr_pgl)) {
rc = PTR_ERR(r->r.frmr.fr_pgl);
dprintk("RPC: %s: "
@@ -1498,8 +1521,8 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
seg1->mr_offset -= pageoff; /* start of page */
seg1->mr_len += pageoff;
len = -pageoff;
- if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
- *nsegs = RPCRDMA_MAX_DATA_SEGS;
+ if (*nsegs > ia->ri_max_frmr_depth)
+ *nsegs = ia->ri_max_frmr_depth;
for (page_no = i = 0; i < *nsegs;) {
rpcrdma_map_one(ia, seg, writing);
pa = seg->mr_dma;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index cc1445d..98340a3 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -66,6 +66,7 @@ struct rpcrdma_ia {
struct completion ri_done;
int ri_async_rc;
enum rpcrdma_memreg ri_memreg_strategy;
+ unsigned int ri_max_frmr_depth;
};
/*
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH V2] xprtrdma: mind the device's max fast register page list depth
@ 2014-04-12 14:40 ` Steve Wise
0 siblings, 0 replies; 4+ messages in thread
From: Steve Wise @ 2014-04-12 14:40 UTC (permalink / raw)
To: trond.myklebust; +Cc: linux-rdma, linux-nfs, chuck.lever
Some RDMA devices don't support a fast register page list depth of
at least RPCRDMA_MAX_DATA_SEGS. So xprtrdma needs to chunk its fast
register regions according to the minimum of the device's maximum
supported depth and RPCRDMA_MAX_DATA_SEGS.
Changes since V1:
- removed useless BUG_ON()
- removed changes to rpcrdma_register_external() that were unnecessary
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
---
net/sunrpc/xprtrdma/rpc_rdma.c | 4 ---
net/sunrpc/xprtrdma/verbs.c | 47 +++++++++++++++++++++++++++++----------
net/sunrpc/xprtrdma/xprt_rdma.h | 1 +
3 files changed, 36 insertions(+), 16 deletions(-)
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 96ead52..400aa1b 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -248,10 +248,6 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
/* success. all failures return above */
req->rl_nchunks = nchunks;
- BUG_ON(nchunks == 0);
- BUG_ON((r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
- && (nchunks > 3));
-
/*
* finish off header. If write, marshal discrim and nchunks.
*/
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 93726560..55fb09a 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -539,6 +539,11 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
__func__);
memreg = RPCRDMA_REGISTER;
#endif
+ } else {
+ /* Mind the ia limit on FRMR page list depth */
+ ia->ri_max_frmr_depth = min_t(unsigned int,
+ RPCRDMA_MAX_DATA_SEGS,
+ devattr.max_fast_reg_page_list_len);
}
break;
}
@@ -659,24 +664,42 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep->rep_attr.srq = NULL;
ep->rep_attr.cap.max_send_wr = cdata->max_requests;
switch (ia->ri_memreg_strategy) {
- case RPCRDMA_FRMR:
+ case RPCRDMA_FRMR: {
+ int depth = 7;
+
/* Add room for frmr register and invalidate WRs.
* 1. FRMR reg WR for head
* 2. FRMR invalidate WR for head
- * 3. FRMR reg WR for pagelist
- * 4. FRMR invalidate WR for pagelist
+ * 3. N FRMR reg WRs for pagelist
+ * 4. N FRMR invalidate WRs for pagelist
* 5. FRMR reg WR for tail
* 6. FRMR invalidate WR for tail
* 7. The RDMA_SEND WR
*/
- ep->rep_attr.cap.max_send_wr *= 7;
+
+ /* Calculate N if the device max FRMR depth is smaller than
+ * RPCRDMA_MAX_DATA_SEGS.
+ */
+ if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
+ int delta = RPCRDMA_MAX_DATA_SEGS -
+ ia->ri_max_frmr_depth;
+
+ do {
+ depth += 2; /* FRMR reg + invalidate */
+ delta -= ia->ri_max_frmr_depth;
+ } while (delta > 0);
+
+ }
+ ep->rep_attr.cap.max_send_wr *= depth;
if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
- cdata->max_requests = devattr.max_qp_wr / 7;
+ cdata->max_requests = devattr.max_qp_wr / depth;
if (!cdata->max_requests)
return -EINVAL;
- ep->rep_attr.cap.max_send_wr = cdata->max_requests * 7;
+ ep->rep_attr.cap.max_send_wr = cdata->max_requests *
+ depth;
}
break;
+ }
case RPCRDMA_MEMWINDOWS_ASYNC:
case RPCRDMA_MEMWINDOWS:
/* Add room for mw_binds+unbinds - overkill! */
@@ -1043,16 +1066,16 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
case RPCRDMA_FRMR:
for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
- RPCRDMA_MAX_SEGS);
+ ia->ri_max_frmr_depth);
if (IS_ERR(r->r.frmr.fr_mr)) {
rc = PTR_ERR(r->r.frmr.fr_mr);
dprintk("RPC: %s: ib_alloc_fast_reg_mr"
" failed %i\n", __func__, rc);
goto out;
}
- r->r.frmr.fr_pgl =
- ib_alloc_fast_reg_page_list(ia->ri_id->device,
- RPCRDMA_MAX_SEGS);
+ r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
+ ia->ri_id->device,
+ ia->ri_max_frmr_depth);
if (IS_ERR(r->r.frmr.fr_pgl)) {
rc = PTR_ERR(r->r.frmr.fr_pgl);
dprintk("RPC: %s: "
@@ -1498,8 +1521,8 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
seg1->mr_offset -= pageoff; /* start of page */
seg1->mr_len += pageoff;
len = -pageoff;
- if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
- *nsegs = RPCRDMA_MAX_DATA_SEGS;
+ if (*nsegs > ia->ri_max_frmr_depth)
+ *nsegs = ia->ri_max_frmr_depth;
for (page_no = i = 0; i < *nsegs;) {
rpcrdma_map_one(ia, seg, writing);
pa = seg->mr_dma;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index cc1445d..98340a3 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -66,6 +66,7 @@ struct rpcrdma_ia {
struct completion ri_done;
int ri_async_rc;
enum rpcrdma_memreg ri_memreg_strategy;
+ unsigned int ri_max_frmr_depth;
};
/*
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH V2] xprtrdma: mind the device's max fast register page list depth
2014-04-12 14:40 ` Steve Wise
@ 2014-04-14 14:31 ` Chuck Lever
-1 siblings, 0 replies; 4+ messages in thread
From: Chuck Lever @ 2014-04-14 14:31 UTC (permalink / raw)
To: Steve Wise
Cc: Trond Myklebust, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
Linux NFS Mailing List
On Apr 12, 2014, at 10:40 AM, Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org> wrote:
> Some rdma devices don't support a fast register page list depth of
> at least RPCRDMA_MAX_DATA_SEGS. So xprtrdma needs to chunk its fast
> register regions according to the minimum of the device max supported
> depth or RPCRDMA_MAX_DATA_SEGS.
>
> Changes since V1:
>
> - removed useless BUG_ON()
> - removed changes to rpcrdma_register_external() that were unnecessary
>
> Signed-off-by: Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
Reviewed-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> ---
>
> net/sunrpc/xprtrdma/rpc_rdma.c | 4 ---
> net/sunrpc/xprtrdma/verbs.c | 47 +++++++++++++++++++++++++++++----------
> net/sunrpc/xprtrdma/xprt_rdma.h | 1 +
> 3 files changed, 36 insertions(+), 16 deletions(-)
>
> diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
> index 96ead52..400aa1b 100644
> --- a/net/sunrpc/xprtrdma/rpc_rdma.c
> +++ b/net/sunrpc/xprtrdma/rpc_rdma.c
> @@ -248,10 +248,6 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
> /* success. all failures return above */
> req->rl_nchunks = nchunks;
>
> - BUG_ON(nchunks == 0);
> - BUG_ON((r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
> - && (nchunks > 3));
> -
> /*
> * finish off header. If write, marshal discrim and nchunks.
> */
> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
> index 93726560..55fb09a 100644
> --- a/net/sunrpc/xprtrdma/verbs.c
> +++ b/net/sunrpc/xprtrdma/verbs.c
> @@ -539,6 +539,11 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
> __func__);
> memreg = RPCRDMA_REGISTER;
> #endif
> + } else {
> + /* Mind the ia limit on FRMR page list depth */
> + ia->ri_max_frmr_depth = min_t(unsigned int,
> + RPCRDMA_MAX_DATA_SEGS,
> + devattr.max_fast_reg_page_list_len);
> }
> break;
> }
> @@ -659,24 +664,42 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
> ep->rep_attr.srq = NULL;
> ep->rep_attr.cap.max_send_wr = cdata->max_requests;
> switch (ia->ri_memreg_strategy) {
> - case RPCRDMA_FRMR:
> + case RPCRDMA_FRMR: {
> + int depth = 7;
> +
> /* Add room for frmr register and invalidate WRs.
> * 1. FRMR reg WR for head
> * 2. FRMR invalidate WR for head
> - * 3. FRMR reg WR for pagelist
> - * 4. FRMR invalidate WR for pagelist
> + * 3. N FRMR reg WRs for pagelist
> + * 4. N FRMR invalidate WRs for pagelist
> * 5. FRMR reg WR for tail
> * 6. FRMR invalidate WR for tail
> * 7. The RDMA_SEND WR
> */
> - ep->rep_attr.cap.max_send_wr *= 7;
> +
> + /* Calculate N if the device max FRMR depth is smaller than
> + * RPCRDMA_MAX_DATA_SEGS.
> + */
> + if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
> + int delta = RPCRDMA_MAX_DATA_SEGS -
> + ia->ri_max_frmr_depth;
> +
> + do {
> + depth += 2; /* FRMR reg + invalidate */
> + delta -= ia->ri_max_frmr_depth;
> + } while (delta > 0);
> +
> + }
> + ep->rep_attr.cap.max_send_wr *= depth;
> if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
> - cdata->max_requests = devattr.max_qp_wr / 7;
> + cdata->max_requests = devattr.max_qp_wr / depth;
> if (!cdata->max_requests)
> return -EINVAL;
> - ep->rep_attr.cap.max_send_wr = cdata->max_requests * 7;
> + ep->rep_attr.cap.max_send_wr = cdata->max_requests *
> + depth;
> }
> break;
> + }
> case RPCRDMA_MEMWINDOWS_ASYNC:
> case RPCRDMA_MEMWINDOWS:
> /* Add room for mw_binds+unbinds - overkill! */
> @@ -1043,16 +1066,16 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
> case RPCRDMA_FRMR:
> for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
> r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
> - RPCRDMA_MAX_SEGS);
> + ia->ri_max_frmr_depth);
> if (IS_ERR(r->r.frmr.fr_mr)) {
> rc = PTR_ERR(r->r.frmr.fr_mr);
> dprintk("RPC: %s: ib_alloc_fast_reg_mr"
> " failed %i\n", __func__, rc);
> goto out;
> }
> - r->r.frmr.fr_pgl =
> - ib_alloc_fast_reg_page_list(ia->ri_id->device,
> - RPCRDMA_MAX_SEGS);
> + r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
> + ia->ri_id->device,
> + ia->ri_max_frmr_depth);
> if (IS_ERR(r->r.frmr.fr_pgl)) {
> rc = PTR_ERR(r->r.frmr.fr_pgl);
> dprintk("RPC: %s: "
> @@ -1498,8 +1521,8 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
> seg1->mr_offset -= pageoff; /* start of page */
> seg1->mr_len += pageoff;
> len = -pageoff;
> - if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
> - *nsegs = RPCRDMA_MAX_DATA_SEGS;
> + if (*nsegs > ia->ri_max_frmr_depth)
> + *nsegs = ia->ri_max_frmr_depth;
> for (page_no = i = 0; i < *nsegs;) {
> rpcrdma_map_one(ia, seg, writing);
> pa = seg->mr_dma;
> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
> index cc1445d..98340a3 100644
> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
> @@ -66,6 +66,7 @@ struct rpcrdma_ia {
> struct completion ri_done;
> int ri_async_rc;
> enum rpcrdma_memreg ri_memreg_strategy;
> + unsigned int ri_max_frmr_depth;
> };
>
> /*
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH V2] xprtrdma: mind the device's max fast register page list depth
@ 2014-04-14 14:31 ` Chuck Lever
0 siblings, 0 replies; 4+ messages in thread
From: Chuck Lever @ 2014-04-14 14:31 UTC (permalink / raw)
To: Steve Wise; +Cc: Trond Myklebust, linux-rdma, Linux NFS Mailing List
On Apr 12, 2014, at 10:40 AM, Steve Wise <swise@opengridcomputing.com> wrote:
> Some rdma devices don't support a fast register page list depth of
> at least RPCRDMA_MAX_DATA_SEGS. So xprtrdma needs to chunk its fast
> register regions according to the minimum of the device max supported
> depth or RPCRDMA_MAX_DATA_SEGS.
>
> Changes since V1:
>
> - removed useless BUG_ON()
> - removed changes to rpcrdma_register_external() that were unnecessary
>
> Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
> ---
>
> net/sunrpc/xprtrdma/rpc_rdma.c | 4 ---
> net/sunrpc/xprtrdma/verbs.c | 47 +++++++++++++++++++++++++++++----------
> net/sunrpc/xprtrdma/xprt_rdma.h | 1 +
> 3 files changed, 36 insertions(+), 16 deletions(-)
>
> diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
> index 96ead52..400aa1b 100644
> --- a/net/sunrpc/xprtrdma/rpc_rdma.c
> +++ b/net/sunrpc/xprtrdma/rpc_rdma.c
> @@ -248,10 +248,6 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
> /* success. all failures return above */
> req->rl_nchunks = nchunks;
>
> - BUG_ON(nchunks == 0);
> - BUG_ON((r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
> - && (nchunks > 3));
> -
> /*
> * finish off header. If write, marshal discrim and nchunks.
> */
> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
> index 93726560..55fb09a 100644
> --- a/net/sunrpc/xprtrdma/verbs.c
> +++ b/net/sunrpc/xprtrdma/verbs.c
> @@ -539,6 +539,11 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
> __func__);
> memreg = RPCRDMA_REGISTER;
> #endif
> + } else {
> + /* Mind the ia limit on FRMR page list depth */
> + ia->ri_max_frmr_depth = min_t(unsigned int,
> + RPCRDMA_MAX_DATA_SEGS,
> + devattr.max_fast_reg_page_list_len);
> }
> break;
> }
> @@ -659,24 +664,42 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
> ep->rep_attr.srq = NULL;
> ep->rep_attr.cap.max_send_wr = cdata->max_requests;
> switch (ia->ri_memreg_strategy) {
> - case RPCRDMA_FRMR:
> + case RPCRDMA_FRMR: {
> + int depth = 7;
> +
> /* Add room for frmr register and invalidate WRs.
> * 1. FRMR reg WR for head
> * 2. FRMR invalidate WR for head
> - * 3. FRMR reg WR for pagelist
> - * 4. FRMR invalidate WR for pagelist
> + * 3. N FRMR reg WRs for pagelist
> + * 4. N FRMR invalidate WRs for pagelist
> * 5. FRMR reg WR for tail
> * 6. FRMR invalidate WR for tail
> * 7. The RDMA_SEND WR
> */
> - ep->rep_attr.cap.max_send_wr *= 7;
> +
> + /* Calculate N if the device max FRMR depth is smaller than
> + * RPCRDMA_MAX_DATA_SEGS.
> + */
> + if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
> + int delta = RPCRDMA_MAX_DATA_SEGS -
> + ia->ri_max_frmr_depth;
> +
> + do {
> + depth += 2; /* FRMR reg + invalidate */
> + delta -= ia->ri_max_frmr_depth;
> + } while (delta > 0);
> +
> + }
> + ep->rep_attr.cap.max_send_wr *= depth;
> if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
> - cdata->max_requests = devattr.max_qp_wr / 7;
> + cdata->max_requests = devattr.max_qp_wr / depth;
> if (!cdata->max_requests)
> return -EINVAL;
> - ep->rep_attr.cap.max_send_wr = cdata->max_requests * 7;
> + ep->rep_attr.cap.max_send_wr = cdata->max_requests *
> + depth;
> }
> break;
> + }
> case RPCRDMA_MEMWINDOWS_ASYNC:
> case RPCRDMA_MEMWINDOWS:
> /* Add room for mw_binds+unbinds - overkill! */
> @@ -1043,16 +1066,16 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
> case RPCRDMA_FRMR:
> for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
> r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
> - RPCRDMA_MAX_SEGS);
> + ia->ri_max_frmr_depth);
> if (IS_ERR(r->r.frmr.fr_mr)) {
> rc = PTR_ERR(r->r.frmr.fr_mr);
> dprintk("RPC: %s: ib_alloc_fast_reg_mr"
> " failed %i\n", __func__, rc);
> goto out;
> }
> - r->r.frmr.fr_pgl =
> - ib_alloc_fast_reg_page_list(ia->ri_id->device,
> - RPCRDMA_MAX_SEGS);
> + r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
> + ia->ri_id->device,
> + ia->ri_max_frmr_depth);
> if (IS_ERR(r->r.frmr.fr_pgl)) {
> rc = PTR_ERR(r->r.frmr.fr_pgl);
> dprintk("RPC: %s: "
> @@ -1498,8 +1521,8 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
> seg1->mr_offset -= pageoff; /* start of page */
> seg1->mr_len += pageoff;
> len = -pageoff;
> - if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
> - *nsegs = RPCRDMA_MAX_DATA_SEGS;
> + if (*nsegs > ia->ri_max_frmr_depth)
> + *nsegs = ia->ri_max_frmr_depth;
> for (page_no = i = 0; i < *nsegs;) {
> rpcrdma_map_one(ia, seg, writing);
> pa = seg->mr_dma;
> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
> index cc1445d..98340a3 100644
> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
> @@ -66,6 +66,7 @@ struct rpcrdma_ia {
> struct completion ri_done;
> int ri_async_rc;
> enum rpcrdma_memreg ri_memreg_strategy;
> + unsigned int ri_max_frmr_depth;
> };
>
> /*
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2014-04-14 14:31 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-04-12 14:40 [PATCH V2] xprtrdma: mind the device's max fast register page list depth Steve Wise
2014-04-12 14:40 ` Steve Wise
[not found] ` <20140412144008.4553.33418.stgit-T4OLL4TyM9aNDNWfRnPdfg@public.gmane.org>
2014-04-14 14:31 ` Chuck Lever
2014-04-14 14:31 ` Chuck Lever
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.