From: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org> To: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org, linux-nfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org Subject: [PATCH v2 18/20] xprtrdma: Allocate RPC/RDMA receive buffer separately from struct rpcrdma_rep Date: Tue, 13 Jan 2015 11:27:12 -0500 [thread overview] Message-ID: <20150113162712.14086.19421.stgit@manet.1015granger.net> (raw) In-Reply-To: <20150113161440.14086.24801.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org> The rr_base field is the buffer where each RPC/RDMA reply header lands. In some cases the RPC reply header also lands in this buffer, just after the RPC/RDMA header. The pre-posted receive buffers are supposed to be the same size on the client and server. For Solaris and Linux, that size is supposed to be 1024 bytes, the inline threshold. The size of the rr_base buffer is currently dependent on RPCRDMA_MAX_DATA_SEGS. When the server constructs a chunk list in the RPC/RDMA header, each segment in the list takes up a little room in the buffer. If we want a large r/wsize maximum, MAX_SEGS will grow significantly, but notice that the inline threshold size won't change. Therefore the inline size is the real limit on the size of the RPC/RDMA header. The largest RPC reply the client can receive via RDMA SEND is also no bigger than the inline size. Thus the size of the pre-posted receive buffer should be exactly the inline size * 2. The MAX_RPCRDMAHDR term should be replaced, and rounding up ( 1 << fls(yada) ) is not necessary. RPC replies received via RDMA WRITE (long replies) are caught in rq_rcv_buf, which is the second half of the RPC send buffer. Ie, such replies are not involved in any way with rr_base. 
Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org> --- net/sunrpc/xprtrdma/rpc_rdma.c | 5 +++-- net/sunrpc/xprtrdma/verbs.c | 27 ++++++++++++++------------- net/sunrpc/xprtrdma/xprt_rdma.h | 14 ++++++-------- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index c1d4a09..02efcaa 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -572,6 +572,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b { unsigned int i, total_len; struct rpcrdma_write_chunk *cur_wchunk; + char *base = (char *)rdmab_to_msg(rep->rr_rdmabuf); i = be32_to_cpu(**iptrp); if (i > max) @@ -599,7 +600,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b return -1; cur_wchunk = (struct rpcrdma_write_chunk *) w; } - if ((char *) cur_wchunk > rep->rr_base + rep->rr_len) + if ((char *)cur_wchunk > base + rep->rr_len) return -1; *iptrp = (__be32 *) cur_wchunk; @@ -753,7 +754,7 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) dprintk("RPC: %s: short/invalid reply\n", __func__); goto repost; } - headerp = (struct rpcrdma_msg *) rep->rr_base; + headerp = rdmab_to_msg(rep->rr_rdmabuf); if (headerp->rm_vers != rpcrdma_version) { dprintk("RPC: %s: invalid version %d\n", __func__, be32_to_cpu(headerp->rm_vers)); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index c81749b..7aac422 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -298,8 +298,9 @@ rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list) rep->rr_len = wc->byte_len; ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device, - rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE); - prefetch(rep->rr_base); + rdmab_addr(rep->rr_rdmabuf), + rep->rr_len, DMA_FROM_DEVICE); + prefetch(rdmab_to_msg(rep->rr_rdmabuf)); out_schedule: list_add_tail(&rep->rr_list, sched_list); @@ 
-1092,23 +1093,21 @@ static struct rpcrdma_rep * rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; - size_t rlen = 1 << fls(cdata->inline_rsize + - sizeof(struct rpcrdma_rep)); + size_t rlen = cdata->inline_rsize << 1; struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_rep *rep; int rc; rc = -ENOMEM; - rep = kmalloc(rlen, GFP_KERNEL); + rep = kzalloc(sizeof(*rep), GFP_KERNEL); if (rep == NULL) goto out; - memset(rep, 0, sizeof(*rep)); - rc = rpcrdma_register_internal(ia, rep->rr_base, rlen - - offsetof(struct rpcrdma_rep, rr_base), - &rep->rr_handle, &rep->rr_iov); - if (rc) + rep->rr_rdmabuf = rpcrdma_alloc_regbuf(ia, rlen, GFP_KERNEL); + if (IS_ERR(rep->rr_rdmabuf)) { + rc = PTR_ERR(rep->rr_rdmabuf); goto out_free; + } rep->rr_buffer = &r_xprt->rx_buf; return rep; @@ -1306,7 +1305,7 @@ rpcrdma_destroy_rep(struct rpcrdma_ia *ia, struct rpcrdma_rep *rep) if (!rep) return; - rpcrdma_deregister_internal(ia, rep->rr_handle, &rep->rr_iov); + rpcrdma_free_regbuf(ia, rep->rr_rdmabuf); kfree(rep); } @@ -2209,11 +2208,13 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, recv_wr.next = NULL; recv_wr.wr_id = (u64) (unsigned long) rep; - recv_wr.sg_list = &rep->rr_iov; + recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; recv_wr.num_sge = 1; ib_dma_sync_single_for_cpu(ia->ri_id->device, - rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL); + rdmab_addr(rep->rr_rdmabuf), + rdmab_length(rep->rr_rdmabuf), + DMA_BIDIRECTIONAL); rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 84ad863..2b69316 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -180,14 +180,12 @@ enum rpcrdma_chunktype { struct rpcrdma_buffer; struct rpcrdma_rep { - unsigned int rr_len; /* actual received reply length */ - struct rpcrdma_buffer *rr_buffer; /* home base for this structure */ - struct rpc_xprt *rr_xprt; /* 
needed for request/reply matching */ - void (*rr_func)(struct rpcrdma_rep *);/* called by tasklet in softint */ - struct list_head rr_list; /* tasklet list */ - struct ib_sge rr_iov; /* for posting */ - struct ib_mr *rr_handle; /* handle for mem in rr_iov */ - char rr_base[MAX_RPCRDMAHDR]; /* minimal inline receive buffer */ + unsigned int rr_len; + struct rpcrdma_buffer *rr_buffer; + struct rpc_xprt *rr_xprt; + void (*rr_func)(struct rpcrdma_rep *); + struct list_head rr_list; + struct rpcrdma_regbuf *rr_rdmabuf; }; /* -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org More majordomo info at http://vger.kernel.org/majordomo-info.html
WARNING: multiple messages have this Message-ID (diff)
From: Chuck Lever <chuck.lever@oracle.com> To: linux-rdma@vger.kernel.org, linux-nfs@vger.kernel.org Subject: [PATCH v2 18/20] xprtrdma: Allocate RPC/RDMA receive buffer separately from struct rpcrdma_rep Date: Tue, 13 Jan 2015 11:27:12 -0500 [thread overview] Message-ID: <20150113162712.14086.19421.stgit@manet.1015granger.net> (raw) In-Reply-To: <20150113161440.14086.24801.stgit@manet.1015granger.net> The rr_base field is the buffer where each RPC/RDMA reply header lands. In some cases the RPC reply header also lands in this buffer, just after the RPC/RDMA header. The pre-posted receive buffers are supposed to be the same size on the client and server. For Solaris and Linux, that size is supposed to be 1024 bytes, the inline threshold. The size of the rr_base buffer is currently dependent on RPCRDMA_MAX_DATA_SEGS. When the server constructs a chunk list in the RPC/RDMA header, each segment in the list takes up a little room in the buffer. If we want a large r/wsize maximum, MAX_SEGS will grow significantly, but notice that the inline threshold size won't change. Therefore the inline size is the real limit on the size of the RPC/RDMA header. The largest RPC reply the client can receive via RDMA SEND is also no bigger than the inline size. Thus the size of the pre-posted receive buffer should be exactly the inline size * 2. The MAX_RPCRDMAHDR term should be replaced, and rounding up ( 1 << fls(yada) ) is not necessary. RPC replies received via RDMA WRITE (long replies) are caught in rq_rcv_buf, which is the second half of the RPC send buffer. Ie, such replies are not involved in any way with rr_base. 
Signed-off-by: Chuck Lever <chuck.lever@oracle.com> --- net/sunrpc/xprtrdma/rpc_rdma.c | 5 +++-- net/sunrpc/xprtrdma/verbs.c | 27 ++++++++++++++------------- net/sunrpc/xprtrdma/xprt_rdma.h | 14 ++++++-------- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index c1d4a09..02efcaa 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -572,6 +572,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b { unsigned int i, total_len; struct rpcrdma_write_chunk *cur_wchunk; + char *base = (char *)rdmab_to_msg(rep->rr_rdmabuf); i = be32_to_cpu(**iptrp); if (i > max) @@ -599,7 +600,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b return -1; cur_wchunk = (struct rpcrdma_write_chunk *) w; } - if ((char *) cur_wchunk > rep->rr_base + rep->rr_len) + if ((char *)cur_wchunk > base + rep->rr_len) return -1; *iptrp = (__be32 *) cur_wchunk; @@ -753,7 +754,7 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) dprintk("RPC: %s: short/invalid reply\n", __func__); goto repost; } - headerp = (struct rpcrdma_msg *) rep->rr_base; + headerp = rdmab_to_msg(rep->rr_rdmabuf); if (headerp->rm_vers != rpcrdma_version) { dprintk("RPC: %s: invalid version %d\n", __func__, be32_to_cpu(headerp->rm_vers)); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index c81749b..7aac422 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -298,8 +298,9 @@ rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list) rep->rr_len = wc->byte_len; ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device, - rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE); - prefetch(rep->rr_base); + rdmab_addr(rep->rr_rdmabuf), + rep->rr_len, DMA_FROM_DEVICE); + prefetch(rdmab_to_msg(rep->rr_rdmabuf)); out_schedule: list_add_tail(&rep->rr_list, sched_list); @@ -1092,23 +1093,21 @@ static 
struct rpcrdma_rep * rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; - size_t rlen = 1 << fls(cdata->inline_rsize + - sizeof(struct rpcrdma_rep)); + size_t rlen = cdata->inline_rsize << 1; struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_rep *rep; int rc; rc = -ENOMEM; - rep = kmalloc(rlen, GFP_KERNEL); + rep = kzalloc(sizeof(*rep), GFP_KERNEL); if (rep == NULL) goto out; - memset(rep, 0, sizeof(*rep)); - rc = rpcrdma_register_internal(ia, rep->rr_base, rlen - - offsetof(struct rpcrdma_rep, rr_base), - &rep->rr_handle, &rep->rr_iov); - if (rc) + rep->rr_rdmabuf = rpcrdma_alloc_regbuf(ia, rlen, GFP_KERNEL); + if (IS_ERR(rep->rr_rdmabuf)) { + rc = PTR_ERR(rep->rr_rdmabuf); goto out_free; + } rep->rr_buffer = &r_xprt->rx_buf; return rep; @@ -1306,7 +1305,7 @@ rpcrdma_destroy_rep(struct rpcrdma_ia *ia, struct rpcrdma_rep *rep) if (!rep) return; - rpcrdma_deregister_internal(ia, rep->rr_handle, &rep->rr_iov); + rpcrdma_free_regbuf(ia, rep->rr_rdmabuf); kfree(rep); } @@ -2209,11 +2208,13 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, recv_wr.next = NULL; recv_wr.wr_id = (u64) (unsigned long) rep; - recv_wr.sg_list = &rep->rr_iov; + recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; recv_wr.num_sge = 1; ib_dma_sync_single_for_cpu(ia->ri_id->device, - rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL); + rdmab_addr(rep->rr_rdmabuf), + rdmab_length(rep->rr_rdmabuf), + DMA_BIDIRECTIONAL); rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 84ad863..2b69316 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -180,14 +180,12 @@ enum rpcrdma_chunktype { struct rpcrdma_buffer; struct rpcrdma_rep { - unsigned int rr_len; /* actual received reply length */ - struct rpcrdma_buffer *rr_buffer; /* home base for this structure */ - struct rpc_xprt *rr_xprt; /* needed for request/reply 
matching */ - void (*rr_func)(struct rpcrdma_rep *);/* called by tasklet in softint */ - struct list_head rr_list; /* tasklet list */ - struct ib_sge rr_iov; /* for posting */ - struct ib_mr *rr_handle; /* handle for mem in rr_iov */ - char rr_base[MAX_RPCRDMAHDR]; /* minimal inline receive buffer */ + unsigned int rr_len; + struct rpcrdma_buffer *rr_buffer; + struct rpc_xprt *rr_xprt; + void (*rr_func)(struct rpcrdma_rep *); + struct list_head rr_list; + struct rpcrdma_regbuf *rr_rdmabuf; }; /*
next prev parent reply other threads:[~2015-01-13 16:27 UTC|newest] Thread overview: 54+ messages / expand[flat|nested] mbox.gz Atom feed top 2015-01-13 16:24 [PATCH v2 00/20] NFS/RDMA client for 3.20 Chuck Lever 2015-01-13 16:24 ` Chuck Lever [not found] ` <20150113161440.14086.24801.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org> 2015-01-13 16:24 ` [PATCH v2 01/20] xprtrdma: human-readable completion status Chuck Lever 2015-01-13 16:24 ` Chuck Lever 2015-01-13 16:25 ` [PATCH v2 02/20] xprtrdma: Modernize htonl and ntohl Chuck Lever 2015-01-13 16:25 ` Chuck Lever [not found] ` <20150113162459.14086.38318.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org> 2015-01-16 18:33 ` Anna Schumaker 2015-01-16 18:33 ` Anna Schumaker [not found] ` <54B95965.3080806-ZwjVKphTwtPQT0dZR+AlfA@public.gmane.org> 2015-01-16 18:56 ` Chuck Lever 2015-01-16 18:56 ` Chuck Lever [not found] ` <D386EBD7-A74F-49ED-BBEE-B8B686CA96A1-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org> 2015-01-16 19:01 ` Anna Schumaker 2015-01-16 19:01 ` Anna Schumaker 2015-01-13 16:25 ` [PATCH v2 03/20] xprtrdma: Display XIDs in host byte order Chuck Lever 2015-01-13 16:25 ` Chuck Lever 2015-01-13 16:25 ` [PATCH v2 04/20] xprtrdma: Clean up hdrlen Chuck Lever 2015-01-13 16:25 ` Chuck Lever 2015-01-13 16:25 ` [PATCH v2 05/20] xprtrdma: Rename "xprt" and "rdma_connect" fields in struct rpcrdma_xprt Chuck Lever 2015-01-13 16:25 ` Chuck Lever 2015-01-13 16:25 ` [PATCH v2 06/20] xprtrdma: Remove rpcrdma_ep::rep_ia Chuck Lever 2015-01-13 16:25 ` Chuck Lever 2015-01-13 16:25 ` [PATCH v2 07/20] xprtrdma: Remove rl_mr field, and the mr_chunk union Chuck Lever 2015-01-13 16:25 ` Chuck Lever 2015-01-13 16:25 ` [PATCH v2 08/20] xprtrdma: Move credit update to RPC reply handler Chuck Lever 2015-01-13 16:25 ` Chuck Lever 2015-01-13 16:25 ` [PATCH v2 09/20] xprtrdma: Remove rpcrdma_ep::rep_func and ::rep_xprt Chuck Lever 2015-01-13 16:25 ` Chuck Lever 2015-01-13 16:26 ` [PATCH v2 10/20] xprtrdma: Free the pd if 
ib_query_qp() fails Chuck Lever 2015-01-13 16:26 ` Chuck Lever 2015-01-13 16:26 ` [PATCH v2 11/20] xprtrdma: Take struct ib_device_attr off the stack Chuck Lever 2015-01-13 16:26 ` Chuck Lever 2015-01-13 16:26 ` [PATCH v2 12/20] xprtrdma: Take struct ib_qp_attr and ib_qp_init_attr " Chuck Lever 2015-01-13 16:26 ` Chuck Lever 2015-01-13 16:26 ` [PATCH v2 13/20] xprtrdma: Simplify synopsis of rpcrdma_buffer_create() Chuck Lever 2015-01-13 16:26 ` Chuck Lever 2015-01-13 16:26 ` [PATCH v2 14/20] xprtrdma: Refactor rpcrdma_buffer_create() and rpcrdma_buffer_destroy() Chuck Lever 2015-01-13 16:26 ` Chuck Lever 2015-01-13 16:26 ` [PATCH v2 15/20] xprtrdma: Add struct rpcrdma_regbuf and helpers Chuck Lever 2015-01-13 16:26 ` Chuck Lever 2015-01-13 16:26 ` [PATCH v2 16/20] xprtrdma: Allocate RPC send buffer separately from struct rpcrdma_req Chuck Lever 2015-01-13 16:26 ` Chuck Lever 2015-01-13 16:27 ` [PATCH v2 17/20] xprtrdma: Allocate RPC/RDMA " Chuck Lever 2015-01-13 16:27 ` Chuck Lever 2015-01-13 16:27 ` Chuck Lever [this message] 2015-01-13 16:27 ` [PATCH v2 18/20] xprtrdma: Allocate RPC/RDMA receive buffer separately from struct rpcrdma_rep Chuck Lever 2015-01-13 16:27 ` [PATCH v2 19/20] xprtrdma: Allocate zero pad separately from rpcrdma_buffer Chuck Lever 2015-01-13 16:27 ` Chuck Lever 2015-01-13 16:27 ` [PATCH v2 20/20] xprtrdma: Clean up after adding regbuf management Chuck Lever 2015-01-13 16:27 ` Chuck Lever 2015-01-13 17:44 ` [PATCH v2 00/20] NFS/RDMA client for 3.20 Steve Wise 2015-01-13 17:44 ` Steve Wise 2015-01-16 21:02 ` Anna Schumaker 2015-01-16 21:02 ` Anna Schumaker [not found] ` <54B97C73.9000602-ZwjVKphTwtPQT0dZR+AlfA@public.gmane.org> 2015-01-16 21:04 ` Chuck Lever 2015-01-16 21:04 ` Chuck Lever
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20150113162712.14086.19421.stgit@manet.1015granger.net \ --to=chuck.lever-qhclzuegtsvqt0dzr+alfa@public.gmane.org \ --cc=linux-nfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \ --cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes. See the mirroring instructions for how to clone and mirror all data and code used by this external index.