All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chuck Lever <chuck.lever@oracle.com>
To: linux-nfs@vger.kernel.org, linux-rdma@vger.kernel.org
Subject: [PATCH 14/20] svcrdma: Support multiple Write chunks in svc_rdma_map_reply_msg()
Date: Mon, 26 Oct 2020 14:55:07 -0400	[thread overview]
Message-ID: <160373850716.1886.4046562444210479073.stgit@klimt.1015granger.net> (raw)
In-Reply-To: <160373843299.1886.12604782813896379719.stgit@klimt.1015granger.net>

Refactor: svc_rdma_map_reply_msg() is restructured to DMA map only
the parts of rq_res that do not contain a result payload.

This change has been tested to confirm that it does not cause a
regression in the no Write chunk and single Write chunk cases.
Multiple Write chunks have not been tested.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc_rdma.h       |    2 
 include/trace/events/rpcrdma.h        |    1 
 net/sunrpc/xprtrdma/svc_rdma_sendto.c |  174 +++++++++++++++++++--------------
 3 files changed, 100 insertions(+), 77 deletions(-)

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 7090af1a9791..e09fafba00d7 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -213,7 +213,7 @@ extern int svc_rdma_send(struct svcxprt_rdma *rdma,
 extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
 				  struct svc_rdma_send_ctxt *sctxt,
 				  const struct svc_rdma_recv_ctxt *rctxt,
-				  struct xdr_buf *xdr);
+				  const struct xdr_buf *xdr);
 extern void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
 				    struct svc_rdma_send_ctxt *sctxt,
 				    struct svc_rdma_recv_ctxt *rctxt,
diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index afc58accb9cf..054dedd0280c 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -1687,6 +1687,7 @@ DECLARE_EVENT_CLASS(svcrdma_dma_map_class,
 				TP_ARGS(rdma, dma_addr, length))
 
 DEFINE_SVC_DMA_EVENT(dma_map_page);
+DEFINE_SVC_DMA_EVENT(dma_map_err);
 DEFINE_SVC_DMA_EVENT(dma_unmap_page);
 
 TRACE_EVENT(svcrdma_dma_map_rw_err,
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index b21beaa0114e..7d35bd6224ea 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -496,39 +496,111 @@ svc_rdma_encode_reply_chunk(struct svc_rdma_recv_ctxt *rctxt,
 	return svc_rdma_encode_write_chunk(sctxt, chunk);
 }
 
-static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma,
-				 struct svc_rdma_send_ctxt *ctxt,
-				 struct page *page,
-				 unsigned long offset,
-				 unsigned int len)
+struct svc_rdma_map_data {
+	struct svcxprt_rdma		*md_rdma;
+	struct svc_rdma_send_ctxt	*md_ctxt;
+};
+
+/**
+ * svc_rdma_page_dma_map - DMA map one page
+ * @data: pointer to arguments
+ * @page: struct page to DMA map
+ * @offset: offset into the page
+ * @len: number of bytes to map
+ *
+ * Returns:
+ *   %0 if DMA mapping was successful
+ *   %-EIO if the page cannot be DMA mapped
+ */
+static int svc_rdma_page_dma_map(void *data, struct page *page,
+				 unsigned long offset, unsigned int len)
 {
+	struct svc_rdma_map_data *args = data;
+	struct svcxprt_rdma *rdma = args->md_rdma;
+	struct svc_rdma_send_ctxt *ctxt = args->md_ctxt;
 	struct ib_device *dev = rdma->sc_cm_id->device;
 	dma_addr_t dma_addr;
 
+	++ctxt->sc_cur_sge_no;
+
 	dma_addr = ib_dma_map_page(dev, page, offset, len, DMA_TO_DEVICE);
-	trace_svcrdma_dma_map_page(rdma, dma_addr, len);
 	if (ib_dma_mapping_error(dev, dma_addr))
 		goto out_maperr;
 
+	trace_svcrdma_dma_map_page(rdma, dma_addr, len);
 	ctxt->sc_sges[ctxt->sc_cur_sge_no].addr = dma_addr;
 	ctxt->sc_sges[ctxt->sc_cur_sge_no].length = len;
 	ctxt->sc_send_wr.num_sge++;
 	return 0;
 
 out_maperr:
+	trace_svcrdma_dma_map_err(rdma, dma_addr, len);
 	return -EIO;
 }
 
-/* ib_dma_map_page() is used here because svc_rdma_dma_unmap()
+/**
+ * svc_rdma_iov_dma_map - DMA map an iovec
+ * @data: pointer to arguments
+ * @iov: kvec to DMA map
+ *
+ * ib_dma_map_page() is used here because svc_rdma_dma_unmap()
  * handles DMA-unmap and it uses ib_dma_unmap_page() exclusively.
+ *
+ * Returns:
+ *   %0 if DMA mapping was successful
+ *   %-EIO if the iovec cannot be DMA mapped
  */
-static int svc_rdma_dma_map_buf(struct svcxprt_rdma *rdma,
-				struct svc_rdma_send_ctxt *ctxt,
-				unsigned char *base,
-				unsigned int len)
+static int svc_rdma_iov_dma_map(void *data, const struct kvec *iov)
 {
-	return svc_rdma_dma_map_page(rdma, ctxt, virt_to_page(base),
-				     offset_in_page(base), len);
+	if (!iov->iov_len)
+		return 0;
+	return svc_rdma_page_dma_map(data, virt_to_page(iov->iov_base),
+				     offset_in_page(iov->iov_base),
+				     iov->iov_len);
+}
+
+/**
+ * svc_rdma_xb_dma_map - DMA map all segments of an xdr_buf
+ * @xdr: xdr_buf containing portion of an RPC message to transmit
+ * @data: pointer to arguments
+ *
+ * Returns:
+ *   The value of @xdr->len if DMA mapping was successful
+ *   %-EIO if DMA mapping failed
+ *
+ * On failure, any DMA mappings that have been already done must be
+ * unmapped by the caller.
+ */
+static int svc_rdma_xb_dma_map(const struct xdr_buf *xdr, void *data)
+{
+	unsigned int len, remaining;
+	unsigned long pageoff;
+	struct page **ppages;
+	int ret;
+
+	ret = svc_rdma_iov_dma_map(data, &xdr->head[0]);
+	if (ret < 0)
+		return ret;
+
+	ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
+	pageoff = offset_in_page(xdr->page_base);
+	remaining = xdr->page_len;
+	while (remaining) {
+		len = min_t(u32, PAGE_SIZE - pageoff, remaining);
+
+		ret = svc_rdma_page_dma_map(data, *ppages++, pageoff, len);
+		if (ret < 0)
+			return ret;
+
+		remaining -= len;
+		pageoff = 0;
+	}
+
+	ret = svc_rdma_iov_dma_map(data, &xdr->tail[0]);
+	if (ret < 0)
+		return ret;
+
+	return xdr->len;
 }
 
 struct svc_rdma_pullup_data {
@@ -688,22 +760,22 @@ static int svc_rdma_pull_up_reply_msg(const struct svcxprt_rdma *rdma,
  * @rctxt: Write and Reply chunks provided by client
  * @xdr: prepared xdr_buf containing RPC message
  *
- * Load the xdr_buf into the ctxt's sge array, and DMA map each
- * element as it is added. The Send WR's num_sge field is set.
+ * Returns:
+ *   %0 if DMA mapping was successful.
+ *   %-EMSGSIZE if a buffer manipulation problem occurred
+ *   %-EIO if DMA mapping failed
  *
- * Returns zero on success, or a negative errno on failure.
+ * The Send WR's num_sge field is set in all cases.
  */
 int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
 			   struct svc_rdma_send_ctxt *sctxt,
 			   const struct svc_rdma_recv_ctxt *rctxt,
-			   struct xdr_buf *xdr)
+			   const struct xdr_buf *xdr)
 {
-	unsigned int len, remaining;
-	unsigned long page_off;
-	struct page **ppages;
-	unsigned char *base;
-	u32 xdr_pad;
-	int ret;
+	struct svc_rdma_map_data args = {
+		.md_rdma	= rdma,
+		.md_ctxt	= sctxt,
+	};
 
 	/* Set up the (persistently-mapped) transport header SGE. */
 	sctxt->sc_send_wr.num_sge = 1;
@@ -712,7 +784,7 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
 	/* If there is a Reply chunk, nothing follows the transport
 	 * header, and we're done here.
 	 */
-	if (rctxt && rctxt->rc_reply_chunk)
+	if (!pcl_is_empty(&rctxt->rc_reply_pcl))
 		return 0;
 
 	/* For pull-up, svc_rdma_send() will sync the transport header.
@@ -721,58 +793,8 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
 	if (svc_rdma_pull_up_needed(rdma, sctxt, rctxt, xdr))
 		return svc_rdma_pull_up_reply_msg(rdma, sctxt, rctxt, xdr);
 
-	++sctxt->sc_cur_sge_no;
-	ret = svc_rdma_dma_map_buf(rdma, sctxt,
-				   xdr->head[0].iov_base,
-				   xdr->head[0].iov_len);
-	if (ret < 0)
-		return ret;
-
-	/* If a Write chunk is present, the xdr_buf's page list
-	 * is not included inline. However the Upper Layer may
-	 * have added XDR padding in the tail buffer, and that
-	 * should not be included inline.
-	 */
-	if (rctxt && rctxt->rc_write_list) {
-		base = xdr->tail[0].iov_base;
-		len = xdr->tail[0].iov_len;
-		xdr_pad = xdr_pad_size(xdr->page_len);
-
-		if (len && xdr_pad) {
-			base += xdr_pad;
-			len -= xdr_pad;
-		}
-
-		goto tail;
-	}
-
-	ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
-	page_off = xdr->page_base & ~PAGE_MASK;
-	remaining = xdr->page_len;
-	while (remaining) {
-		len = min_t(u32, PAGE_SIZE - page_off, remaining);
-
-		++sctxt->sc_cur_sge_no;
-		ret = svc_rdma_dma_map_page(rdma, sctxt, *ppages++,
-					    page_off, len);
-		if (ret < 0)
-			return ret;
-
-		remaining -= len;
-		page_off = 0;
-	}
-
-	base = xdr->tail[0].iov_base;
-	len = xdr->tail[0].iov_len;
-tail:
-	if (len) {
-		++sctxt->sc_cur_sge_no;
-		ret = svc_rdma_dma_map_buf(rdma, sctxt, base, len);
-		if (ret < 0)
-			return ret;
-	}
-
-	return 0;
+	return pcl_process_nonpayloads(&rctxt->rc_write_pcl, xdr,
+				       svc_rdma_xb_dma_map, &args);
 }
 
 /* The svc_rqst and all resources it owns are released as soon as



  parent reply	other threads:[~2020-10-26 18:55 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-10-26 18:53 [PATCH 00/20] NFSD support for multiple RPC/RDMA chunks Chuck Lever
2020-10-26 18:53 ` [PATCH 01/20] SUNRPC: Adjust synopsis of xdr_buf_subsegment() Chuck Lever
2020-10-26 18:54 ` [PATCH 02/20] svcrdma: Const-ify the xdr_buf arguments Chuck Lever
2020-10-26 18:54 ` [PATCH 03/20] svcrdma: Refactor the RDMA Write path Chuck Lever
2020-10-26 18:54 ` [PATCH 04/20] SUNRPC: Rename svc_encode_read_payload() Chuck Lever
2020-10-27 20:53   ` J. Bruce Fields
2020-10-28 13:16     ` Chuck Lever
2020-10-26 18:54 ` [PATCH 05/20] NFSD: Invoke svc_encode_result_payload() in "read" NFSD encoders Chuck Lever
2020-10-26 18:54 ` [PATCH 06/20] svcrdma: Post RDMA Writes while XDR encoding replies Chuck Lever
2020-10-26 18:54 ` [PATCH 07/20] svcrdma: Clean up svc_rdma_encode_reply_chunk() Chuck Lever
2020-10-26 18:54 ` [PATCH 08/20] svcrdma: Add a "parsed chunk list" data structure Chuck Lever
2020-10-26 18:54 ` [PATCH 09/20] svcrdma: Use parsed chunk lists to derive the inv_rkey Chuck Lever
2020-10-26 18:54 ` [PATCH 10/20] svcrdma: Use parsed chunk lists to detect reverse direction replies Chuck Lever
2020-10-26 18:54 ` [PATCH 11/20] svcrdma: Use parsed chunk lists to construct RDMA Writes Chuck Lever
2020-10-26 18:54 ` [PATCH 12/20] svcrdma: Use parsed chunk lists to encode Reply transport headers Chuck Lever
2020-10-26 18:55 ` [PATCH 13/20] svcrdma: Support multiple write chunks when pulling up Chuck Lever
2020-10-26 18:55 ` Chuck Lever [this message]
2020-10-26 18:55 ` [PATCH 15/20] svcrdma: Support multiple Write chunks in svc_rdma_send_reply_chunk Chuck Lever
2020-10-26 18:55 ` [PATCH 16/20] svcrdma: Remove chunk list pointers Chuck Lever
2020-10-26 18:55 ` [PATCH 17/20] svcrdma: Clean up chunk tracepoints Chuck Lever
2020-10-26 18:55 ` [PATCH 18/20] svcrdma: Rename info::ri_chunklen Chuck Lever
2020-10-26 18:55 ` [PATCH 19/20] svcrdma: Use the new parsed chunk list when pulling Read chunks Chuck Lever
2020-10-26 18:55 ` [PATCH 20/20] svcrdma: support multiple Read chunks per RPC Chuck Lever
2020-10-27  6:08 ` [PATCH 00/20] NFSD support for multiple RPC/RDMA chunks Leon Romanovsky
2020-10-27 13:24   ` Chuck Lever
2020-10-27 17:25     ` J. Bruce Fields
2020-10-27 17:29       ` Chuck Lever
2020-10-28  7:16     ` Leon Romanovsky
2020-10-28 13:10       ` Chuck Lever

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=160373850716.1886.4046562444210479073.stgit@klimt.1015granger.net \
    --to=chuck.lever@oracle.com \
    --cc=linux-nfs@vger.kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.