All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
To: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-nfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Subject: [PATCH v2 6/6] svcrdma: support Remote Invalidation for prototyping
Date: Mon, 22 Aug 2016 14:58:23 -0400	[thread overview]
Message-ID: <20160822185823.12076.5952.stgit@klimt.1015granger.net> (raw)
In-Reply-To: <20160822185459.12076.43516.stgit-Hs+gFlyCn65vLzlybtyyYzGyq/o6K9yX@public.gmane.org>

To allow testing, use Send With Invalidate in place of Send when
transmitting RPC replies, provided the client has indicated in its
RDMA/CM connect private message that it can handle it. The
invalidate_rkey is arbitrarily chosen from among rkeys present
in the RPC-over-RDMA header's chunk lists.

Send With Invalidate can be enabled when all client and server HCAs
support it, and the client does not send persistently registered
rkeys (like a local DMA rkey).

Send With Invalidate improves performance only when clients can
recognize, while processing an RPC reply, that an rkey has already
been invalidated. That is a separate change.

In the future, the RPC-over-RDMA protocol might support Remote
Invalidation properly. The protocol needs to enable signaling
between peers to indicate when Remote Invalidation can be used.

Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
Reviewed-by: Sagi Grimberg <sagi-NQWnxTmZq1alnMjI0IkVqw@public.gmane.org>
---
 include/linux/sunrpc/svc_rdma.h          |    1 +
 net/sunrpc/xprtrdma/svc_rdma_sendto.c    |   58 ++++++++++++++++++++++++++++--
 net/sunrpc/xprtrdma/svc_rdma_transport.c |   12 +++++-
 3 files changed, 65 insertions(+), 6 deletions(-)

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 3584bc8..cc3ae16 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -137,6 +137,7 @@ struct svcxprt_rdma {
 	int		     sc_ord;		/* RDMA read limit */
 	int                  sc_max_sge;
 	int                  sc_max_sge_rd;	/* max sge for read target */
+	bool		     sc_snd_w_inv;	/* OK to use Send With Invalidate */
 
 	atomic_t             sc_sq_count;	/* Number of SQ WR on queue */
 	unsigned int	     sc_sq_depth;	/* Depth of SQ */
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 3b95b19..f5a91ed 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -225,6 +225,48 @@ svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp,
 	return rp_ary;
 }
 
+/* RPC-over-RDMA Version One private extension: Remote Invalidation.
+ * Responder's choice: requester signals it can handle Send With
+ * Invalidate, and responder chooses one rkey to invalidate.
+ *
+ * Find a candidate rkey to invalidate when sending a reply.  Picks the
+ * first rkey it finds in the chunk lists.
+ *
+ * Returns zero if RPC's chunk lists are empty.
+ */
+static u32 svc_rdma_get_inv_rkey(struct rpcrdma_msg *rdma_argp,
+				 struct rpcrdma_write_array *wr_ary,
+				 struct rpcrdma_write_array *rp_ary)
+{
+	struct rpcrdma_read_chunk *rd_ary;
+	struct rpcrdma_segment *arg_ch;
+	u32 inv_rkey;
+
+	inv_rkey = 0;
+
+	rd_ary = svc_rdma_get_read_chunk(rdma_argp);
+	if (rd_ary) {
+		inv_rkey = be32_to_cpu(rd_ary->rc_target.rs_handle);
+		goto out;
+	}
+
+	if (wr_ary && be32_to_cpu(wr_ary->wc_nchunks)) {
+		arg_ch = &wr_ary->wc_array[0].wc_target;
+		inv_rkey = be32_to_cpu(arg_ch->rs_handle);
+		goto out;
+	}
+
+	if (rp_ary && be32_to_cpu(rp_ary->wc_nchunks)) {
+		arg_ch = &rp_ary->wc_array[0].wc_target;
+		inv_rkey = be32_to_cpu(arg_ch->rs_handle);
+		goto out;
+	}
+
+out:
+	dprintk("svcrdma: Send With Invalidate rkey=%08x\n", inv_rkey);
+	return inv_rkey;
+}
+
 /* Assumptions:
  * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
  */
@@ -464,7 +506,8 @@ static int send_reply(struct svcxprt_rdma *rdma,
 		      struct page *page,
 		      struct rpcrdma_msg *rdma_resp,
 		      struct svc_rdma_req_map *vec,
-		      int byte_count)
+		      int byte_count,
+		      u32 inv_rkey)
 {
 	struct svc_rdma_op_ctxt *ctxt;
 	struct ib_send_wr send_wr;
@@ -535,7 +578,11 @@ static int send_reply(struct svcxprt_rdma *rdma,
 	send_wr.wr_cqe = &ctxt->cqe;
 	send_wr.sg_list = ctxt->sge;
 	send_wr.num_sge = sge_no;
-	send_wr.opcode = IB_WR_SEND;
+	if (inv_rkey) {
+		send_wr.opcode = IB_WR_SEND_WITH_INV;
+		send_wr.ex.invalidate_rkey = inv_rkey;
+	} else
+		send_wr.opcode = IB_WR_SEND;
 	send_wr.send_flags =  IB_SEND_SIGNALED;
 
 	ret = svc_rdma_send(rdma, &send_wr);
@@ -567,6 +614,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 	int inline_bytes;
 	struct page *res_page;
 	struct svc_rdma_req_map *vec;
+	u32 inv_rkey;
 
 	dprintk("svcrdma: sending response for rqstp=%p\n", rqstp);
 
@@ -577,6 +625,10 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 	wr_ary = svc_rdma_get_write_array(rdma_argp);
 	rp_ary = svc_rdma_get_reply_array(rdma_argp, wr_ary);
 
+	inv_rkey = 0;
+	if (rdma->sc_snd_w_inv)
+		inv_rkey = svc_rdma_get_inv_rkey(rdma_argp, wr_ary, rp_ary);
+
 	/* Build an req vec for the XDR */
 	vec = svc_rdma_get_req_map(rdma);
 	ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec, wr_ary != NULL);
@@ -619,7 +671,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 		goto err1;
 
 	ret = send_reply(rdma, rqstp, res_page, rdma_resp, vec,
-			 inline_bytes);
+			 inline_bytes, inv_rkey);
 	if (ret < 0)
 		goto err0;
 
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index f51e98a..b2464fc 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -657,9 +657,14 @@ svc_rdma_parse_connect_private(struct svcxprt_rdma *newxprt,
 	if (pmsg &&
 	    pmsg->cp_magic == rpcrdma_cmp_magic &&
 	    pmsg->cp_version == RPCRDMA_CMP_VERSION) {
-		dprintk("svcrdma: client send_size %u, recv_size %u\n",
+		newxprt->sc_snd_w_inv = pmsg->cp_flags &
+					RPCRDMA_CMP_F_SND_W_INV_OK;
+
+		dprintk("svcrdma: client send_size %u, recv_size %u "
+			"remote inv %ssupported\n",
 			rpcrdma_decode_buffer_size(pmsg->cp_send_size),
-			rpcrdma_decode_buffer_size(pmsg->cp_recv_size));
+			rpcrdma_decode_buffer_size(pmsg->cp_recv_size),
+			newxprt->sc_snd_w_inv ? "" : "un");
 	}
 }
 
@@ -1093,7 +1098,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 			dev->attrs.max_fast_reg_page_list_len;
 		newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
 		newxprt->sc_reader = rdma_read_chunk_frmr;
-	}
+	} else
+		newxprt->sc_snd_w_inv = false;
 
 	/*
 	 * Determine if a DMA MR is required and if so, what privs are required

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

WARNING: multiple messages have this Message-ID (diff)
From: Chuck Lever <chuck.lever@oracle.com>
To: linux-rdma@vger.kernel.org, linux-nfs@vger.kernel.org
Subject: [PATCH v2 6/6] svcrdma: support Remote Invalidation for prototyping
Date: Mon, 22 Aug 2016 14:58:23 -0400	[thread overview]
Message-ID: <20160822185823.12076.5952.stgit@klimt.1015granger.net> (raw)
In-Reply-To: <20160822185459.12076.43516.stgit@klimt.1015granger.net>

To allow testing, use Send With Invalidate in place of Send when
transmitting RPC replies, provided the client has indicated in its
RDMA/CM connect private message that it can handle it. The
invalidate_rkey is arbitrarily chosen from among rkeys present
in the RPC-over-RDMA header's chunk lists.

Send With Invalidate can be enabled when all client and server HCAs
support it, and the client does not send persistently registered
rkeys (like a local DMA rkey).

Send With Invalidate improves performance only when clients can
recognize, while processing an RPC reply, that an rkey has already
been invalidated. That is a separate change.

In the future, the RPC-over-RDMA protocol might support Remote
Invalidation properly. The protocol needs to enable signaling
between peers to indicate when Remote Invalidation can be used.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
---
 include/linux/sunrpc/svc_rdma.h          |    1 +
 net/sunrpc/xprtrdma/svc_rdma_sendto.c    |   58 ++++++++++++++++++++++++++++--
 net/sunrpc/xprtrdma/svc_rdma_transport.c |   12 +++++-
 3 files changed, 65 insertions(+), 6 deletions(-)

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 3584bc8..cc3ae16 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -137,6 +137,7 @@ struct svcxprt_rdma {
 	int		     sc_ord;		/* RDMA read limit */
 	int                  sc_max_sge;
 	int                  sc_max_sge_rd;	/* max sge for read target */
+	bool		     sc_snd_w_inv;	/* OK to use Send With Invalidate */
 
 	atomic_t             sc_sq_count;	/* Number of SQ WR on queue */
 	unsigned int	     sc_sq_depth;	/* Depth of SQ */
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 3b95b19..f5a91ed 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -225,6 +225,48 @@ svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp,
 	return rp_ary;
 }
 
+/* RPC-over-RDMA Version One private extension: Remote Invalidation.
+ * Responder's choice: requester signals it can handle Send With
+ * Invalidate, and responder chooses one rkey to invalidate.
+ *
+ * Find a candidate rkey to invalidate when sending a reply.  Picks the
+ * first rkey it finds in the chunk lists.
+ *
+ * Returns zero if RPC's chunk lists are empty.
+ */
+static u32 svc_rdma_get_inv_rkey(struct rpcrdma_msg *rdma_argp,
+				 struct rpcrdma_write_array *wr_ary,
+				 struct rpcrdma_write_array *rp_ary)
+{
+	struct rpcrdma_read_chunk *rd_ary;
+	struct rpcrdma_segment *arg_ch;
+	u32 inv_rkey;
+
+	inv_rkey = 0;
+
+	rd_ary = svc_rdma_get_read_chunk(rdma_argp);
+	if (rd_ary) {
+		inv_rkey = be32_to_cpu(rd_ary->rc_target.rs_handle);
+		goto out;
+	}
+
+	if (wr_ary && be32_to_cpu(wr_ary->wc_nchunks)) {
+		arg_ch = &wr_ary->wc_array[0].wc_target;
+		inv_rkey = be32_to_cpu(arg_ch->rs_handle);
+		goto out;
+	}
+
+	if (rp_ary && be32_to_cpu(rp_ary->wc_nchunks)) {
+		arg_ch = &rp_ary->wc_array[0].wc_target;
+		inv_rkey = be32_to_cpu(arg_ch->rs_handle);
+		goto out;
+	}
+
+out:
+	dprintk("svcrdma: Send With Invalidate rkey=%08x\n", inv_rkey);
+	return inv_rkey;
+}
+
 /* Assumptions:
  * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
  */
@@ -464,7 +506,8 @@ static int send_reply(struct svcxprt_rdma *rdma,
 		      struct page *page,
 		      struct rpcrdma_msg *rdma_resp,
 		      struct svc_rdma_req_map *vec,
-		      int byte_count)
+		      int byte_count,
+		      u32 inv_rkey)
 {
 	struct svc_rdma_op_ctxt *ctxt;
 	struct ib_send_wr send_wr;
@@ -535,7 +578,11 @@ static int send_reply(struct svcxprt_rdma *rdma,
 	send_wr.wr_cqe = &ctxt->cqe;
 	send_wr.sg_list = ctxt->sge;
 	send_wr.num_sge = sge_no;
-	send_wr.opcode = IB_WR_SEND;
+	if (inv_rkey) {
+		send_wr.opcode = IB_WR_SEND_WITH_INV;
+		send_wr.ex.invalidate_rkey = inv_rkey;
+	} else
+		send_wr.opcode = IB_WR_SEND;
 	send_wr.send_flags =  IB_SEND_SIGNALED;
 
 	ret = svc_rdma_send(rdma, &send_wr);
@@ -567,6 +614,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 	int inline_bytes;
 	struct page *res_page;
 	struct svc_rdma_req_map *vec;
+	u32 inv_rkey;
 
 	dprintk("svcrdma: sending response for rqstp=%p\n", rqstp);
 
@@ -577,6 +625,10 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 	wr_ary = svc_rdma_get_write_array(rdma_argp);
 	rp_ary = svc_rdma_get_reply_array(rdma_argp, wr_ary);
 
+	inv_rkey = 0;
+	if (rdma->sc_snd_w_inv)
+		inv_rkey = svc_rdma_get_inv_rkey(rdma_argp, wr_ary, rp_ary);
+
 	/* Build an req vec for the XDR */
 	vec = svc_rdma_get_req_map(rdma);
 	ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec, wr_ary != NULL);
@@ -619,7 +671,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 		goto err1;
 
 	ret = send_reply(rdma, rqstp, res_page, rdma_resp, vec,
-			 inline_bytes);
+			 inline_bytes, inv_rkey);
 	if (ret < 0)
 		goto err0;
 
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index f51e98a..b2464fc 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -657,9 +657,14 @@ svc_rdma_parse_connect_private(struct svcxprt_rdma *newxprt,
 	if (pmsg &&
 	    pmsg->cp_magic == rpcrdma_cmp_magic &&
 	    pmsg->cp_version == RPCRDMA_CMP_VERSION) {
-		dprintk("svcrdma: client send_size %u, recv_size %u\n",
+		newxprt->sc_snd_w_inv = pmsg->cp_flags &
+					RPCRDMA_CMP_F_SND_W_INV_OK;
+
+		dprintk("svcrdma: client send_size %u, recv_size %u "
+			"remote inv %ssupported\n",
 			rpcrdma_decode_buffer_size(pmsg->cp_send_size),
-			rpcrdma_decode_buffer_size(pmsg->cp_recv_size));
+			rpcrdma_decode_buffer_size(pmsg->cp_recv_size),
+			newxprt->sc_snd_w_inv ? "" : "un");
 	}
 }
 
@@ -1093,7 +1098,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 			dev->attrs.max_fast_reg_page_list_len;
 		newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
 		newxprt->sc_reader = rdma_read_chunk_frmr;
-	}
+	} else
+		newxprt->sc_snd_w_inv = false;
 
 	/*
 	 * Determine if a DMA MR is required and if so, what privs are required


  parent reply	other threads:[~2016-08-22 18:58 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-08-22 18:57 [PATCH v2 0/6] server-side NFS/RDMA patches proposed for v4.9 Chuck Lever
2016-08-22 18:57 ` Chuck Lever
     [not found] ` <20160822185459.12076.43516.stgit-Hs+gFlyCn65vLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2016-08-22 18:57   ` [PATCH v2 1/6] SUNRPC: Silence WARN_ON when NFSv4.1 over RDMA is in use Chuck Lever
2016-08-22 18:57     ` Chuck Lever
     [not found]     ` <20160822185742.12076.34446.stgit-Hs+gFlyCn65vLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2016-08-23 17:39       ` Chuck Lever
2016-08-23 17:39         ` Chuck Lever
     [not found]         ` <3E0207D8-CFB2-4233-B92B-165B10E9920E-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2016-08-23 20:53           ` J. Bruce Fields
2016-08-23 20:53             ` J. Bruce Fields
     [not found]             ` <20160823205355.GA29452-uC3wQj2KruNg9hUCZPvPmw@public.gmane.org>
2016-08-23 21:05               ` Chuck Lever
2016-08-23 21:05                 ` Chuck Lever
     [not found]                 ` <AD25F201-01A4-4F4F-BAE7-9E984E12EE8D-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2016-08-23 22:21                   ` Trond Myklebust
2016-08-23 22:21                     ` Trond Myklebust
     [not found]                     ` <BB84506B-BC92-4325-A81C-4BC7D39F471C-7I+n7zu2hftEKMMhf/gKZA@public.gmane.org>
2016-08-24 13:38                       ` Fields Bruce James
2016-08-24 13:38                         ` Fields Bruce James
2016-08-24 16:34                       ` Chuck Lever
2016-08-24 16:34                         ` Chuck Lever
2016-08-22 18:57   ` [PATCH v2 2/6] svcrdma: Tail iovec leaves an orphaned DMA mapping Chuck Lever
2016-08-22 18:57     ` Chuck Lever
2016-08-22 18:57   ` [PATCH v2 3/6] svcrdma: Skip put_page() when send_reply() fails Chuck Lever
2016-08-22 18:57     ` Chuck Lever
2016-08-22 18:58   ` [PATCH v2 4/6] rpcrdma: RDMA/CM private message data structure Chuck Lever
2016-08-22 18:58     ` Chuck Lever
2016-08-22 18:58   ` [PATCH v2 5/6] svcrdma: Server-side support for rpcrdma_connect_private Chuck Lever
2016-08-22 18:58     ` Chuck Lever
2016-08-22 18:58   ` Chuck Lever [this message]
2016-08-22 18:58     ` [PATCH v2 6/6] svcrdma: support Remote Invalidation for prototyping Chuck Lever

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160822185823.12076.5952.stgit@klimt.1015granger.net \
    --to=chuck.lever-qhclzuegtsvqt0dzr+alfa@public.gmane.org \
    --cc=linux-nfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.