All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v1 00/16] NFS/RDMA patches proposed for 4.1
@ 2015-03-13 21:26 Chuck Lever
       [not found] ` <20150313212517.22783.18364.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
  0 siblings, 1 reply; 52+ messages in thread
From: Chuck Lever @ 2015-03-13 21:26 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA

This is a series of client-side patches for NFS/RDMA. In preparation
for increasing the transport credit limit and maximum rsize/wsize,
I've re-factored the memory registration logic into separate files,
invoked via a method API.

Two optimizations appear in this series:

The old code pre-allocated 64 MRs for every RPC, and attached 64 MRs
to each RPC before posting it. The new code attaches just enough MRs
to handle each RPC. When no data payload chunk is needed, no MRs are
attached to the RPC. For modern HCAs, only one MR is needed for NFS
read or write data payloads.

The final patch in the series splits the rb_lock in two in order to
reduce lock contention.

The series is also available in the nfs-rdma-for-4.1 topic branch at

 git://linux-nfs.org/projects/cel/cel-2.6.git

---

Chuck Lever (16):
      xprtrdma: Display IPv6 addresses and port numbers correctly
      xprtrdma: Perform a full marshal on retransmit
      xprtrdma: Add vector of ops for each memory registration strategy
      xprtrdma: Add a "max_payload" op for each memreg mode
      xprtrdma: Add a "register_external" op for each memreg mode
      xprtrdma: Add a "deregister_external" op for each memreg mode
      xprtrdma: Add "init MRs" memreg op
      xprtrdma: Add "reset MRs" memreg op
      xprtrdma: Add "destroy MRs" memreg op
      xprtrdma: Add "open" memreg op
      xprtrdma: Handle non-SEND completions via a callout
      xprtrdma: Acquire FMRs in rpcrdma_fmr_register_external()
      xprtrdma: Acquire MRs in rpcrdma_register_external()
      xprtrdma: Remove rpcrdma_ia::ri_memreg_strategy
      xprtrdma: Make rpcrdma_{un}map_one() into inline functions
      xprtrdma: Split rb_lock


 include/linux/sunrpc/xprtrdma.h    |    3 
 net/sunrpc/xprtrdma/Makefile       |    3 
 net/sunrpc/xprtrdma/fmr_ops.c      |  270 +++++++++++
 net/sunrpc/xprtrdma/frwr_ops.c     |  485 ++++++++++++++++++++
 net/sunrpc/xprtrdma/physical_ops.c |  110 +++++
 net/sunrpc/xprtrdma/rpc_rdma.c     |   82 ++-
 net/sunrpc/xprtrdma/transport.c    |   65 ++-
 net/sunrpc/xprtrdma/verbs.c        |  856 +++---------------------------------
 net/sunrpc/xprtrdma/xprt_rdma.h    |  108 ++++-
 9 files changed, 1096 insertions(+), 886 deletions(-)
 create mode 100644 net/sunrpc/xprtrdma/fmr_ops.c
 create mode 100644 net/sunrpc/xprtrdma/frwr_ops.c
 create mode 100644 net/sunrpc/xprtrdma/physical_ops.c

--
Chuck Lever
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [PATCH v1 01/16] xprtrdma: Display IPv6 addresses and port numbers correctly
       [not found] ` <20150313212517.22783.18364.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
@ 2015-03-13 21:26   ` Chuck Lever
       [not found]     ` <20150313212641.22783.93522.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
  2015-03-13 21:26   ` [PATCH v1 02/16] xprtrdma: Perform a full marshal on retransmit Chuck Lever
                     ` (14 subsequent siblings)
  15 siblings, 1 reply; 52+ messages in thread
From: Chuck Lever @ 2015-03-13 21:26 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA

Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
---
 net/sunrpc/xprtrdma/transport.c |   47 ++++++++++++++++++++++++++++++++-------
 net/sunrpc/xprtrdma/verbs.c     |   21 +++++++----------
 2 files changed, 47 insertions(+), 21 deletions(-)

diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 2e192ba..26a62e7 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -157,12 +157,47 @@ static struct ctl_table sunrpc_table[] = {
 static struct rpc_xprt_ops xprt_rdma_procs;	/* forward reference */
 
 static void
+xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr *sap)
+{
+	struct sockaddr_in *sin = (struct sockaddr_in *)sap;
+	char buf[20];
+
+	snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr));
+	xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);
+
+	xprt->address_strings[RPC_DISPLAY_NETID] = "rdma";
+}
+
+static void
+xprt_rdma_format_addresses6(struct rpc_xprt *xprt, struct sockaddr *sap)
+{
+	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
+	char buf[40];
+
+	snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr);
+	xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);
+
+	xprt->address_strings[RPC_DISPLAY_NETID] = "rdma6";
+}
+
+static void
 xprt_rdma_format_addresses(struct rpc_xprt *xprt)
 {
 	struct sockaddr *sap = (struct sockaddr *)
 					&rpcx_to_rdmad(xprt).addr;
-	struct sockaddr_in *sin = (struct sockaddr_in *)sap;
-	char buf[64];
+	char buf[128];
+
+	switch (sap->sa_family) {
+	case AF_INET:
+		xprt_rdma_format_addresses4(xprt, sap);
+		break;
+	case AF_INET6:
+		xprt_rdma_format_addresses6(xprt, sap);
+		break;
+	default:
+		pr_err("rpcrdma: Unrecognized address family\n");
+		return;
+	}
 
 	(void)rpc_ntop(sap, buf, sizeof(buf));
 	xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL);
@@ -170,16 +205,10 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt)
 	snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap));
 	xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL);
 
-	xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
-
-	snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr));
-	xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);
-
 	snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap));
 	xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL);
 
-	/* netid */
-	xprt->address_strings[RPC_DISPLAY_NETID] = "rdma";
+	xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
 }
 
 static void
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 124676c..1aa55b7 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -50,6 +50,7 @@
 #include <linux/interrupt.h>
 #include <linux/slab.h>
 #include <linux/prefetch.h>
+#include <linux/sunrpc/addr.h>
 #include <asm/bitops.h>
 
 #include "xprt_rdma.h"
@@ -424,7 +425,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
 	struct rpcrdma_ia *ia = &xprt->rx_ia;
 	struct rpcrdma_ep *ep = &xprt->rx_ep;
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-	struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr;
+	struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr;
 #endif
 	struct ib_qp_attr *attr = &ia->ri_qp_attr;
 	struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr;
@@ -480,9 +481,8 @@ connected:
 		wake_up_all(&ep->rep_connect_wait);
 		/*FALLTHROUGH*/
 	default:
-		dprintk("RPC:       %s: %pI4:%u (ep 0x%p): %s\n",
-			__func__, &addr->sin_addr.s_addr,
-			ntohs(addr->sin_port), ep,
+		dprintk("RPC:       %s: %pIS:%u (ep 0x%p): %s\n",
+			__func__, sap, rpc_get_port(sap), ep,
 			CONNECTION_MSG(event->event));
 		break;
 	}
@@ -491,19 +491,16 @@ connected:
 	if (connstate == 1) {
 		int ird = attr->max_dest_rd_atomic;
 		int tird = ep->rep_remote_cma.responder_resources;
-		printk(KERN_INFO "rpcrdma: connection to %pI4:%u "
-			"on %s, memreg %d slots %d ird %d%s\n",
-			&addr->sin_addr.s_addr,
-			ntohs(addr->sin_port),
+
+		pr_info("rpcrdma: connection to %pIS:%u on %s, memreg %d slots %d ird %d%s\n",
+			sap, rpc_get_port(sap),
 			ia->ri_id->device->name,
 			ia->ri_memreg_strategy,
 			xprt->rx_buf.rb_max_requests,
 			ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
 	} else if (connstate < 0) {
-		printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed (%d)\n",
-			&addr->sin_addr.s_addr,
-			ntohs(addr->sin_port),
-			connstate);
+		pr_info("rpcrdma: connection to %pIS:%u closed (%d)\n",
+			sap, rpc_get_port(sap), connstate);
 	}
 #endif
 

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [PATCH v1 02/16] xprtrdma: Perform a full marshal on retransmit
       [not found] ` <20150313212517.22783.18364.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
  2015-03-13 21:26   ` [PATCH v1 01/16] xprtrdma: Display IPv6 addresses and port numbers correctly Chuck Lever
@ 2015-03-13 21:26   ` Chuck Lever
       [not found]     ` <20150313212650.22783.28071.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
  2015-03-13 21:26   ` [PATCH v1 03/16] xprtrdma: Add vector of ops for each memory registration strategy Chuck Lever
                     ` (13 subsequent siblings)
  15 siblings, 1 reply; 52+ messages in thread
From: Chuck Lever @ 2015-03-13 21:26 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA

Commit 6ab59945f292 ("xprtrdma: Update rkeys after transport
reconnect") added logic in the ->send_request path to update the
chunk list when an RPC/RDMA request is retransmitted.

Note that rpc_xdr_encode() resets and re-encodes the entire RPC
send buffer for each retransmit of an RPC. The RPC send buffer
is not preserved from the previous transmission of an RPC.

Revert 6ab59945f292, and instead, just force each request to be
fully marshaled every time through ->send_request. This should
preserve the fix from 6ab59945f292, while also performing pullup
during retransmits.

Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
---
 net/sunrpc/xprtrdma/rpc_rdma.c  |   71 ++++++++++++++++++---------------------
 net/sunrpc/xprtrdma/transport.c |    5 +--
 net/sunrpc/xprtrdma/xprt_rdma.h |   10 -----
 3 files changed, 34 insertions(+), 52 deletions(-)

diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 91ffde8..41456d9 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -53,6 +53,14 @@
 # define RPCDBG_FACILITY	RPCDBG_TRANS
 #endif
 
+enum rpcrdma_chunktype {
+	rpcrdma_noch = 0,
+	rpcrdma_readch,
+	rpcrdma_areadch,
+	rpcrdma_writech,
+	rpcrdma_replych
+};
+
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 static const char transfertypes[][12] = {
 	"pure inline",	/* no chunks */
@@ -284,28 +292,6 @@ out:
 }
 
 /*
- * Marshal chunks. This routine returns the header length
- * consumed by marshaling.
- *
- * Returns positive RPC/RDMA header size, or negative errno.
- */
-
-ssize_t
-rpcrdma_marshal_chunks(struct rpc_rqst *rqst, ssize_t result)
-{
-	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
-	struct rpcrdma_msg *headerp = rdmab_to_msg(req->rl_rdmabuf);
-
-	if (req->rl_rtype != rpcrdma_noch)
-		result = rpcrdma_create_chunks(rqst, &rqst->rq_snd_buf,
-					       headerp, req->rl_rtype);
-	else if (req->rl_wtype != rpcrdma_noch)
-		result = rpcrdma_create_chunks(rqst, &rqst->rq_rcv_buf,
-					       headerp, req->rl_wtype);
-	return result;
-}
-
-/*
  * Copy write data inline.
  * This function is used for "small" requests. Data which is passed
  * to RPC via iovecs (or page list) is copied directly into the
@@ -397,6 +383,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
 	char *base;
 	size_t rpclen, padlen;
 	ssize_t hdrlen;
+	enum rpcrdma_chunktype rtype, wtype;
 	struct rpcrdma_msg *headerp;
 
 	/*
@@ -433,13 +420,13 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
 	 * into pages; otherwise use reply chunks.
 	 */
 	if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst))
-		req->rl_wtype = rpcrdma_noch;
+		wtype = rpcrdma_noch;
 	else if (rqst->rq_rcv_buf.page_len == 0)
-		req->rl_wtype = rpcrdma_replych;
+		wtype = rpcrdma_replych;
 	else if (rqst->rq_rcv_buf.flags & XDRBUF_READ)
-		req->rl_wtype = rpcrdma_writech;
+		wtype = rpcrdma_writech;
 	else
-		req->rl_wtype = rpcrdma_replych;
+		wtype = rpcrdma_replych;
 
 	/*
 	 * Chunks needed for arguments?
@@ -456,16 +443,16 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
 	 * TBD check NFSv4 setacl
 	 */
 	if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst))
-		req->rl_rtype = rpcrdma_noch;
+		rtype = rpcrdma_noch;
 	else if (rqst->rq_snd_buf.page_len == 0)
-		req->rl_rtype = rpcrdma_areadch;
+		rtype = rpcrdma_areadch;
 	else
-		req->rl_rtype = rpcrdma_readch;
+		rtype = rpcrdma_readch;
 
 	/* The following simplification is not true forever */
-	if (req->rl_rtype != rpcrdma_noch && req->rl_wtype == rpcrdma_replych)
-		req->rl_wtype = rpcrdma_noch;
-	if (req->rl_rtype != rpcrdma_noch && req->rl_wtype != rpcrdma_noch) {
+	if (rtype != rpcrdma_noch && wtype == rpcrdma_replych)
+		wtype = rpcrdma_noch;
+	if (rtype != rpcrdma_noch && wtype != rpcrdma_noch) {
 		dprintk("RPC:       %s: cannot marshal multiple chunk lists\n",
 			__func__);
 		return -EIO;
@@ -479,7 +466,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
 	 * When padding is in use and applies to the transfer, insert
 	 * it and change the message type.
 	 */
-	if (req->rl_rtype == rpcrdma_noch) {
+	if (rtype == rpcrdma_noch) {
 
 		padlen = rpcrdma_inline_pullup(rqst,
 						RPCRDMA_INLINE_PAD_VALUE(rqst));
@@ -494,7 +481,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
 			headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero;
 			headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero;
 			hdrlen += 2 * sizeof(u32); /* extra words in padhdr */
-			if (req->rl_wtype != rpcrdma_noch) {
+			if (wtype != rpcrdma_noch) {
 				dprintk("RPC:       %s: invalid chunk list\n",
 					__func__);
 				return -EIO;
@@ -515,18 +502,26 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
 			 * on receive. Therefore, we request a reply chunk
 			 * for non-writes wherever feasible and efficient.
 			 */
-			if (req->rl_wtype == rpcrdma_noch)
-				req->rl_wtype = rpcrdma_replych;
+			if (wtype == rpcrdma_noch)
+				wtype = rpcrdma_replych;
 		}
 	}
 
-	hdrlen = rpcrdma_marshal_chunks(rqst, hdrlen);
+	if (rtype != rpcrdma_noch) {
+		hdrlen = rpcrdma_create_chunks(rqst, &rqst->rq_snd_buf,
+					       headerp, rtype);
+		wtype = rtype;	/* simplify dprintk */
+
+	} else if (wtype != rpcrdma_noch) {
+		hdrlen = rpcrdma_create_chunks(rqst, &rqst->rq_rcv_buf,
+					       headerp, wtype);
+	}
 	if (hdrlen < 0)
 		return hdrlen;
 
 	dprintk("RPC:       %s: %s: hdrlen %zd rpclen %zd padlen %zd"
 		" headerp 0x%p base 0x%p lkey 0x%x\n",
-		__func__, transfertypes[req->rl_wtype], hdrlen, rpclen, padlen,
+		__func__, transfertypes[wtype], hdrlen, rpclen, padlen,
 		headerp, base, rdmab_lkey(req->rl_rdmabuf));
 
 	/*
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 26a62e7..271d306 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -608,10 +608,7 @@ xprt_rdma_send_request(struct rpc_task *task)
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
 	int rc = 0;
 
-	if (req->rl_niovs == 0)
-		rc = rpcrdma_marshal_req(rqst);
-	else if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_ALLPHYSICAL)
-		rc = rpcrdma_marshal_chunks(rqst, 0);
+	rc = rpcrdma_marshal_req(rqst);
 	if (rc < 0)
 		goto failed_marshal;
 
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 0a16fb6..c8afd83 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -143,14 +143,6 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb)
 	return (struct rpcrdma_msg *)rb->rg_base;
 }
 
-enum rpcrdma_chunktype {
-	rpcrdma_noch = 0,
-	rpcrdma_readch,
-	rpcrdma_areadch,
-	rpcrdma_writech,
-	rpcrdma_replych
-};
-
 /*
  * struct rpcrdma_rep -- this structure encapsulates state required to recv
  * and complete a reply, asychronously. It needs several pieces of
@@ -258,7 +250,6 @@ struct rpcrdma_req {
 	unsigned int	rl_niovs;	/* 0, 2 or 4 */
 	unsigned int	rl_nchunks;	/* non-zero if chunks */
 	unsigned int	rl_connect_cookie;	/* retry detection */
-	enum rpcrdma_chunktype	rl_rtype, rl_wtype;
 	struct rpcrdma_buffer *rl_buffer; /* home base for this structure */
 	struct rpcrdma_rep	*rl_reply;/* holder for reply buffer */
 	struct ib_sge	rl_send_iov[4];	/* for active requests */
@@ -418,7 +409,6 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *);
 /*
  * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
  */
-ssize_t rpcrdma_marshal_chunks(struct rpc_rqst *, ssize_t);
 int rpcrdma_marshal_req(struct rpc_rqst *);
 size_t rpcrdma_max_payload(struct rpcrdma_xprt *);
 

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [PATCH v1 03/16] xprtrdma: Add vector of ops for each memory registration strategy
       [not found] ` <20150313212517.22783.18364.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
  2015-03-13 21:26   ` [PATCH v1 01/16] xprtrdma: Display IPv6 addresses and port numbers correctly Chuck Lever
  2015-03-13 21:26   ` [PATCH v1 02/16] xprtrdma: Perform a full marshal on retransmit Chuck Lever
@ 2015-03-13 21:26   ` Chuck Lever
       [not found]     ` <20150313212659.22783.28341.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
  2015-03-13 21:27   ` [PATCH v1 04/16] xprtrdma: Add a "max_payload" op for each memreg mode Chuck Lever
                     ` (12 subsequent siblings)
  15 siblings, 1 reply; 52+ messages in thread
From: Chuck Lever @ 2015-03-13 21:26 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA

Instead of employing switch() statements, let's use the typical
Linux kernel idiom for handling behavioral variation: virtual
functions.

Define a vector of operations for each supported memory registration
mode, and add a source file for each mode.

Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
---
 net/sunrpc/xprtrdma/Makefile       |    3 ++-
 net/sunrpc/xprtrdma/fmr_ops.c      |   22 ++++++++++++++++++++++
 net/sunrpc/xprtrdma/frwr_ops.c     |   22 ++++++++++++++++++++++
 net/sunrpc/xprtrdma/physical_ops.c |   24 ++++++++++++++++++++++++
 net/sunrpc/xprtrdma/verbs.c        |   11 +++++++----
 net/sunrpc/xprtrdma/xprt_rdma.h    |   12 ++++++++++++
 6 files changed, 89 insertions(+), 5 deletions(-)
 create mode 100644 net/sunrpc/xprtrdma/fmr_ops.c
 create mode 100644 net/sunrpc/xprtrdma/frwr_ops.c
 create mode 100644 net/sunrpc/xprtrdma/physical_ops.c

diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
index da5136f..579f72b 100644
--- a/net/sunrpc/xprtrdma/Makefile
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -1,6 +1,7 @@
 obj-$(CONFIG_SUNRPC_XPRT_RDMA_CLIENT) += xprtrdma.o
 
-xprtrdma-y := transport.o rpc_rdma.o verbs.o
+xprtrdma-y := transport.o rpc_rdma.o verbs.o \
+	fmr_ops.o frwr_ops.o physical_ops.o
 
 obj-$(CONFIG_SUNRPC_XPRT_RDMA_SERVER) += svcrdma.o
 
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
new file mode 100644
index 0000000..ffb7d93
--- /dev/null
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2015 Oracle.  All rights reserved.
+ * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
+ */
+
+/* Lightweight memory registration using Fast Memory Regions (FMR).
+ * Referred to sometimes as MTHCAFMR mode.
+ *
+ * FMR uses synchronous memory registration and deregistration.
+ * FMR registration is known to be fast, but FMR deregistration
+ * can take tens of usecs to complete.
+ */
+
+#include "xprt_rdma.h"
+
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
+# define RPCDBG_FACILITY	RPCDBG_TRANS
+#endif
+
+const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
+	.ro_displayname			= "fmr",
+};
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
new file mode 100644
index 0000000..79173f9
--- /dev/null
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2015 Oracle.  All rights reserved.
+ * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
+ */
+
+/* Lightweight memory registration using Fast Registration Work
+ * Requests (FRWR). Also referred to sometimes as FRMR mode.
+ *
+ * FRWR features ordered asynchronous registration and deregistration
+ * of arbitrarily sized memory regions. This is the fastest and safest
+ * but most complex memory registration mode.
+ */
+
+#include "xprt_rdma.h"
+
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
+# define RPCDBG_FACILITY	RPCDBG_TRANS
+#endif
+
+const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
+	.ro_displayname			= "frwr",
+};
diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
new file mode 100644
index 0000000..b0922ac
--- /dev/null
+++ b/net/sunrpc/xprtrdma/physical_ops.c
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2015 Oracle.  All rights reserved.
+ * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
+ */
+
+/* No-op chunk preparation. All client memory is pre-registered.
+ * Sometimes referred to as ALLPHYSICAL mode.
+ *
+ * Physical registration is simple because all client memory is
+ * pre-registered and never deregistered. This mode is good for
+ * adapter bring up, but is considered not safe: the server is
+ * trusted not to abuse its access to client memory not involved
+ * in RDMA I/O.
+ */
+
+#include "xprt_rdma.h"
+
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
+# define RPCDBG_FACILITY	RPCDBG_TRANS
+#endif
+
+const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
+	.ro_displayname			= "physical",
+};
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 1aa55b7..e4d9d9c 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -492,10 +492,10 @@ connected:
 		int ird = attr->max_dest_rd_atomic;
 		int tird = ep->rep_remote_cma.responder_resources;
 
-		pr_info("rpcrdma: connection to %pIS:%u on %s, memreg %d slots %d ird %d%s\n",
+		pr_info("rpcrdma: connection to %pIS:%u on %s, memreg '%s', %d credits, %d responders%s\n",
 			sap, rpc_get_port(sap),
 			ia->ri_id->device->name,
-			ia->ri_memreg_strategy,
+			ia->ri_ops->ro_displayname,
 			xprt->rx_buf.rb_max_requests,
 			ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
 	} else if (connstate < 0) {
@@ -649,13 +649,16 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
 	 */
 	switch (memreg) {
 	case RPCRDMA_FRMR:
+		ia->ri_ops = &rpcrdma_frwr_memreg_ops;
 		break;
 	case RPCRDMA_ALLPHYSICAL:
+		ia->ri_ops = &rpcrdma_physical_memreg_ops;
 		mem_priv = IB_ACCESS_LOCAL_WRITE |
 				IB_ACCESS_REMOTE_WRITE |
 				IB_ACCESS_REMOTE_READ;
 		goto register_setup;
 	case RPCRDMA_MTHCAFMR:
+		ia->ri_ops = &rpcrdma_fmr_memreg_ops;
 		if (ia->ri_have_dma_lkey)
 			break;
 		mem_priv = IB_ACCESS_LOCAL_WRITE;
@@ -675,8 +678,8 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
 		rc = -ENOMEM;
 		goto out3;
 	}
-	dprintk("RPC:       %s: memory registration strategy is %d\n",
-		__func__, memreg);
+	dprintk("RPC:       %s: memory registration strategy is '%s'\n",
+		__func__, ia->ri_ops->ro_displayname);
 
 	/* Else will do memory reg/dereg for each chunk */
 	ia->ri_memreg_strategy = memreg;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index c8afd83..ef3cf4a 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -60,6 +60,7 @@
  * Interface Adapter -- one per transport instance
  */
 struct rpcrdma_ia {
+	const struct rpcrdma_memreg_ops	*ri_ops;
 	rwlock_t		ri_qplock;
 	struct rdma_cm_id 	*ri_id;
 	struct ib_pd		*ri_pd;
@@ -331,6 +332,17 @@ struct rpcrdma_stats {
 };
 
 /*
+ * Per-registration mode operations
+ */
+struct rpcrdma_memreg_ops {
+	const char	*ro_displayname;
+};
+
+extern const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops;
+extern const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops;
+extern const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops;
+
+/*
  * RPCRDMA transport -- encapsulates the structures above for
  * integration with RPC.
  *

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [PATCH v1 04/16] xprtrdma: Add a "max_payload" op for each memreg mode
       [not found] ` <20150313212517.22783.18364.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
                     ` (2 preceding siblings ...)
  2015-03-13 21:26   ` [PATCH v1 03/16] xprtrdma: Add vector of ops for each memory registration strategy Chuck Lever
@ 2015-03-13 21:27   ` Chuck Lever
       [not found]     ` <20150313212708.22783.70403.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
  2015-03-13 21:27   ` [PATCH v1 05/16] xprtrdma: Add a "register_external" " Chuck Lever
                     ` (11 subsequent siblings)
  15 siblings, 1 reply; 52+ messages in thread
From: Chuck Lever @ 2015-03-13 21:27 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA

The max_payload computation is generalized to ensure that the
payload maximum is the lesser of RPC_MAX_DATA_SEGS and the number of
data segments that can be transmitted in an inline buffer.

Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
---
 net/sunrpc/xprtrdma/fmr_ops.c      |   13 ++++++++++
 net/sunrpc/xprtrdma/frwr_ops.c     |   13 ++++++++++
 net/sunrpc/xprtrdma/physical_ops.c |   10 +++++++
 net/sunrpc/xprtrdma/transport.c    |    5 +++-
 net/sunrpc/xprtrdma/verbs.c        |   49 +++++++++++-------------------------
 net/sunrpc/xprtrdma/xprt_rdma.h    |    5 +++-
 6 files changed, 59 insertions(+), 36 deletions(-)

diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index ffb7d93..eec2660 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -17,6 +17,19 @@
 # define RPCDBG_FACILITY	RPCDBG_TRANS
 #endif
 
+/* Maximum scatter/gather per FMR */
+#define RPCRDMA_MAX_FMR_SGES	(64)
+
+/* FMR mode conveys up to 64 pages of payload per chunk segment.
+ */
+static size_t
+fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
+{
+	return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
+		     rpcrdma_max_segments(r_xprt) * RPCRDMA_MAX_FMR_SGES);
+}
+
 const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
+	.ro_maxpages			= fmr_op_maxpages,
 	.ro_displayname			= "fmr",
 };
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 79173f9..73a5ac8 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -17,6 +17,19 @@
 # define RPCDBG_FACILITY	RPCDBG_TRANS
 #endif
 
+/* FRWR mode conveys a list of pages per chunk segment. The
+ * maximum length of that list is the FRWR page list depth.
+ */
+static size_t
+frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
+{
+	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+
+	return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
+		     rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth);
+}
+
 const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
+	.ro_maxpages			= frwr_op_maxpages,
 	.ro_displayname			= "frwr",
 };
diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
index b0922ac..28ade19 100644
--- a/net/sunrpc/xprtrdma/physical_ops.c
+++ b/net/sunrpc/xprtrdma/physical_ops.c
@@ -19,6 +19,16 @@
 # define RPCDBG_FACILITY	RPCDBG_TRANS
 #endif
 
+/* PHYSICAL memory registration conveys one page per chunk segment.
+ */
+static size_t
+physical_op_maxpages(struct rpcrdma_xprt *r_xprt)
+{
+	return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
+		     rpcrdma_max_segments(r_xprt));
+}
+
 const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
+	.ro_maxpages			= physical_op_maxpages,
 	.ro_displayname			= "physical",
 };
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 271d306..9a9da40 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -406,7 +406,10 @@ xprt_setup_rdma(struct xprt_create *args)
 			  xprt_rdma_connect_worker);
 
 	xprt_rdma_format_addresses(xprt);
-	xprt->max_payload = rpcrdma_max_payload(new_xprt);
+	xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt);
+	if (xprt->max_payload == 0)
+		goto out4;
+	xprt->max_payload <<= PAGE_SHIFT;
 	dprintk("RPC:       %s: transport data payload maximum: %zu bytes\n",
 		__func__, xprt->max_payload);
 
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index e4d9d9c..837d4ea 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -2215,43 +2215,24 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
 	return rc;
 }
 
-/* Physical mapping means one Read/Write list entry per-page.
- * All list entries must fit within an inline buffer
- *
- * NB: The server must return a Write list for NFS READ,
- *     which has the same constraint. Factor in the inline
- *     rsize as well.
+/* How many chunk list items fit within our inline buffers?
  */
-static size_t
-rpcrdma_physical_max_payload(struct rpcrdma_xprt *r_xprt)
+unsigned int
+rpcrdma_max_segments(struct rpcrdma_xprt *r_xprt)
 {
 	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
-	unsigned int inline_size, pages;
-
-	inline_size = min_t(unsigned int,
-			    cdata->inline_wsize, cdata->inline_rsize);
-	inline_size -= RPCRDMA_HDRLEN_MIN;
-	pages = inline_size / sizeof(struct rpcrdma_segment);
-	return pages << PAGE_SHIFT;
-}
+	int bytes, segments;
 
-static size_t
-rpcrdma_mr_max_payload(struct rpcrdma_xprt *r_xprt)
-{
-	return RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT;
-}
-
-size_t
-rpcrdma_max_payload(struct rpcrdma_xprt *r_xprt)
-{
-	size_t result;
-
-	switch (r_xprt->rx_ia.ri_memreg_strategy) {
-	case RPCRDMA_ALLPHYSICAL:
-		result = rpcrdma_physical_max_payload(r_xprt);
-		break;
-	default:
-		result = rpcrdma_mr_max_payload(r_xprt);
+	bytes = min_t(unsigned int, cdata->inline_wsize, cdata->inline_rsize);
+	bytes -= RPCRDMA_HDRLEN_MIN;
+	if (bytes < sizeof(struct rpcrdma_segment) * 2) {
+		pr_warn("RPC:       %s: inline threshold too small\n",
+			__func__);
+		return 0;
 	}
-	return result;
+
+	segments = 1 << (fls(bytes / sizeof(struct rpcrdma_segment)) - 1);
+	dprintk("RPC:       %s: max chunk list size = %d segments\n",
+		__func__, segments);
+	return segments;
 }
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index ef3cf4a..59e627e 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -334,7 +334,9 @@ struct rpcrdma_stats {
 /*
  * Per-registration mode operations
  */
+struct rpcrdma_xprt;
 struct rpcrdma_memreg_ops {
+	size_t		(*ro_maxpages)(struct rpcrdma_xprt *);
 	const char	*ro_displayname;
 };
 
@@ -411,6 +413,8 @@ struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *,
 void rpcrdma_free_regbuf(struct rpcrdma_ia *,
 			 struct rpcrdma_regbuf *);
 
+unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *);
+
 /*
  * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c
  */
@@ -422,7 +426,6 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *);
  * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
  */
 int rpcrdma_marshal_req(struct rpc_rqst *);
-size_t rpcrdma_max_payload(struct rpcrdma_xprt *);
 
 /* Temporary NFS request map cache. Created in svc_rdma.c  */
 extern struct kmem_cache *svc_rdma_map_cachep;

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [PATCH v1 05/16] xprtrdma: Add a "register_external" op for each memreg mode
       [not found] ` <20150313212517.22783.18364.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
                     ` (3 preceding siblings ...)
  2015-03-13 21:27   ` [PATCH v1 04/16] xprtrdma: Add a "max_payload" op for each memreg mode Chuck Lever
@ 2015-03-13 21:27   ` Chuck Lever
       [not found]     ` <20150313212718.22783.10096.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
  2015-03-13 21:27   ` [PATCH v1 06/16] xprtrdma: Add a "deregister_external" " Chuck Lever
                     ` (10 subsequent siblings)
  15 siblings, 1 reply; 52+ messages in thread
From: Chuck Lever @ 2015-03-13 21:27 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA

There is very little common processing among the different external
memory registration functions. Have rpcrdma_create_chunks() call
the registration method directly. This removes a stack frame and a
switch statement from the external registration path.

Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
---
 net/sunrpc/xprtrdma/fmr_ops.c      |   51 +++++++++++
 net/sunrpc/xprtrdma/frwr_ops.c     |   88 ++++++++++++++++++
 net/sunrpc/xprtrdma/physical_ops.c |   17 ++++
 net/sunrpc/xprtrdma/rpc_rdma.c     |    5 +
 net/sunrpc/xprtrdma/verbs.c        |  172 +-----------------------------------
 net/sunrpc/xprtrdma/xprt_rdma.h    |    6 +
 6 files changed, 166 insertions(+), 173 deletions(-)

diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index eec2660..45fb646 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -29,7 +29,58 @@ fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
 		     rpcrdma_max_segments(r_xprt) * RPCRDMA_MAX_FMR_SGES);
 }
 
+/* Use the ib_map_phys_fmr() verb to register a memory region
+ * for remote access via RDMA READ or RDMA WRITE.
+ */
+static int
+fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
+	   int nsegs, bool writing)
+{
+	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+	struct rpcrdma_mr_seg *seg1 = seg;
+	struct rpcrdma_mw *mw = seg1->rl_mw;
+	u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
+	int len, pageoff, i, rc;
+
+	pageoff = offset_in_page(seg1->mr_offset);
+	seg1->mr_offset -= pageoff;	/* start of page */
+	seg1->mr_len += pageoff;
+	len = -pageoff;
+	if (nsegs > RPCRDMA_MAX_FMR_SGES)
+		nsegs = RPCRDMA_MAX_FMR_SGES;
+	for (i = 0; i < nsegs;) {
+		rpcrdma_map_one(ia, seg, writing);
+		physaddrs[i] = seg->mr_dma;
+		len += seg->mr_len;
+		++seg;
+		++i;
+		/* Check for holes */
+		if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
+		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
+			break;
+	}
+
+	rc = ib_map_phys_fmr(mw->r.fmr, physaddrs, i, seg1->mr_dma);
+	if (rc)
+		goto out_maperr;
+
+	seg1->mr_rkey = mw->r.fmr->rkey;
+	seg1->mr_base = seg1->mr_dma + pageoff;
+	seg1->mr_nsegs = i;
+	seg1->mr_len = len;
+	return i;
+
+out_maperr:
+	dprintk("RPC:       %s: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
+		__func__, len, (unsigned long long)seg1->mr_dma,
+		pageoff, i, rc);
+	while (i--)
+		rpcrdma_unmap_one(ia, --seg);
+	return rc;
+}
+
 const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
+	.ro_map				= fmr_op_map,
 	.ro_maxpages			= fmr_op_maxpages,
 	.ro_displayname			= "fmr",
 };
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 73a5ac8..2b5ccb0 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -29,7 +29,95 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
 		     rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth);
 }
 
+/* Post a FAST_REG Work Request to register a memory region
+ * for remote access via RDMA READ or RDMA WRITE.
+ */
+static int
+frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
+	    int nsegs, bool writing)
+{
+	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+	struct rpcrdma_mr_seg *seg1 = seg;
+	struct rpcrdma_mw *mw = seg1->rl_mw;
+	struct rpcrdma_frmr *frmr = &mw->r.frmr;
+	struct ib_mr *mr = frmr->fr_mr;
+	struct ib_send_wr fastreg_wr, *bad_wr;
+	u8 key;
+	int len, pageoff;
+	int i, rc;
+	int seg_len;
+	u64 pa;
+	int page_no;
+
+	pageoff = offset_in_page(seg1->mr_offset);
+	seg1->mr_offset -= pageoff;	/* start of page */
+	seg1->mr_len += pageoff;
+	len = -pageoff;
+	if (nsegs > ia->ri_max_frmr_depth)
+		nsegs = ia->ri_max_frmr_depth;
+	for (page_no = i = 0; i < nsegs;) {
+		rpcrdma_map_one(ia, seg, writing);
+		pa = seg->mr_dma;
+		for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
+			frmr->fr_pgl->page_list[page_no++] = pa;
+			pa += PAGE_SIZE;
+		}
+		len += seg->mr_len;
+		++seg;
+		++i;
+		/* Check for holes */
+		if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
+		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
+			break;
+	}
+	dprintk("RPC:       %s: Using frmr %p to map %d segments\n",
+		__func__, mw, i);
+
+	frmr->fr_state = FRMR_IS_VALID;
+
+	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
+	fastreg_wr.wr_id = (unsigned long)(void *)mw;
+	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
+	fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma;
+	fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl;
+	fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
+	fastreg_wr.wr.fast_reg.page_list_len = page_no;
+	fastreg_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
+	if (fastreg_wr.wr.fast_reg.length < len) {
+		rc = -EIO;
+		goto out_err;
+	}
+	fastreg_wr.wr.fast_reg.access_flags = writing ?
+				IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
+				IB_ACCESS_REMOTE_READ;
+	key = (u8)(mr->rkey & 0x000000FF);
+	ib_update_fast_reg_key(mr, ++key);
+	fastreg_wr.wr.fast_reg.rkey = mr->rkey;
+
+	DECR_CQCOUNT(&r_xprt->rx_ep);
+	rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr);
+	if (rc)
+		goto out_senderr;
+
+	seg1->mr_rkey = mr->rkey;
+	seg1->mr_base = seg1->mr_dma + pageoff;
+	seg1->mr_nsegs = i;
+	seg1->mr_len = len;
+	return i;
+
+out_senderr:
+	dprintk("RPC:       %s: ib_post_send status %i\n", __func__, rc);
+	ib_update_fast_reg_key(mr, --key);
+
+out_err:
+	frmr->fr_state = FRMR_IS_INVALID;
+	while (i--)
+		rpcrdma_unmap_one(ia, --seg);
+	return rc;
+}
+
 const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
+	.ro_map				= frwr_op_map,
 	.ro_maxpages			= frwr_op_maxpages,
 	.ro_displayname			= "frwr",
 };
diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
index 28ade19..5a284ee 100644
--- a/net/sunrpc/xprtrdma/physical_ops.c
+++ b/net/sunrpc/xprtrdma/physical_ops.c
@@ -28,7 +28,24 @@ physical_op_maxpages(struct rpcrdma_xprt *r_xprt)
 		     rpcrdma_max_segments(r_xprt));
 }
 
+/* The client's physical memory is already exposed for
+ * remote access via RDMA READ or RDMA WRITE.
+ */
+static int
+physical_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
+		int nsegs, bool writing)
+{
+	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+
+	rpcrdma_map_one(ia, seg, writing);
+	seg->mr_rkey = ia->ri_bind_mem->rkey;
+	seg->mr_base = seg->mr_dma;
+	seg->mr_nsegs = 1;
+	return 1;
+}
+
 const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
+	.ro_map				= physical_op_map,
 	.ro_maxpages			= physical_op_maxpages,
 	.ro_displayname			= "physical",
 };
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 41456d9..6ab1d03 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -187,6 +187,7 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
 	struct rpcrdma_write_array *warray = NULL;
 	struct rpcrdma_write_chunk *cur_wchunk = NULL;
 	__be32 *iptr = headerp->rm_body.rm_chunks;
+	int (*map)(struct rpcrdma_xprt *, struct rpcrdma_mr_seg *, int, bool);
 
 	if (type == rpcrdma_readch || type == rpcrdma_areadch) {
 		/* a read chunk - server will RDMA Read our memory */
@@ -209,9 +210,9 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
 	if (nsegs < 0)
 		return nsegs;
 
+	map = r_xprt->rx_ia.ri_ops->ro_map;
 	do {
-		n = rpcrdma_register_external(seg, nsegs,
-						cur_wchunk != NULL, r_xprt);
+		n = map(r_xprt, seg, nsegs, cur_wchunk != NULL);
 		if (n <= 0)
 			goto out;
 		if (cur_rchunk) {	/* read */
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 837d4ea..851ed97 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1857,8 +1857,8 @@ rpcrdma_free_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
  * Wrappers for chunk registration, shared by read/write chunk code.
  */
 
-static void
-rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
+void
+rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, bool writing)
 {
 	seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
 	seg->mr_dmalen = seg->mr_len;
@@ -1878,7 +1878,7 @@ rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
 	}
 }
 
-static void
+void
 rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
 {
 	if (seg->mr_page)
@@ -1890,93 +1890,6 @@ rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
 }
 
 static int
-rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
-			int *nsegs, int writing, struct rpcrdma_ia *ia,
-			struct rpcrdma_xprt *r_xprt)
-{
-	struct rpcrdma_mr_seg *seg1 = seg;
-	struct rpcrdma_mw *mw = seg1->rl_mw;
-	struct rpcrdma_frmr *frmr = &mw->r.frmr;
-	struct ib_mr *mr = frmr->fr_mr;
-	struct ib_send_wr fastreg_wr, *bad_wr;
-	u8 key;
-	int len, pageoff;
-	int i, rc;
-	int seg_len;
-	u64 pa;
-	int page_no;
-
-	pageoff = offset_in_page(seg1->mr_offset);
-	seg1->mr_offset -= pageoff;	/* start of page */
-	seg1->mr_len += pageoff;
-	len = -pageoff;
-	if (*nsegs > ia->ri_max_frmr_depth)
-		*nsegs = ia->ri_max_frmr_depth;
-	for (page_no = i = 0; i < *nsegs;) {
-		rpcrdma_map_one(ia, seg, writing);
-		pa = seg->mr_dma;
-		for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
-			frmr->fr_pgl->page_list[page_no++] = pa;
-			pa += PAGE_SIZE;
-		}
-		len += seg->mr_len;
-		++seg;
-		++i;
-		/* Check for holes */
-		if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
-		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
-			break;
-	}
-	dprintk("RPC:       %s: Using frmr %p to map %d segments\n",
-		__func__, mw, i);
-
-	frmr->fr_state = FRMR_IS_VALID;
-
-	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
-	fastreg_wr.wr_id = (unsigned long)(void *)mw;
-	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
-	fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma;
-	fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl;
-	fastreg_wr.wr.fast_reg.page_list_len = page_no;
-	fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
-	fastreg_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
-	if (fastreg_wr.wr.fast_reg.length < len) {
-		rc = -EIO;
-		goto out_err;
-	}
-
-	/* Bump the key */
-	key = (u8)(mr->rkey & 0x000000FF);
-	ib_update_fast_reg_key(mr, ++key);
-
-	fastreg_wr.wr.fast_reg.access_flags = (writing ?
-				IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
-				IB_ACCESS_REMOTE_READ);
-	fastreg_wr.wr.fast_reg.rkey = mr->rkey;
-	DECR_CQCOUNT(&r_xprt->rx_ep);
-
-	rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr);
-	if (rc) {
-		dprintk("RPC:       %s: failed ib_post_send for register,"
-			" status %i\n", __func__, rc);
-		ib_update_fast_reg_key(mr, --key);
-		goto out_err;
-	} else {
-		seg1->mr_rkey = mr->rkey;
-		seg1->mr_base = seg1->mr_dma + pageoff;
-		seg1->mr_nsegs = i;
-		seg1->mr_len = len;
-	}
-	*nsegs = i;
-	return 0;
-out_err:
-	frmr->fr_state = FRMR_IS_INVALID;
-	while (i--)
-		rpcrdma_unmap_one(ia, --seg);
-	return rc;
-}
-
-static int
 rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
 			struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
 {
@@ -2007,49 +1920,6 @@ rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
 }
 
 static int
-rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
-			int *nsegs, int writing, struct rpcrdma_ia *ia)
-{
-	struct rpcrdma_mr_seg *seg1 = seg;
-	u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
-	int len, pageoff, i, rc;
-
-	pageoff = offset_in_page(seg1->mr_offset);
-	seg1->mr_offset -= pageoff;	/* start of page */
-	seg1->mr_len += pageoff;
-	len = -pageoff;
-	if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
-		*nsegs = RPCRDMA_MAX_DATA_SEGS;
-	for (i = 0; i < *nsegs;) {
-		rpcrdma_map_one(ia, seg, writing);
-		physaddrs[i] = seg->mr_dma;
-		len += seg->mr_len;
-		++seg;
-		++i;
-		/* Check for holes */
-		if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
-		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
-			break;
-	}
-	rc = ib_map_phys_fmr(seg1->rl_mw->r.fmr, physaddrs, i, seg1->mr_dma);
-	if (rc) {
-		dprintk("RPC:       %s: failed ib_map_phys_fmr "
-			"%u@0x%llx+%i (%d)... status %i\n", __func__,
-			len, (unsigned long long)seg1->mr_dma,
-			pageoff, i, rc);
-		while (i--)
-			rpcrdma_unmap_one(ia, --seg);
-	} else {
-		seg1->mr_rkey = seg1->rl_mw->r.fmr->rkey;
-		seg1->mr_base = seg1->mr_dma + pageoff;
-		seg1->mr_nsegs = i;
-		seg1->mr_len = len;
-	}
-	*nsegs = i;
-	return rc;
-}
-
-static int
 rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
 			struct rpcrdma_ia *ia)
 {
@@ -2070,42 +1940,6 @@ rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
 }
 
 int
-rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
-			int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
-{
-	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
-	int rc = 0;
-
-	switch (ia->ri_memreg_strategy) {
-
-	case RPCRDMA_ALLPHYSICAL:
-		rpcrdma_map_one(ia, seg, writing);
-		seg->mr_rkey = ia->ri_bind_mem->rkey;
-		seg->mr_base = seg->mr_dma;
-		seg->mr_nsegs = 1;
-		nsegs = 1;
-		break;
-
-	/* Registration using frmr registration */
-	case RPCRDMA_FRMR:
-		rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
-		break;
-
-	/* Registration using fmr memory registration */
-	case RPCRDMA_MTHCAFMR:
-		rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
-		break;
-
-	default:
-		return -EIO;
-	}
-	if (rc)
-		return rc;
-
-	return nsegs;
-}
-
-int
 rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
 		struct rpcrdma_xprt *r_xprt)
 {
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 59e627e..7bf077b 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -336,6 +336,8 @@ struct rpcrdma_stats {
  */
 struct rpcrdma_xprt;
 struct rpcrdma_memreg_ops {
+	int		(*ro_map)(struct rpcrdma_xprt *,
+				  struct rpcrdma_mr_seg *, int, bool);
 	size_t		(*ro_maxpages)(struct rpcrdma_xprt *);
 	const char	*ro_displayname;
 };
@@ -403,8 +405,6 @@ void rpcrdma_buffer_put(struct rpcrdma_req *);
 void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
 void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
 
-int rpcrdma_register_external(struct rpcrdma_mr_seg *,
-				int, int, struct rpcrdma_xprt *);
 int rpcrdma_deregister_external(struct rpcrdma_mr_seg *,
 				struct rpcrdma_xprt *);
 
@@ -414,6 +414,8 @@ void rpcrdma_free_regbuf(struct rpcrdma_ia *,
 			 struct rpcrdma_regbuf *);
 
 unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *);
+void rpcrdma_map_one(struct rpcrdma_ia *, struct rpcrdma_mr_seg *, bool);
+void rpcrdma_unmap_one(struct rpcrdma_ia *, struct rpcrdma_mr_seg *);
 
 /*
  * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [PATCH v1 06/16] xprtrdma: Add a "deregister_external" op for each memreg mode
       [not found] ` <20150313212517.22783.18364.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
                     ` (4 preceding siblings ...)
  2015-03-13 21:27   ` [PATCH v1 05/16] xprtrdma: Add a "register_external" " Chuck Lever
@ 2015-03-13 21:27   ` Chuck Lever
       [not found]     ` <20150313212728.22783.11821.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
  2015-03-13 21:27   ` [PATCH v1 07/16] xprtrdma: Add "init MRs" memreg op Chuck Lever
                     ` (9 subsequent siblings)
  15 siblings, 1 reply; 52+ messages in thread
From: Chuck Lever @ 2015-03-13 21:27 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA

There is very little common processing among the different external
memory deregistration functions.

In addition, instead of calling the deregistration function for each
segment, have one call release all segments for a request. This makes
the API a little asymmetrical, but a hair faster.

Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
---
 net/sunrpc/xprtrdma/fmr_ops.c      |   37 ++++++++++++++++
 net/sunrpc/xprtrdma/frwr_ops.c     |   46 ++++++++++++++++++++
 net/sunrpc/xprtrdma/physical_ops.c |   13 ++++++
 net/sunrpc/xprtrdma/rpc_rdma.c     |    7 +--
 net/sunrpc/xprtrdma/transport.c    |    8 +---
 net/sunrpc/xprtrdma/verbs.c        |   81 ------------------------------------
 net/sunrpc/xprtrdma/xprt_rdma.h    |    5 +-
 7 files changed, 103 insertions(+), 94 deletions(-)

diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 45fb646..9b983b4 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -20,6 +20,32 @@
 /* Maximum scatter/gather per FMR */
 #define RPCRDMA_MAX_FMR_SGES	(64)
 
+/* Use the ib_unmap_fmr() verb to prevent further remote
+ * access via RDMA READ or RDMA WRITE.
+ */
+static int
+__fmr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
+{
+	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+	struct rpcrdma_mr_seg *seg1 = seg;
+	int rc, nsegs = seg->mr_nsegs;
+	LIST_HEAD(l);
+
+	list_add(&seg1->rl_mw->r.fmr->list, &l);
+	rc = ib_unmap_fmr(&l);
+	read_lock(&ia->ri_qplock);
+	while (seg1->mr_nsegs--)
+		rpcrdma_unmap_one(ia, seg++);
+	read_unlock(&ia->ri_qplock);
+	if (rc)
+		goto out_err;
+	return nsegs;
+
+out_err:
+	dprintk("RPC:       %s: ib_unmap_fmr status %i\n", __func__, rc);
+	return nsegs;
+}
+
 /* FMR mode conveys up to 64 pages of payload per chunk segment.
  */
 static size_t
@@ -79,8 +105,19 @@ out_maperr:
 	return rc;
 }
 
+static void
+fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
+	     unsigned int count)
+{
+	unsigned int i;
+
+	for (i = 0; count--;)
+		i += __fmr_unmap(r_xprt, &req->rl_segments[i]);
+}
+
 const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
 	.ro_map				= fmr_op_map,
+	.ro_unmap			= fmr_op_unmap,
 	.ro_maxpages			= fmr_op_maxpages,
 	.ro_displayname			= "fmr",
 };
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 2b5ccb0..05b5761 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -17,6 +17,41 @@
 # define RPCDBG_FACILITY	RPCDBG_TRANS
 #endif
 
+/* Post a LOCAL_INV Work Request to prevent further remote access
+ * via RDMA READ or RDMA WRITE.
+ */
+static int
+__frwr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
+{
+	struct rpcrdma_mr_seg *seg1 = seg;
+	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+	struct ib_send_wr invalidate_wr, *bad_wr;
+	int rc, nsegs = seg->mr_nsegs;
+
+	seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;
+
+	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
+	invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw;
+	invalidate_wr.opcode = IB_WR_LOCAL_INV;
+	invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey;
+	DECR_CQCOUNT(&r_xprt->rx_ep);
+
+	read_lock(&ia->ri_qplock);
+	while (seg1->mr_nsegs--)
+		rpcrdma_unmap_one(ia, seg++);
+	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
+	read_unlock(&ia->ri_qplock);
+	if (rc)
+		goto out_err;
+	return nsegs;
+
+out_err:
+	/* Force rpcrdma_buffer_get() to retry */
+	seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
+	dprintk("RPC:       %s: ib_post_send status %i\n", __func__, rc);
+	return nsegs;
+}
+
 /* FRWR mode conveys a list of pages per chunk segment. The
  * maximum length of that list is the FRWR page list depth.
  */
@@ -116,8 +151,19 @@ out_err:
 	return rc;
 }
 
+static void
+frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
+	      unsigned int count)
+{
+	unsigned int i;
+
+	for (i = 0; count--;)
+		i += __frwr_unmap(r_xprt, &req->rl_segments[i]);
+}
+
 const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
 	.ro_map				= frwr_op_map,
+	.ro_unmap			= frwr_op_unmap,
 	.ro_maxpages			= frwr_op_maxpages,
 	.ro_displayname			= "frwr",
 };
diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
index 5a284ee..f2c15be 100644
--- a/net/sunrpc/xprtrdma/physical_ops.c
+++ b/net/sunrpc/xprtrdma/physical_ops.c
@@ -44,8 +44,21 @@ physical_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 	return 1;
 }
 
+/* Unmap a memory region, but leave it registered.
+ */
+static void
+physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
+		  unsigned int count)
+{
+	unsigned int i;
+
+	for (i = 0; i < count; i++)
+		rpcrdma_unmap_one(&r_xprt->rx_ia, &req->rl_segments[i]);
+}
+
 const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
 	.ro_map				= physical_op_map,
+	.ro_unmap			= physical_op_unmap,
 	.ro_maxpages			= physical_op_maxpages,
 	.ro_displayname			= "physical",
 };
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 6ab1d03..7b51d9d 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -284,11 +284,8 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
 	return (unsigned char *)iptr - (unsigned char *)headerp;
 
 out:
-	if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_FRMR) {
-		for (pos = 0; nchunks--;)
-			pos += rpcrdma_deregister_external(
-					&req->rl_segments[pos], r_xprt);
-	}
+	if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_FRMR)
+		r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, req, nchunks);
 	return n;
 }
 
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 9a9da40..c484671 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -571,7 +571,6 @@ xprt_rdma_free(void *buffer)
 	struct rpcrdma_req *req;
 	struct rpcrdma_xprt *r_xprt;
 	struct rpcrdma_regbuf *rb;
-	int i;
 
 	if (buffer == NULL)
 		return;
@@ -582,11 +581,8 @@ xprt_rdma_free(void *buffer)
 
 	dprintk("RPC:       %s: called on 0x%p\n", __func__, req->rl_reply);
 
-	for (i = 0; req->rl_nchunks;) {
-		--req->rl_nchunks;
-		i += rpcrdma_deregister_external(
-			&req->rl_segments[i], r_xprt);
-	}
+	r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, req, req->rl_nchunks);
+	req->rl_nchunks = 0;
 
 	rpcrdma_buffer_put(req);
 }
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 851ed97..a1621fd 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1509,7 +1509,7 @@ rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
 	}
 }
 
-/* rpcrdma_unmap_one() was already done by rpcrdma_deregister_frmr_external().
+/* rpcrdma_unmap_one() was already done during deregistration.
  * Redo only the ib_post_send().
  */
 static void
@@ -1889,85 +1889,6 @@ rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
 				seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
 }
 
-static int
-rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
-			struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
-{
-	struct rpcrdma_mr_seg *seg1 = seg;
-	struct ib_send_wr invalidate_wr, *bad_wr;
-	int rc;
-
-	seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;
-
-	memset(&invalidate_wr, 0, sizeof invalidate_wr);
-	invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw;
-	invalidate_wr.opcode = IB_WR_LOCAL_INV;
-	invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey;
-	DECR_CQCOUNT(&r_xprt->rx_ep);
-
-	read_lock(&ia->ri_qplock);
-	while (seg1->mr_nsegs--)
-		rpcrdma_unmap_one(ia, seg++);
-	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
-	read_unlock(&ia->ri_qplock);
-	if (rc) {
-		/* Force rpcrdma_buffer_get() to retry */
-		seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
-		dprintk("RPC:       %s: failed ib_post_send for invalidate,"
-			" status %i\n", __func__, rc);
-	}
-	return rc;
-}
-
-static int
-rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
-			struct rpcrdma_ia *ia)
-{
-	struct rpcrdma_mr_seg *seg1 = seg;
-	LIST_HEAD(l);
-	int rc;
-
-	list_add(&seg1->rl_mw->r.fmr->list, &l);
-	rc = ib_unmap_fmr(&l);
-	read_lock(&ia->ri_qplock);
-	while (seg1->mr_nsegs--)
-		rpcrdma_unmap_one(ia, seg++);
-	read_unlock(&ia->ri_qplock);
-	if (rc)
-		dprintk("RPC:       %s: failed ib_unmap_fmr,"
-			" status %i\n", __func__, rc);
-	return rc;
-}
-
-int
-rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
-		struct rpcrdma_xprt *r_xprt)
-{
-	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
-	int nsegs = seg->mr_nsegs, rc;
-
-	switch (ia->ri_memreg_strategy) {
-
-	case RPCRDMA_ALLPHYSICAL:
-		read_lock(&ia->ri_qplock);
-		rpcrdma_unmap_one(ia, seg);
-		read_unlock(&ia->ri_qplock);
-		break;
-
-	case RPCRDMA_FRMR:
-		rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
-		break;
-
-	case RPCRDMA_MTHCAFMR:
-		rc = rpcrdma_deregister_fmr_external(seg, ia);
-		break;
-
-	default:
-		break;
-	}
-	return nsegs;
-}
-
 /*
  * Prepost any receive buffer, then post send.
  *
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 7bf077b..3aabbb2 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -338,6 +338,8 @@ struct rpcrdma_xprt;
 struct rpcrdma_memreg_ops {
 	int		(*ro_map)(struct rpcrdma_xprt *,
 				  struct rpcrdma_mr_seg *, int, bool);
+	void		(*ro_unmap)(struct rpcrdma_xprt *,
+				    struct rpcrdma_req *, unsigned int);
 	size_t		(*ro_maxpages)(struct rpcrdma_xprt *);
 	const char	*ro_displayname;
 };
@@ -405,9 +407,6 @@ void rpcrdma_buffer_put(struct rpcrdma_req *);
 void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
 void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
 
-int rpcrdma_deregister_external(struct rpcrdma_mr_seg *,
-				struct rpcrdma_xprt *);
-
 struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *,
 					    size_t, gfp_t);
 void rpcrdma_free_regbuf(struct rpcrdma_ia *,

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [PATCH v1 07/16] xprtrdma: Add "init MRs" memreg op
       [not found] ` <20150313212517.22783.18364.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
                     ` (5 preceding siblings ...)
  2015-03-13 21:27   ` [PATCH v1 06/16] xprtrdma: Add a "deregister_external" " Chuck Lever
@ 2015-03-13 21:27   ` Chuck Lever
       [not found]     ` <20150313212738.22783.34521.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
  2015-03-13 21:27   ` [PATCH v1 08/16] xprtrdma: Add "reset " Chuck Lever
                     ` (8 subsequent siblings)
  15 siblings, 1 reply; 52+ messages in thread
From: Chuck Lever @ 2015-03-13 21:27 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA

This method is used when setting up a new transport instance to
create a pool of Memory Region objects that will be used to register
memory during operation.

Memory Regions are not needed for "physical" registration, since
->prepare and ->release are basically no-ops for that mode.

Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
---
 net/sunrpc/xprtrdma/fmr_ops.c      |   42 +++++++++++++++
 net/sunrpc/xprtrdma/frwr_ops.c     |   66 +++++++++++++++++++++++
 net/sunrpc/xprtrdma/physical_ops.c |    7 ++
 net/sunrpc/xprtrdma/verbs.c        |  104 +-----------------------------------
 net/sunrpc/xprtrdma/xprt_rdma.h    |    1 
 5 files changed, 119 insertions(+), 101 deletions(-)

diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 9b983b4..1501db0 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -55,6 +55,47 @@ fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
 		     rpcrdma_max_segments(r_xprt) * RPCRDMA_MAX_FMR_SGES);
 }
 
+static int
+fmr_op_init(struct rpcrdma_xprt *r_xprt)
+{
+	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+	int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ;
+	struct ib_fmr_attr fmr_attr = {
+		.max_pages	= RPCRDMA_MAX_FMR_SGES,
+		.max_maps	= 1,
+		.page_shift	= PAGE_SHIFT
+	};
+	struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
+	struct rpcrdma_mw *r;
+	int i, rc;
+
+	INIT_LIST_HEAD(&buf->rb_mws);
+	INIT_LIST_HEAD(&buf->rb_all);
+
+	i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
+	dprintk("RPC:       %s: initalizing %d FMRs\n", __func__, i);
+
+	while (i--) {
+		r = kzalloc(sizeof(*r), GFP_KERNEL);
+		if (!r)
+			return -ENOMEM;
+
+		r->r.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr);
+		if (IS_ERR(r->r.fmr))
+			goto out_fmr_err;
+
+		list_add(&r->mw_list, &buf->rb_mws);
+		list_add(&r->mw_all, &buf->rb_all);
+	}
+	return 0;
+
+out_fmr_err:
+	rc = PTR_ERR(r->r.fmr);
+	dprintk("RPC:       %s: ib_alloc_fmr status %i\n", __func__, rc);
+	kfree(r);
+	return rc;
+}
+
 /* Use the ib_map_phys_fmr() verb to register a memory region
  * for remote access via RDMA READ or RDMA WRITE.
  */
@@ -119,5 +160,6 @@ const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
 	.ro_map				= fmr_op_map,
 	.ro_unmap			= fmr_op_unmap,
 	.ro_maxpages			= fmr_op_maxpages,
+	.ro_init			= fmr_op_init,
 	.ro_displayname			= "fmr",
 };
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 05b5761..975372c 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -17,6 +17,35 @@
 # define RPCDBG_FACILITY	RPCDBG_TRANS
 #endif
 
+static int
+__frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device,
+	    unsigned int depth)
+{
+	struct rpcrdma_frmr *f = &r->r.frmr;
+	int rc;
+
+	f->fr_mr = ib_alloc_fast_reg_mr(pd, depth);
+	if (IS_ERR(f->fr_mr))
+		goto out_mr_err;
+	f->fr_pgl = ib_alloc_fast_reg_page_list(device, depth);
+	if (IS_ERR(f->fr_pgl))
+		goto out_list_err;
+	return 0;
+
+out_mr_err:
+	rc = PTR_ERR(f->fr_mr);
+	dprintk("RPC:       %s: ib_alloc_fast_reg_mr status %i\n",
+		__func__, rc);
+	return rc;
+
+out_list_err:
+	rc = PTR_ERR(f->fr_pgl);
+	dprintk("RPC:       %s: ib_alloc_fast_reg_page_list status %i\n",
+		__func__, rc);
+	ib_dereg_mr(f->fr_mr);
+	return rc;
+}
+
 /* Post a LOCAL_INV Work Request to prevent further remote access
  * via RDMA READ or RDMA WRITE.
  */
@@ -64,6 +93,42 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
 		     rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth);
 }
 
+static int
+frwr_op_init(struct rpcrdma_xprt *r_xprt)
+{
+	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+	struct ib_device *device = r_xprt->rx_ia.ri_id->device;
+	unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth;
+	struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
+	int i;
+
+	INIT_LIST_HEAD(&buf->rb_mws);
+	INIT_LIST_HEAD(&buf->rb_all);
+
+	i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
+	dprintk("RPC:       %s: initalizing %d FRMRs\n", __func__, i);
+
+	while (i--) {
+		struct rpcrdma_mw *r;
+		int rc;
+
+		r = kzalloc(sizeof(*r), GFP_KERNEL);
+		if (!r)
+			return -ENOMEM;
+
+		rc = __frwr_init(r, pd, device, depth);
+		if (rc) {
+			kfree(r);
+			return rc;
+		}
+
+		list_add(&r->mw_list, &buf->rb_mws);
+		list_add(&r->mw_all, &buf->rb_all);
+	}
+
+	return 0;
+}
+
 /* Post a FAST_REG Work Request to register a memory region
  * for remote access via RDMA READ or RDMA WRITE.
  */
@@ -165,5 +230,6 @@ const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
 	.ro_map				= frwr_op_map,
 	.ro_unmap			= frwr_op_unmap,
 	.ro_maxpages			= frwr_op_maxpages,
+	.ro_init			= frwr_op_init,
 	.ro_displayname			= "frwr",
 };
diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
index f2c15be..ae2b0bc 100644
--- a/net/sunrpc/xprtrdma/physical_ops.c
+++ b/net/sunrpc/xprtrdma/physical_ops.c
@@ -28,6 +28,12 @@ physical_op_maxpages(struct rpcrdma_xprt *r_xprt)
 		     rpcrdma_max_segments(r_xprt));
 }
 
+static int
+physical_op_init(struct rpcrdma_xprt *r_xprt)
+{
+	return 0;
+}
+
 /* The client's physical memory is already exposed for
  * remote access via RDMA READ or RDMA WRITE.
  */
@@ -60,5 +66,6 @@ const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
 	.ro_map				= physical_op_map,
 	.ro_unmap			= physical_op_unmap,
 	.ro_maxpages			= physical_op_maxpages,
+	.ro_init			= physical_op_init,
 	.ro_displayname			= "physical",
 };
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index a1621fd..d7810d6 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1123,91 +1123,6 @@ out:
 	return ERR_PTR(rc);
 }
 
-static int
-rpcrdma_init_fmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
-{
-	int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ;
-	struct ib_fmr_attr fmr_attr = {
-		.max_pages	= RPCRDMA_MAX_DATA_SEGS,
-		.max_maps	= 1,
-		.page_shift	= PAGE_SHIFT
-	};
-	struct rpcrdma_mw *r;
-	int i, rc;
-
-	i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
-	dprintk("RPC:       %s: initalizing %d FMRs\n", __func__, i);
-
-	while (i--) {
-		r = kzalloc(sizeof(*r), GFP_KERNEL);
-		if (r == NULL)
-			return -ENOMEM;
-
-		r->r.fmr = ib_alloc_fmr(ia->ri_pd, mr_access_flags, &fmr_attr);
-		if (IS_ERR(r->r.fmr)) {
-			rc = PTR_ERR(r->r.fmr);
-			dprintk("RPC:       %s: ib_alloc_fmr failed %i\n",
-				__func__, rc);
-			goto out_free;
-		}
-
-		list_add(&r->mw_list, &buf->rb_mws);
-		list_add(&r->mw_all, &buf->rb_all);
-	}
-	return 0;
-
-out_free:
-	kfree(r);
-	return rc;
-}
-
-static int
-rpcrdma_init_frmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
-{
-	struct rpcrdma_frmr *f;
-	struct rpcrdma_mw *r;
-	int i, rc;
-
-	i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
-	dprintk("RPC:       %s: initalizing %d FRMRs\n", __func__, i);
-
-	while (i--) {
-		r = kzalloc(sizeof(*r), GFP_KERNEL);
-		if (r == NULL)
-			return -ENOMEM;
-		f = &r->r.frmr;
-
-		f->fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
-						ia->ri_max_frmr_depth);
-		if (IS_ERR(f->fr_mr)) {
-			rc = PTR_ERR(f->fr_mr);
-			dprintk("RPC:       %s: ib_alloc_fast_reg_mr "
-				"failed %i\n", __func__, rc);
-			goto out_free;
-		}
-
-		f->fr_pgl = ib_alloc_fast_reg_page_list(ia->ri_id->device,
-							ia->ri_max_frmr_depth);
-		if (IS_ERR(f->fr_pgl)) {
-			rc = PTR_ERR(f->fr_pgl);
-			dprintk("RPC:       %s: ib_alloc_fast_reg_page_list "
-				"failed %i\n", __func__, rc);
-
-			ib_dereg_mr(f->fr_mr);
-			goto out_free;
-		}
-
-		list_add(&r->mw_list, &buf->rb_mws);
-		list_add(&r->mw_all, &buf->rb_all);
-	}
-
-	return 0;
-
-out_free:
-	kfree(r);
-	return rc;
-}
-
 int
 rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
 {
@@ -1244,22 +1159,9 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
 	buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
 	p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];
 
-	INIT_LIST_HEAD(&buf->rb_mws);
-	INIT_LIST_HEAD(&buf->rb_all);
-	switch (ia->ri_memreg_strategy) {
-	case RPCRDMA_FRMR:
-		rc = rpcrdma_init_frmrs(ia, buf);
-		if (rc)
-			goto out;
-		break;
-	case RPCRDMA_MTHCAFMR:
-		rc = rpcrdma_init_fmrs(ia, buf);
-		if (rc)
-			goto out;
-		break;
-	default:
-		break;
-	}
+	rc = ia->ri_ops->ro_init(r_xprt);
+	if (rc)
+		goto out;
 
 	for (i = 0; i < buf->rb_max_requests; i++) {
 		struct rpcrdma_req *req;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 3aabbb2..4fe3c38 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -341,6 +341,7 @@ struct rpcrdma_memreg_ops {
 	void		(*ro_unmap)(struct rpcrdma_xprt *,
 				    struct rpcrdma_req *, unsigned int);
 	size_t		(*ro_maxpages)(struct rpcrdma_xprt *);
+	int		(*ro_init)(struct rpcrdma_xprt *);
 	const char	*ro_displayname;
 };
 

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [PATCH v1 08/16] xprtrdma: Add "reset MRs" memreg op
       [not found] ` <20150313212517.22783.18364.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
                     ` (6 preceding siblings ...)
  2015-03-13 21:27   ` [PATCH v1 07/16] xprtrdma: Add "init MRs" memreg op Chuck Lever
@ 2015-03-13 21:27   ` Chuck Lever
       [not found]     ` <20150313212747.22783.98300.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
  2015-03-13 21:27   ` [PATCH v1 09/16] xprtrdma: Add "destroy " Chuck Lever
                     ` (7 subsequent siblings)
  15 siblings, 1 reply; 52+ messages in thread
From: Chuck Lever @ 2015-03-13 21:27 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA

This method is invoked when a transport instance is about to be
reconnected. Each Memory Region object is reset to its initial
state.

Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
---
 net/sunrpc/xprtrdma/fmr_ops.c      |   23 ++++++++
 net/sunrpc/xprtrdma/frwr_ops.c     |   46 ++++++++++++++++
 net/sunrpc/xprtrdma/physical_ops.c |    6 ++
 net/sunrpc/xprtrdma/verbs.c        |  103 +-----------------------------------
 net/sunrpc/xprtrdma/xprt_rdma.h    |    1 +
 5 files changed, 78 insertions(+), 101 deletions(-)

diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 1501db0..1ccb3de 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -156,10 +156,33 @@ fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 		i += __fmr_unmap(r_xprt, &req->rl_segments[i]);
 }
 
+/* After a disconnect, unmap all FMRs.
+ *
+ * This is invoked only in the transport connect worker in order
+ * to serialize with rpcrdma_register_fmr_external().
+ */
+static void
+fmr_op_reset(struct rpcrdma_xprt *r_xprt)
+{
+	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+	struct rpcrdma_mw *r;
+	LIST_HEAD(list);
+	int rc;
+
+	list_for_each_entry(r, &buf->rb_all, mw_all)
+		list_add(&r->r.fmr->list, &list);
+
+	rc = ib_unmap_fmr(&list);
+	if (rc)
+		dprintk("RPC:       %s: ib_unmap_fmr failed %i\n",
+			__func__, rc);
+}
+
 const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
 	.ro_map				= fmr_op_map,
 	.ro_unmap			= fmr_op_unmap,
 	.ro_maxpages			= fmr_op_maxpages,
 	.ro_init			= fmr_op_init,
+	.ro_reset			= fmr_op_reset,
 	.ro_displayname			= "fmr",
 };
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 975372c..b4ce0e5 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -81,6 +81,18 @@ out_err:
 	return nsegs;
 }
 
+static void
+__frwr_release(struct rpcrdma_mw *r)
+{
+	int rc;
+
+	rc = ib_dereg_mr(r->r.frmr.fr_mr);
+	if (rc)
+		dprintk("RPC:       %s: ib_dereg_mr status %i\n",
+			__func__, rc);
+	ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
+}
+
 /* FRWR mode conveys a list of pages per chunk segment. The
  * maximum length of that list is the FRWR page list depth.
  */
@@ -226,10 +238,44 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 		i += __frwr_unmap(r_xprt, &req->rl_segments[i]);
 }
 
+/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
+ * an unusable state. Find FRMRs in this state and dereg / reg
+ * each.  FRMRs that are VALID and attached to an rpcrdma_req are
+ * also torn down.
+ *
+ * This gives all in-use FRMRs a fresh rkey and leaves them INVALID.
+ *
+ * This is invoked only in the transport connect worker in order
+ * to serialize with rpcrdma_register_frmr_external().
+ */
+static void
+frwr_op_reset(struct rpcrdma_xprt *r_xprt)
+{
+	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+	struct ib_device *device = r_xprt->rx_ia.ri_id->device;
+	unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth;
+	struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
+	struct rpcrdma_mw *r;
+	int rc;
+
+	list_for_each_entry(r, &buf->rb_all, mw_all) {
+		if (r->r.frmr.fr_state == FRMR_IS_INVALID)
+			continue;
+
+		__frwr_release(r);
+		rc = __frwr_init(r, pd, device, depth);
+		if (rc)
+			continue;
+
+		r->r.frmr.fr_state = FRMR_IS_INVALID;
+	}
+}
+
 const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
 	.ro_map				= frwr_op_map,
 	.ro_unmap			= frwr_op_unmap,
 	.ro_maxpages			= frwr_op_maxpages,
 	.ro_init			= frwr_op_init,
+	.ro_reset			= frwr_op_reset,
 	.ro_displayname			= "frwr",
 };
diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
index ae2b0bc..0afc691 100644
--- a/net/sunrpc/xprtrdma/physical_ops.c
+++ b/net/sunrpc/xprtrdma/physical_ops.c
@@ -62,10 +62,16 @@ physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 		rpcrdma_unmap_one(&r_xprt->rx_ia, &req->rl_segments[i]);
 }
 
+static void
+physical_op_reset(struct rpcrdma_xprt *r_xprt)
+{
+}
+
 const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
 	.ro_map				= physical_op_map,
 	.ro_unmap			= physical_op_unmap,
 	.ro_maxpages			= physical_op_maxpages,
 	.ro_init			= physical_op_init,
+	.ro_reset			= physical_op_reset,
 	.ro_displayname			= "physical",
 };
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index d7810d6..e17d91a 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -63,9 +63,6 @@
 # define RPCDBG_FACILITY	RPCDBG_TRANS
 #endif
 
-static void rpcrdma_reset_frmrs(struct rpcrdma_ia *);
-static void rpcrdma_reset_fmrs(struct rpcrdma_ia *);
-
 /*
  * internal functions
  */
@@ -944,21 +941,9 @@ retry:
 		rpcrdma_ep_disconnect(ep, ia);
 		rpcrdma_flush_cqs(ep);
 
-		switch (ia->ri_memreg_strategy) {
-		case RPCRDMA_FRMR:
-			rpcrdma_reset_frmrs(ia);
-			break;
-		case RPCRDMA_MTHCAFMR:
-			rpcrdma_reset_fmrs(ia);
-			break;
-		case RPCRDMA_ALLPHYSICAL:
-			break;
-		default:
-			rc = -EIO;
-			goto out;
-		}
-
 		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
+		ia->ri_ops->ro_reset(xprt);
+
 		id = rpcrdma_create_id(xprt, ia,
 				(struct sockaddr *)&xprt->rx_data.addr);
 		if (IS_ERR(id)) {
@@ -1288,90 +1273,6 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
 	kfree(buf->rb_pool);
 }
 
-/* After a disconnect, unmap all FMRs.
- *
- * This is invoked only in the transport connect worker in order
- * to serialize with rpcrdma_register_fmr_external().
- */
-static void
-rpcrdma_reset_fmrs(struct rpcrdma_ia *ia)
-{
-	struct rpcrdma_xprt *r_xprt =
-				container_of(ia, struct rpcrdma_xprt, rx_ia);
-	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
-	struct list_head *pos;
-	struct rpcrdma_mw *r;
-	LIST_HEAD(l);
-	int rc;
-
-	list_for_each(pos, &buf->rb_all) {
-		r = list_entry(pos, struct rpcrdma_mw, mw_all);
-
-		INIT_LIST_HEAD(&l);
-		list_add(&r->r.fmr->list, &l);
-		rc = ib_unmap_fmr(&l);
-		if (rc)
-			dprintk("RPC:       %s: ib_unmap_fmr failed %i\n",
-				__func__, rc);
-	}
-}
-
-/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
- * an unusable state. Find FRMRs in this state and dereg / reg
- * each.  FRMRs that are VALID and attached to an rpcrdma_req are
- * also torn down.
- *
- * This gives all in-use FRMRs a fresh rkey and leaves them INVALID.
- *
- * This is invoked only in the transport connect worker in order
- * to serialize with rpcrdma_register_frmr_external().
- */
-static void
-rpcrdma_reset_frmrs(struct rpcrdma_ia *ia)
-{
-	struct rpcrdma_xprt *r_xprt =
-				container_of(ia, struct rpcrdma_xprt, rx_ia);
-	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
-	struct list_head *pos;
-	struct rpcrdma_mw *r;
-	int rc;
-
-	list_for_each(pos, &buf->rb_all) {
-		r = list_entry(pos, struct rpcrdma_mw, mw_all);
-
-		if (r->r.frmr.fr_state == FRMR_IS_INVALID)
-			continue;
-
-		rc = ib_dereg_mr(r->r.frmr.fr_mr);
-		if (rc)
-			dprintk("RPC:       %s: ib_dereg_mr failed %i\n",
-				__func__, rc);
-		ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
-
-		r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
-					ia->ri_max_frmr_depth);
-		if (IS_ERR(r->r.frmr.fr_mr)) {
-			rc = PTR_ERR(r->r.frmr.fr_mr);
-			dprintk("RPC:       %s: ib_alloc_fast_reg_mr"
-				" failed %i\n", __func__, rc);
-			continue;
-		}
-		r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
-					ia->ri_id->device,
-					ia->ri_max_frmr_depth);
-		if (IS_ERR(r->r.frmr.fr_pgl)) {
-			rc = PTR_ERR(r->r.frmr.fr_pgl);
-			dprintk("RPC:       %s: "
-				"ib_alloc_fast_reg_page_list "
-				"failed %i\n", __func__, rc);
-
-			ib_dereg_mr(r->r.frmr.fr_mr);
-			continue;
-		}
-		r->r.frmr.fr_state = FRMR_IS_INVALID;
-	}
-}
-
 /* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
  * some req segments uninitialized.
  */
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 4fe3c38..cdf6763 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -342,6 +342,7 @@ struct rpcrdma_memreg_ops {
 				    struct rpcrdma_req *, unsigned int);
 	size_t		(*ro_maxpages)(struct rpcrdma_xprt *);
 	int		(*ro_init)(struct rpcrdma_xprt *);
+	void		(*ro_reset)(struct rpcrdma_xprt *);
 	const char	*ro_displayname;
 };
 

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [PATCH v1 09/16] xprtrdma: Add "destroy MRs" memreg op
       [not found] ` <20150313212517.22783.18364.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
                     ` (7 preceding siblings ...)
  2015-03-13 21:27   ` [PATCH v1 08/16] xprtrdma: Add "reset " Chuck Lever
@ 2015-03-13 21:27   ` Chuck Lever
       [not found]     ` <20150313212758.22783.67493.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
  2015-03-13 21:28   ` [PATCH v1 10/16] xprtrdma: Add "open" " Chuck Lever
                     ` (6 subsequent siblings)
  15 siblings, 1 reply; 52+ messages in thread
From: Chuck Lever @ 2015-03-13 21:27 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA

Memory Region objects associated with a transport instance are
destroyed before the instance is shutdown and destroyed.

Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
---
 net/sunrpc/xprtrdma/fmr_ops.c      |   21 +++++++++++++++
 net/sunrpc/xprtrdma/frwr_ops.c     |   17 ++++++++++++
 net/sunrpc/xprtrdma/physical_ops.c |    6 ++++
 net/sunrpc/xprtrdma/verbs.c        |   52 +-----------------------------------
 net/sunrpc/xprtrdma/xprt_rdma.h    |    1 +
 5 files changed, 46 insertions(+), 51 deletions(-)

diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 1ccb3de..3115e4b 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -178,11 +178,32 @@ fmr_op_reset(struct rpcrdma_xprt *r_xprt)
 			__func__, rc);
 }
 
+static void
+fmr_op_destroy(struct rpcrdma_buffer *buf)
+{
+	struct rpcrdma_mw *r;
+	int rc;
+
+	while (!list_empty(&buf->rb_all)) {
+		r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
+		list_del(&r->mw_all);
+		list_del(&r->mw_list);
+
+		rc = ib_dealloc_fmr(r->r.fmr);
+		if (rc)
+			dprintk("RPC:       %s: ib_dealloc_fmr failed %i\n",
+				__func__, rc);
+
+		kfree(r);
+	}
+}
+
 const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
 	.ro_map				= fmr_op_map,
 	.ro_unmap			= fmr_op_unmap,
 	.ro_maxpages			= fmr_op_maxpages,
 	.ro_init			= fmr_op_init,
 	.ro_reset			= fmr_op_reset,
+	.ro_destroy			= fmr_op_destroy,
 	.ro_displayname			= "fmr",
 };
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index b4ce0e5..fc3a228 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -271,11 +271,28 @@ frwr_op_reset(struct rpcrdma_xprt *r_xprt)
 	}
 }
 
+static void
+frwr_op_destroy(struct rpcrdma_buffer *buf)
+{
+	struct rpcrdma_mw *r;
+
+	while (!list_empty(&buf->rb_all)) {
+		r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
+		list_del(&r->mw_all);
+		list_del(&r->mw_list);
+
+		__frwr_release(r);
+
+		kfree(r);
+	}
+}
+
 const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
 	.ro_map				= frwr_op_map,
 	.ro_unmap			= frwr_op_unmap,
 	.ro_maxpages			= frwr_op_maxpages,
 	.ro_init			= frwr_op_init,
 	.ro_reset			= frwr_op_reset,
+	.ro_destroy			= frwr_op_destroy,
 	.ro_displayname			= "frwr",
 };
diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
index 0afc691..f8da8c4 100644
--- a/net/sunrpc/xprtrdma/physical_ops.c
+++ b/net/sunrpc/xprtrdma/physical_ops.c
@@ -67,11 +67,17 @@ physical_op_reset(struct rpcrdma_xprt *r_xprt)
 {
 }
 
+static void
+physical_op_destroy(struct rpcrdma_buffer *buf)
+{
+}
+
 const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
 	.ro_map				= physical_op_map,
 	.ro_unmap			= physical_op_unmap,
 	.ro_maxpages			= physical_op_maxpages,
 	.ro_init			= physical_op_init,
 	.ro_reset			= physical_op_reset,
+	.ro_destroy			= physical_op_destroy,
 	.ro_displayname			= "physical",
 };
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index e17d91a..dcbc736 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1198,47 +1198,6 @@ rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
 	kfree(req);
 }
 
-static void
-rpcrdma_destroy_fmrs(struct rpcrdma_buffer *buf)
-{
-	struct rpcrdma_mw *r;
-	int rc;
-
-	while (!list_empty(&buf->rb_all)) {
-		r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
-		list_del(&r->mw_all);
-		list_del(&r->mw_list);
-
-		rc = ib_dealloc_fmr(r->r.fmr);
-		if (rc)
-			dprintk("RPC:       %s: ib_dealloc_fmr failed %i\n",
-				__func__, rc);
-
-		kfree(r);
-	}
-}
-
-static void
-rpcrdma_destroy_frmrs(struct rpcrdma_buffer *buf)
-{
-	struct rpcrdma_mw *r;
-	int rc;
-
-	while (!list_empty(&buf->rb_all)) {
-		r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
-		list_del(&r->mw_all);
-		list_del(&r->mw_list);
-
-		rc = ib_dereg_mr(r->r.frmr.fr_mr);
-		if (rc)
-			dprintk("RPC:       %s: ib_dereg_mr failed %i\n",
-				__func__, rc);
-		ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
-
-		kfree(r);
-	}
-}
-
 void
 rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
 {
@@ -1259,16 +1218,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
 			rpcrdma_destroy_req(ia, buf->rb_send_bufs[i]);
 	}
 
-	switch (ia->ri_memreg_strategy) {
-	case RPCRDMA_FRMR:
-		rpcrdma_destroy_frmrs(buf);
-		break;
-	case RPCRDMA_MTHCAFMR:
-		rpcrdma_destroy_fmrs(buf);
-		break;
-	default:
-		break;
-	}
+	ia->ri_ops->ro_destroy(buf);
 
 	kfree(buf->rb_pool);
 }
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index cdf6763..a0e3c3e 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -343,6 +343,7 @@ struct rpcrdma_memreg_ops {
 	size_t		(*ro_maxpages)(struct rpcrdma_xprt *);
 	int		(*ro_init)(struct rpcrdma_xprt *);
 	void		(*ro_reset)(struct rpcrdma_xprt *);
+	void		(*ro_destroy)(struct rpcrdma_buffer *);
 	const char	*ro_displayname;
 };
 

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [PATCH v1 10/16] xprtrdma: Add "open" memreg op
       [not found] ` <20150313212517.22783.18364.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
                     ` (8 preceding siblings ...)
  2015-03-13 21:27   ` [PATCH v1 09/16] xprtrdma: Add "destroy " Chuck Lever
@ 2015-03-13 21:28   ` Chuck Lever
       [not found]     ` <20150313212807.22783.61881.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
  2015-03-13 21:28   ` [PATCH v1 11/16] xprtrdma: Handle non-SEND completions via a callout Chuck Lever
                     ` (5 subsequent siblings)
  15 siblings, 1 reply; 52+ messages in thread
From: Chuck Lever @ 2015-03-13 21:28 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA

The open op determines the size of various transport data structures
based on device capabilities and memory registration mode.

Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
---
 net/sunrpc/xprtrdma/fmr_ops.c      |   22 +++++++++++++
 net/sunrpc/xprtrdma/frwr_ops.c     |   60 ++++++++++++++++++++++++++++++++++++
 net/sunrpc/xprtrdma/physical_ops.c |   22 +++++++++++++
 net/sunrpc/xprtrdma/verbs.c        |   54 ++------------------------------
 net/sunrpc/xprtrdma/xprt_rdma.h    |    3 ++
 5 files changed, 110 insertions(+), 51 deletions(-)

diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 3115e4b..96e6cd3 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -46,6 +46,27 @@ out_err:
 	return nsegs;
 }
 
+static int
+fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
+	    struct rpcrdma_create_data_internal *cdata)
+{
+	struct ib_device_attr *devattr = &ia->ri_devattr;
+	unsigned int wrs, max_wrs;
+
+	max_wrs = devattr->max_qp_wr;
+	if (cdata->max_requests > max_wrs)
+		cdata->max_requests = max_wrs;
+
+	wrs = cdata->max_requests;
+	ep->rep_attr.cap.max_send_wr = wrs;
+	ep->rep_attr.cap.max_recv_wr = wrs;
+
+	dprintk("RPC:       %s: pre-allocating %u send WRs, %u recv WRs\n",
+		__func__, ep->rep_attr.cap.max_send_wr,
+		ep->rep_attr.cap.max_recv_wr);
+	return 0;
+}
+
 /* FMR mode conveys up to 64 pages of payload per chunk segment.
  */
 static size_t
@@ -201,6 +222,7 @@ fmr_op_destroy(struct rpcrdma_buffer *buf)
 const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
 	.ro_map				= fmr_op_map,
 	.ro_unmap			= fmr_op_unmap,
+	.ro_open			= fmr_op_open,
 	.ro_maxpages			= fmr_op_maxpages,
 	.ro_init			= fmr_op_init,
 	.ro_reset			= fmr_op_reset,
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index fc3a228..9bb4b2d 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -93,6 +93,65 @@ __frwr_release(struct rpcrdma_mw *r)
 	ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
 }
 
+static int
+frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
+	     struct rpcrdma_create_data_internal *cdata)
+{
+	struct ib_device_attr *devattr = &ia->ri_devattr;
+	unsigned int wrs, max_wrs;
+	int depth = 7;
+
+	max_wrs = devattr->max_qp_wr;
+	if (cdata->max_requests > max_wrs)
+		cdata->max_requests = max_wrs;
+
+	wrs = cdata->max_requests;
+	ep->rep_attr.cap.max_send_wr = wrs;
+	ep->rep_attr.cap.max_recv_wr = wrs;
+
+	ia->ri_max_frmr_depth =
+			min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
+			      devattr->max_fast_reg_page_list_len);
+	dprintk("RPC:       %s: device's max FR page list len = %u\n",
+		__func__, ia->ri_max_frmr_depth);
+
+	/* Add room for frmr register and invalidate WRs.
+	 * 1. FRMR reg WR for head
+	 * 2. FRMR invalidate WR for head
+	 * 3. N FRMR reg WRs for pagelist
+	 * 4. N FRMR invalidate WRs for pagelist
+	 * 5. FRMR reg WR for tail
+	 * 6. FRMR invalidate WR for tail
+	 * 7. The RDMA_SEND WR
+	 */
+
+	/* Calculate N if the device max FRMR depth is smaller than
+	 * RPCRDMA_MAX_DATA_SEGS.
+	 */
+	if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
+		int delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frmr_depth;
+
+		do {
+			depth += 2; /* FRMR reg + invalidate */
+			delta -= ia->ri_max_frmr_depth;
+		} while (delta > 0);
+	}
+
+	ep->rep_attr.cap.max_send_wr *= depth;
+	if (ep->rep_attr.cap.max_send_wr > max_wrs) {
+		cdata->max_requests = max_wrs / depth;
+		if (!cdata->max_requests)
+			return -EINVAL;
+		ep->rep_attr.cap.max_send_wr = cdata->max_requests *
+					       depth;
+	}
+
+	dprintk("RPC:       %s: pre-allocating %u send WRs, %u recv WRs\n",
+		__func__, ep->rep_attr.cap.max_send_wr,
+		ep->rep_attr.cap.max_recv_wr);
+	return 0;
+}
+
 /* FRWR mode conveys a list of pages per chunk segment. The
  * maximum length of that list is the FRWR page list depth.
  */
@@ -290,6 +349,7 @@ frwr_op_destroy(struct rpcrdma_buffer *buf)
 const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
 	.ro_map				= frwr_op_map,
 	.ro_unmap			= frwr_op_unmap,
+	.ro_open			= frwr_op_open,
 	.ro_maxpages			= frwr_op_maxpages,
 	.ro_init			= frwr_op_init,
 	.ro_reset			= frwr_op_reset,
diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
index f8da8c4..0998f4f 100644
--- a/net/sunrpc/xprtrdma/physical_ops.c
+++ b/net/sunrpc/xprtrdma/physical_ops.c
@@ -19,6 +19,27 @@
 # define RPCDBG_FACILITY	RPCDBG_TRANS
 #endif
 
+static int
+physical_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
+		 struct rpcrdma_create_data_internal *cdata)
+{
+	struct ib_device_attr *devattr = &ia->ri_devattr;
+	unsigned int wrs, max_wrs;
+
+	max_wrs = devattr->max_qp_wr;
+	if (cdata->max_requests > max_wrs)
+		cdata->max_requests = max_wrs;
+
+	wrs = cdata->max_requests;
+	ep->rep_attr.cap.max_send_wr = wrs;
+	ep->rep_attr.cap.max_recv_wr = wrs;
+
+	dprintk("RPC:       %s: pre-allocating %u send WRs, %u recv WRs\n",
+		__func__, ep->rep_attr.cap.max_send_wr,
+		ep->rep_attr.cap.max_recv_wr);
+	return 0;
+}
+
 /* PHYSICAL memory registration conveys one page per chunk segment.
  */
 static size_t
@@ -75,6 +96,7 @@ physical_op_destroy(struct rpcrdma_buffer *buf)
 const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
 	.ro_map				= physical_op_map,
 	.ro_unmap			= physical_op_unmap,
+	.ro_open			= physical_op_open,
 	.ro_maxpages			= physical_op_maxpages,
 	.ro_init			= physical_op_init,
 	.ro_reset			= physical_op_reset,
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index dcbc736..17b2a29 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -621,11 +621,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
 			dprintk("RPC:       %s: FRMR registration "
 				"not supported by HCA\n", __func__);
 			memreg = RPCRDMA_MTHCAFMR;
-		} else {
-			/* Mind the ia limit on FRMR page list depth */
-			ia->ri_max_frmr_depth = min_t(unsigned int,
-				RPCRDMA_MAX_DATA_SEGS,
-				devattr->max_fast_reg_page_list_len);
 		}
 	}
 	if (memreg == RPCRDMA_MTHCAFMR) {
@@ -734,56 +729,13 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
 	struct ib_cq *sendcq, *recvcq;
 	int rc, err;
 
-	/* check provider's send/recv wr limits */
-	if (cdata->max_requests > devattr->max_qp_wr)
-		cdata->max_requests = devattr->max_qp_wr;
+	rc = ia->ri_ops->ro_open(ia, ep, cdata);
+	if (rc)
+		return rc;
 
 	ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
 	ep->rep_attr.qp_context = ep;
-	/* send_cq and recv_cq initialized below */
 	ep->rep_attr.srq = NULL;
-	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
-	switch (ia->ri_memreg_strategy) {
-	case RPCRDMA_FRMR: {
-		int depth = 7;
-
-		/* Add room for frmr register and invalidate WRs.
-		 * 1. FRMR reg WR for head
-		 * 2. FRMR invalidate WR for head
-		 * 3. N FRMR reg WRs for pagelist
-		 * 4. N FRMR invalidate WRs for pagelist
-		 * 5. FRMR reg WR for tail
-		 * 6. FRMR invalidate WR for tail
-		 * 7. The RDMA_SEND WR
-		 */
-
-		/* Calculate N if the device max FRMR depth is smaller than
-		 * RPCRDMA_MAX_DATA_SEGS.
-		 */
-		if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
-			int delta = RPCRDMA_MAX_DATA_SEGS -
-				    ia->ri_max_frmr_depth;
-
-			do {
-				depth += 2; /* FRMR reg + invalidate */
-				delta -= ia->ri_max_frmr_depth;
-			} while (delta > 0);
-
-		}
-		ep->rep_attr.cap.max_send_wr *= depth;
-		if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) {
-			cdata->max_requests = devattr->max_qp_wr / depth;
-			if (!cdata->max_requests)
-				return -EINVAL;
-			ep->rep_attr.cap.max_send_wr = cdata->max_requests *
-						       depth;
-		}
-		break;
-	}
-	default:
-		break;
-	}
-	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
 	ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
 	ep->rep_attr.cap.max_recv_sge = 1;
 	ep->rep_attr.cap.max_inline_data = 0;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index a0e3c3e..a53a564 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -340,6 +340,9 @@ struct rpcrdma_memreg_ops {
 				  struct rpcrdma_mr_seg *, int, bool);
 	void		(*ro_unmap)(struct rpcrdma_xprt *,
 				    struct rpcrdma_req *, unsigned int);
+	int		(*ro_open)(struct rpcrdma_ia *,
+				   struct rpcrdma_ep *,
+				   struct rpcrdma_create_data_internal *);
 	size_t		(*ro_maxpages)(struct rpcrdma_xprt *);
 	int		(*ro_init)(struct rpcrdma_xprt *);
 	void		(*ro_reset)(struct rpcrdma_xprt *);

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [PATCH v1 11/16] xprtrdma: Handle non-SEND completions via a callout
       [not found] ` <20150313212517.22783.18364.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
                     ` (9 preceding siblings ...)
  2015-03-13 21:28   ` [PATCH v1 10/16] xprtrdma: Add "open" " Chuck Lever
@ 2015-03-13 21:28   ` Chuck Lever
       [not found]     ` <20150313212816.22783.49677.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
  2015-03-13 21:28   ` [PATCH v1 12/16] xprtrdma: Acquire FMRs in rpcrdma_fmr_register_external() Chuck Lever
                     ` (4 subsequent siblings)
  15 siblings, 1 reply; 52+ messages in thread
From: Chuck Lever @ 2015-03-13 21:28 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA

Allow each memory registration mode to plug in a callout that handles
the completion of a memory registration operation.

Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
---
 net/sunrpc/xprtrdma/frwr_ops.c  |   17 +++++++++++++++++
 net/sunrpc/xprtrdma/verbs.c     |   14 +++++---------
 net/sunrpc/xprtrdma/xprt_rdma.h |    5 +++++
 3 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 9bb4b2d..6e6d8ba 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -164,6 +164,22 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
 		     rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth);
 }
 
+/* If FAST_REG or LOCAL_INV failed, indicate the frmr needs to be reset. */
+static void
+frwr_sendcompletion(struct ib_wc *wc)
+{
+	struct rpcrdma_mw *r;
+
+	if (likely(wc->status == IB_WC_SUCCESS))
+		return;
+
+	/* WARNING: Only wr_id and status are reliable at this point */
+	r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
+	dprintk("RPC:       %s: frmr %p (stale), status %d\n",
+		__func__, r, wc->status);
+	r->r.frmr.fr_state = FRMR_IS_STALE;
+}
+
 static int
 frwr_op_init(struct rpcrdma_xprt *r_xprt)
 {
@@ -195,6 +211,7 @@ frwr_op_init(struct rpcrdma_xprt *r_xprt)
 
 		list_add(&r->mw_list, &buf->rb_mws);
 		list_add(&r->mw_all, &buf->rb_all);
+		r->mw_sendcompletion = frwr_sendcompletion;
 	}
 
 	return 0;
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 17b2a29..f108a57 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -204,21 +204,17 @@ static const char * const wc_status[] = {
 static void
 rpcrdma_sendcq_process_wc(struct ib_wc *wc)
 {
-	if (likely(wc->status == IB_WC_SUCCESS))
-		return;
-
 	/* WARNING: Only wr_id and status are reliable at this point */
-	if (wc->wr_id == 0ULL) {
-		if (wc->status != IB_WC_WR_FLUSH_ERR)
+	if (wc->wr_id == RPCRDMA_IGNORE_COMPLETION) {
+		if (wc->status != IB_WC_SUCCESS &&
+		    wc->status != IB_WC_WR_FLUSH_ERR)
 			pr_err("RPC:       %s: SEND: %s\n",
 			       __func__, COMPLETION_MSG(wc->status));
 	} else {
 		struct rpcrdma_mw *r;
 
 		r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
-		r->r.frmr.fr_state = FRMR_IS_STALE;
-		pr_err("RPC:       %s: frmr %p (stale): %s\n",
-		       __func__, r, COMPLETION_MSG(wc->status));
+		r->mw_sendcompletion(wc);
 	}
 }
 
@@ -1616,7 +1612,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
 	}
 
 	send_wr.next = NULL;
-	send_wr.wr_id = 0ULL;	/* no send cookie */
+	send_wr.wr_id = RPCRDMA_IGNORE_COMPLETION;
 	send_wr.sg_list = req->rl_send_iov;
 	send_wr.num_sge = req->rl_niovs;
 	send_wr.opcode = IB_WR_SEND;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index a53a564..40a0ee8 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -106,6 +106,10 @@ struct rpcrdma_ep {
 #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
 #define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount)
 
+/* Force completion handler to ignore the signal
+ */
+#define RPCRDMA_IGNORE_COMPLETION	(0ULL)
+
 /* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV
  *
  * The below structure appears at the front of a large region of kmalloc'd
@@ -206,6 +210,7 @@ struct rpcrdma_mw {
 		struct ib_fmr		*fmr;
 		struct rpcrdma_frmr	frmr;
 	} r;
+	void			(*mw_sendcompletion)(struct ib_wc *);
 	struct list_head	mw_list;
 	struct list_head	mw_all;
 };

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [PATCH v1 12/16] xprtrdma: Acquire FMRs in rpcrdma_fmr_register_external()
       [not found] ` <20150313212517.22783.18364.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
                     ` (10 preceding siblings ...)
  2015-03-13 21:28   ` [PATCH v1 11/16] xprtrdma: Handle non-SEND completions via a callout Chuck Lever
@ 2015-03-13 21:28   ` Chuck Lever
       [not found]     ` <20150313212825.22783.51384.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
  2015-03-13 21:28   ` [PATCH v1 13/16] xprtrdma: Acquire MRs in rpcrdma_register_external() Chuck Lever
                     ` (3 subsequent siblings)
  15 siblings, 1 reply; 52+ messages in thread
From: Chuck Lever @ 2015-03-13 21:28 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA

Acquiring 64 FMRs in rpcrdma_buffer_get() while holding the buffer
pool lock is expensive, and unnecessary because FMR mode can
transfer a payload of up to 1MB using just a single ib_fmr.

Instead, acquire ib_fmrs one-at-a-time as chunks are registered, and
return them to rb_mws immediately during deregistration.

Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
---
 net/sunrpc/xprtrdma/fmr_ops.c   |   41 ++++++++++++++++++++++++++++++++++++---
 net/sunrpc/xprtrdma/verbs.c     |   41 ++++++++++++++-------------------------
 net/sunrpc/xprtrdma/xprt_rdma.h |    1 +
 3 files changed, 54 insertions(+), 29 deletions(-)

diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 96e6cd3..9c6c2e8 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -28,10 +28,11 @@ __fmr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
 {
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 	struct rpcrdma_mr_seg *seg1 = seg;
+	struct rpcrdma_mw *mw = seg1->rl_mw;
 	int rc, nsegs = seg->mr_nsegs;
 	LIST_HEAD(l);
 
-	list_add(&seg1->rl_mw->r.fmr->list, &l);
+	list_add(&mw->r.fmr->list, &l);
 	rc = ib_unmap_fmr(&l);
 	read_lock(&ia->ri_qplock);
 	while (seg1->mr_nsegs--)
@@ -39,11 +40,14 @@ __fmr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
 	read_unlock(&ia->ri_qplock);
 	if (rc)
 		goto out_err;
+out:
+	seg1->rl_mw = NULL;
+	rpcrdma_put_mw(r_xprt, mw);
 	return nsegs;
 
 out_err:
 	dprintk("RPC:       %s: ib_unmap_fmr status %i\n", __func__, rc);
-	return nsegs;
+	goto out;
 }
 
 static int
@@ -117,6 +121,27 @@ out_fmr_err:
 	return rc;
 }
 
+static struct rpcrdma_mw *
+__fmr_get_mw(struct rpcrdma_xprt *r_xprt)
+{
+	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+	struct rpcrdma_mw *mw = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&buf->rb_lock, flags);
+
+	if (!list_empty(&buf->rb_mws)) {
+		mw = list_entry(buf->rb_mws.next,
+				struct rpcrdma_mw, mw_list);
+		list_del_init(&mw->mw_list);
+	} else {
+		pr_err("RPC:       %s: no MWs available\n", __func__);
+	}
+
+	spin_unlock_irqrestore(&buf->rb_lock, flags);
+	return mw;
+}
+
 /* Use the ib_map_phys_fmr() verb to register a memory region
  * for remote access via RDMA READ or RDMA WRITE.
  */
@@ -126,10 +151,18 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 {
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 	struct rpcrdma_mr_seg *seg1 = seg;
-	struct rpcrdma_mw *mw = seg1->rl_mw;
+	struct rpcrdma_mw *mw;
 	u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
 	int len, pageoff, i, rc;
 
+	mw = __fmr_get_mw(r_xprt);
+	if (!mw)
+		return -ENOMEM;
+	if (seg1->rl_mw) {
+		rpcrdma_put_mw(r_xprt, seg1->rl_mw);
+		seg1->rl_mw = NULL;
+	}
+
 	pageoff = offset_in_page(seg1->mr_offset);
 	seg1->mr_offset -= pageoff;	/* start of page */
 	seg1->mr_len += pageoff;
@@ -152,6 +185,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 	if (rc)
 		goto out_maperr;
 
+	seg1->rl_mw = mw;
 	seg1->mr_rkey = mw->r.fmr->rkey;
 	seg1->mr_base = seg1->mr_dma + pageoff;
 	seg1->mr_nsegs = i;
@@ -164,6 +198,7 @@ out_maperr:
 		pageoff, i, rc);
 	while (i--)
 		rpcrdma_unmap_one(ia, --seg);
+	rpcrdma_put_mw(r_xprt, mw);
 	return rc;
 }
 
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index f108a57..f2316d8 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1171,6 +1171,21 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
 	kfree(buf->rb_pool);
 }
 
+void
+rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw)
+{
+	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+	unsigned long flags;
+
+	if (!list_empty(&mw->mw_list))
+		pr_warn("RPC:       %s: mw %p still on a list!\n",
+			__func__, mw);
+
+	spin_lock_irqsave(&buf->rb_lock, flags);
+	list_add_tail(&mw->mw_list, &buf->rb_mws);
+	spin_unlock_irqrestore(&buf->rb_lock, flags);
+}
+
 /* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
  * some req segments uninitialized.
  */
@@ -1292,28 +1307,6 @@ rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
 	return NULL;
 }
 
-static struct rpcrdma_req *
-rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
-{
-	struct rpcrdma_mw *r;
-	int i;
-
-	i = RPCRDMA_MAX_SEGS - 1;
-	while (!list_empty(&buf->rb_mws)) {
-		r = list_entry(buf->rb_mws.next,
-			       struct rpcrdma_mw, mw_list);
-		list_del(&r->mw_list);
-		req->rl_segments[i].rl_mw = r;
-		if (unlikely(i-- == 0))
-			return req;	/* Success */
-	}
-
-	/* Not enough entries on rb_mws for this req */
-	rpcrdma_buffer_put_sendbuf(req, buf);
-	rpcrdma_buffer_put_mrs(req, buf);
-	return NULL;
-}
-
 /*
  * Get a set of request/reply buffers.
  *
@@ -1355,9 +1348,6 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
 	case RPCRDMA_FRMR:
 		req = rpcrdma_buffer_get_frmrs(req, buffers, &stale);
 		break;
-	case RPCRDMA_MTHCAFMR:
-		req = rpcrdma_buffer_get_fmrs(req, buffers);
-		break;
 	default:
 		break;
 	}
@@ -1382,7 +1372,6 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
 	rpcrdma_buffer_put_sendbuf(req, buffers);
 	switch (ia->ri_memreg_strategy) {
 	case RPCRDMA_FRMR:
-	case RPCRDMA_MTHCAFMR:
 		rpcrdma_buffer_put_mrs(req, buffers);
 		break;
 	default:
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 40a0ee8..d5aa5b4 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -426,6 +426,7 @@ void rpcrdma_free_regbuf(struct rpcrdma_ia *,
 unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *);
 void rpcrdma_map_one(struct rpcrdma_ia *, struct rpcrdma_mr_seg *, bool);
 void rpcrdma_unmap_one(struct rpcrdma_ia *, struct rpcrdma_mr_seg *);
+void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *);
 
 /*
  * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [PATCH v1 13/16] xprtrdma: Acquire MRs in rpcrdma_register_external()
       [not found] ` <20150313212517.22783.18364.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
                     ` (11 preceding siblings ...)
  2015-03-13 21:28   ` [PATCH v1 12/16] xprtrdma: Acquire FMRs in rpcrdma_fmr_register_external() Chuck Lever
@ 2015-03-13 21:28   ` Chuck Lever
       [not found]     ` <20150313212835.22783.12326.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
  2015-03-13 21:28   ` [PATCH v1 14/16] xprtrdma: Remove rpcrdma_ia::ri_memreg_strategy Chuck Lever
                     ` (2 subsequent siblings)
  15 siblings, 1 reply; 52+ messages in thread
From: Chuck Lever @ 2015-03-13 21:28 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA

Acquiring 64 MRs in rpcrdma_buffer_get() while holding the buffer
pool lock is expensive, and unnecessary because most modern adapters
can transfer hundreds of kilobytes of payload using just a single MR.

Instead, acquire MRs one-at-a-time as chunks are registered, and
return them to rb_mws immediately during deregistration.

Note: commit 539431a437d2 ("xprtrdma: Don't invalidate FRMRs if
registration fails") is reverted: There is now a valid case where
registration can fail (with -ENOMEM) but the QP is still in RTS.

Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
---
 net/sunrpc/xprtrdma/frwr_ops.c |  126 ++++++++++++++++++++++++++++++++++++---
 net/sunrpc/xprtrdma/rpc_rdma.c |    3 -
 net/sunrpc/xprtrdma/verbs.c    |  130 ----------------------------------------
 3 files changed, 120 insertions(+), 139 deletions(-)

diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 6e6d8ba..d23e064 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -54,15 +54,16 @@ __frwr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
 {
 	struct rpcrdma_mr_seg *seg1 = seg;
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+	struct rpcrdma_mw *mw = seg1->rl_mw;
 	struct ib_send_wr invalidate_wr, *bad_wr;
 	int rc, nsegs = seg->mr_nsegs;
 
-	seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;
+	mw->r.frmr.fr_state = FRMR_IS_INVALID;
 
 	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
-	invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw;
+	invalidate_wr.wr_id = (unsigned long)(void *)mw;
 	invalidate_wr.opcode = IB_WR_LOCAL_INV;
-	invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey;
+	invalidate_wr.ex.invalidate_rkey = mw->r.frmr.fr_mr->rkey;
 	DECR_CQCOUNT(&r_xprt->rx_ep);
 
 	read_lock(&ia->ri_qplock);
@@ -72,13 +73,17 @@ __frwr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
 	read_unlock(&ia->ri_qplock);
 	if (rc)
 		goto out_err;
+
+out:
+	seg1->rl_mw = NULL;
+	rpcrdma_put_mw(r_xprt, mw);
 	return nsegs;
 
 out_err:
 	/* Force rpcrdma_buffer_get() to retry */
-	seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
+	mw->r.frmr.fr_state = FRMR_IS_STALE;
 	dprintk("RPC:       %s: ib_post_send status %i\n", __func__, rc);
-	return nsegs;
+	goto out;
 }
 
 static void
@@ -217,6 +222,99 @@ frwr_op_init(struct rpcrdma_xprt *r_xprt)
 	return 0;
 }
 
+/* rpcrdma_unmap_one() was already done by rpcrdma_frwr_releasesge().
+ * Redo only the ib_post_send().
+ */
+static void
+__retry_local_inv(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
+{
+	struct rpcrdma_xprt *r_xprt =
+				container_of(ia, struct rpcrdma_xprt, rx_ia);
+	struct ib_send_wr invalidate_wr, *bad_wr;
+	int rc;
+
+	pr_warn("RPC:       %s: FRMR %p is stale\n", __func__, r);
+
+	/* When this FRMR is re-inserted into rb_mws, it is no longer stale */
+	r->r.frmr.fr_state = FRMR_IS_INVALID;
+
+	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
+	invalidate_wr.wr_id = (unsigned long)(void *)r;
+	invalidate_wr.opcode = IB_WR_LOCAL_INV;
+	invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey;
+	DECR_CQCOUNT(&r_xprt->rx_ep);
+
+	pr_warn("RPC:       %s: frmr %p invalidating rkey %08x\n",
+		__func__, r, r->r.frmr.fr_mr->rkey);
+
+	read_lock(&ia->ri_qplock);
+	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
+	read_unlock(&ia->ri_qplock);
+	if (rc) {
+		/* Force __frwr_get_mw() to retry */
+		r->r.frmr.fr_state = FRMR_IS_STALE;
+		dprintk("RPC:       %s: ib_post_send status %i\n",
+			__func__, rc);
+	}
+}
+
+static void
+__retry_flushed_linv(struct rpcrdma_buffer *buf, struct list_head *stale)
+{
+	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
+	struct list_head *pos;
+	struct rpcrdma_mw *r;
+	unsigned long flags;
+	unsigned count;
+
+	count = 0;
+	list_for_each(pos, stale) {
+		r = list_entry(pos, struct rpcrdma_mw, mw_list);
+		__retry_local_inv(ia, r);
+		++count;
+	}
+
+	pr_warn("RPC:       %s: adding %u MRs to rb_mws\n",
+		__func__, count);
+
+	spin_lock_irqsave(&buf->rb_lock, flags);
+	list_splice_tail(stale, &buf->rb_mws);
+	spin_unlock_irqrestore(&buf->rb_lock, flags);
+}
+
+static struct rpcrdma_mw *
+__frwr_get_mw(struct rpcrdma_xprt *r_xprt)
+{
+	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+	struct rpcrdma_mw *mw;
+	unsigned long flags;
+	LIST_HEAD(stale);
+	int count;
+
+	spin_lock_irqsave(&buf->rb_lock, flags);
+	count = 0;
+	while (!list_empty(&buf->rb_mws)) {
+		mw = list_entry(buf->rb_mws.next, struct rpcrdma_mw, mw_list);
+		list_del_init(&mw->mw_list);
+		if (mw->r.frmr.fr_state == FRMR_IS_INVALID)
+			goto out_unlock;
+		list_add_tail(&mw->mw_list, &stale);
+		count++;
+	}
+
+	pr_err("RPC:       %s: no INVALID MWs available\n", __func__);
+	mw = NULL;
+
+out_unlock:
+	spin_unlock_irqrestore(&buf->rb_lock, flags);
+	if (!list_empty(&stale)) {
+		dprintk("RPC:       %s: found %d unsuitable MWs\n",
+			__func__, count);
+		__retry_flushed_linv(buf, &stale);
+	}
+	return mw;
+}
+
 /* Post a FAST_REG Work Request to register a memory region
  * for remote access via RDMA READ or RDMA WRITE.
  */
@@ -226,9 +324,9 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 {
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 	struct rpcrdma_mr_seg *seg1 = seg;
-	struct rpcrdma_mw *mw = seg1->rl_mw;
-	struct rpcrdma_frmr *frmr = &mw->r.frmr;
-	struct ib_mr *mr = frmr->fr_mr;
+	struct rpcrdma_frmr *frmr;
+	struct rpcrdma_mw *mw;
+	struct ib_mr *mr;
 	struct ib_send_wr fastreg_wr, *bad_wr;
 	u8 key;
 	int len, pageoff;
@@ -237,12 +335,21 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 	u64 pa;
 	int page_no;
 
+	mw = __frwr_get_mw(r_xprt);
+	if (!mw)
+		return -ENOMEM;
+	if (seg1->rl_mw) {
+		rpcrdma_put_mw(r_xprt, seg1->rl_mw);
+		seg1->rl_mw = NULL;
+	}
+
 	pageoff = offset_in_page(seg1->mr_offset);
 	seg1->mr_offset -= pageoff;	/* start of page */
 	seg1->mr_len += pageoff;
 	len = -pageoff;
 	if (nsegs > ia->ri_max_frmr_depth)
 		nsegs = ia->ri_max_frmr_depth;
+	frmr = &mw->r.frmr;
 	for (page_no = i = 0; i < nsegs;) {
 		rpcrdma_map_one(ia, seg, writing);
 		pa = seg->mr_dma;
@@ -278,6 +385,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 	fastreg_wr.wr.fast_reg.access_flags = writing ?
 				IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
 				IB_ACCESS_REMOTE_READ;
+	mr = frmr->fr_mr;
 	key = (u8)(mr->rkey & 0x000000FF);
 	ib_update_fast_reg_key(mr, ++key);
 	fastreg_wr.wr.fast_reg.rkey = mr->rkey;
@@ -287,6 +395,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 	if (rc)
 		goto out_senderr;
 
+	seg1->rl_mw = mw;
 	seg1->mr_rkey = mr->rkey;
 	seg1->mr_base = seg1->mr_dma + pageoff;
 	seg1->mr_nsegs = i;
@@ -301,6 +410,7 @@ out_err:
 	frmr->fr_state = FRMR_IS_INVALID;
 	while (i--)
 		rpcrdma_unmap_one(ia, --seg);
+	rpcrdma_put_mw(r_xprt, mw);
 	return rc;
 }
 
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 7b51d9d..b9689ca 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -284,8 +284,7 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
 	return (unsigned char *)iptr - (unsigned char *)headerp;
 
 out:
-	if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_FRMR)
-		r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, req, nchunks);
+	r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, req, nchunks);
 	return n;
 }
 
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index f2316d8..5b9c104 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1186,33 +1186,6 @@ rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw)
 	spin_unlock_irqrestore(&buf->rb_lock, flags);
 }
 
-/* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
- * some req segments uninitialized.
- */
-static void
-rpcrdma_buffer_put_mr(struct rpcrdma_mw **mw, struct rpcrdma_buffer *buf)
-{
-	if (*mw) {
-		list_add_tail(&(*mw)->mw_list, &buf->rb_mws);
-		*mw = NULL;
-	}
-}
-
-/* Cycle mw's back in reverse order, and "spin" them.
- * This delays and scrambles reuse as much as possible.
- */
-static void
-rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
-{
-	struct rpcrdma_mr_seg *seg = req->rl_segments;
-	struct rpcrdma_mr_seg *seg1 = seg;
-	int i;
-
-	for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++)
-		rpcrdma_buffer_put_mr(&seg->rl_mw, buf);
-	rpcrdma_buffer_put_mr(&seg1->rl_mw, buf);
-}
-
 static void
 rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
 {
@@ -1225,88 +1198,6 @@ rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
 	}
 }
 
-/* rpcrdma_unmap_one() was already done during deregistration.
- * Redo only the ib_post_send().
- */
-static void
-rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia)
-{
-	struct rpcrdma_xprt *r_xprt =
-				container_of(ia, struct rpcrdma_xprt, rx_ia);
-	struct ib_send_wr invalidate_wr, *bad_wr;
-	int rc;
-
-	dprintk("RPC:       %s: FRMR %p is stale\n", __func__, r);
-
-	/* When this FRMR is re-inserted into rb_mws, it is no longer stale */
-	r->r.frmr.fr_state = FRMR_IS_INVALID;
-
-	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
-	invalidate_wr.wr_id = (unsigned long)(void *)r;
-	invalidate_wr.opcode = IB_WR_LOCAL_INV;
-	invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey;
-	DECR_CQCOUNT(&r_xprt->rx_ep);
-
-	dprintk("RPC:       %s: frmr %p invalidating rkey %08x\n",
-		__func__, r, r->r.frmr.fr_mr->rkey);
-
-	read_lock(&ia->ri_qplock);
-	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
-	read_unlock(&ia->ri_qplock);
-	if (rc) {
-		/* Force rpcrdma_buffer_get() to retry */
-		r->r.frmr.fr_state = FRMR_IS_STALE;
-		dprintk("RPC:       %s: ib_post_send failed, %i\n",
-			__func__, rc);
-	}
-}
-
-static void
-rpcrdma_retry_flushed_linv(struct list_head *stale,
-			   struct rpcrdma_buffer *buf)
-{
-	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
-	struct list_head *pos;
-	struct rpcrdma_mw *r;
-	unsigned long flags;
-
-	list_for_each(pos, stale) {
-		r = list_entry(pos, struct rpcrdma_mw, mw_list);
-		rpcrdma_retry_local_inv(r, ia);
-	}
-
-	spin_lock_irqsave(&buf->rb_lock, flags);
-	list_splice_tail(stale, &buf->rb_mws);
-	spin_unlock_irqrestore(&buf->rb_lock, flags);
-}
-
-static struct rpcrdma_req *
-rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
-			 struct list_head *stale)
-{
-	struct rpcrdma_mw *r;
-	int i;
-
-	i = RPCRDMA_MAX_SEGS - 1;
-	while (!list_empty(&buf->rb_mws)) {
-		r = list_entry(buf->rb_mws.next,
-			       struct rpcrdma_mw, mw_list);
-		list_del(&r->mw_list);
-		if (r->r.frmr.fr_state == FRMR_IS_STALE) {
-			list_add(&r->mw_list, stale);
-			continue;
-		}
-		req->rl_segments[i].rl_mw = r;
-		if (unlikely(i-- == 0))
-			return req;	/* Success */
-	}
-
-	/* Not enough entries on rb_mws for this req */
-	rpcrdma_buffer_put_sendbuf(req, buf);
-	rpcrdma_buffer_put_mrs(req, buf);
-	return NULL;
-}
-
 /*
  * Get a set of request/reply buffers.
  *
@@ -1319,12 +1210,11 @@ rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
 struct rpcrdma_req *
 rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
 {
-	struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
-	struct list_head stale;
 	struct rpcrdma_req *req;
 	unsigned long flags;
 
 	spin_lock_irqsave(&buffers->rb_lock, flags);
+
 	if (buffers->rb_send_index == buffers->rb_max_requests) {
 		spin_unlock_irqrestore(&buffers->rb_lock, flags);
 		dprintk("RPC:       %s: out of request buffers\n", __func__);
@@ -1343,17 +1233,7 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
 	}
 	buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
 
-	INIT_LIST_HEAD(&stale);
-	switch (ia->ri_memreg_strategy) {
-	case RPCRDMA_FRMR:
-		req = rpcrdma_buffer_get_frmrs(req, buffers, &stale);
-		break;
-	default:
-		break;
-	}
 	spin_unlock_irqrestore(&buffers->rb_lock, flags);
-	if (!list_empty(&stale))
-		rpcrdma_retry_flushed_linv(&stale, buffers);
 	return req;
 }
 
@@ -1365,18 +1245,10 @@ void
 rpcrdma_buffer_put(struct rpcrdma_req *req)
 {
 	struct rpcrdma_buffer *buffers = req->rl_buffer;
-	struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
 	unsigned long flags;
 
 	spin_lock_irqsave(&buffers->rb_lock, flags);
 	rpcrdma_buffer_put_sendbuf(req, buffers);
-	switch (ia->ri_memreg_strategy) {
-	case RPCRDMA_FRMR:
-		rpcrdma_buffer_put_mrs(req, buffers);
-		break;
-	default:
-		break;
-	}
 	spin_unlock_irqrestore(&buffers->rb_lock, flags);
 }
 

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [PATCH v1 14/16] xprtrdma: Remove rpcrdma_ia::ri_memreg_strategy
       [not found] ` <20150313212517.22783.18364.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
                     ` (12 preceding siblings ...)
  2015-03-13 21:28   ` [PATCH v1 13/16] xprtrdma: Acquire MRs in rpcrdma_register_external() Chuck Lever
@ 2015-03-13 21:28   ` Chuck Lever
       [not found]     ` <20150313212844.22783.1438.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
  2015-03-13 21:28   ` [PATCH v1 15/16] xprtrdma: Make rpcrdma_{un}map_one() into inline functions Chuck Lever
  2015-03-13 21:29   ` [PATCH v1 16/16] xprtrdma: Split rb_lock Chuck Lever
  15 siblings, 1 reply; 52+ messages in thread
From: Chuck Lever @ 2015-03-13 21:28 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA

Clean up: This field is no longer used.

Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
---
 include/linux/sunrpc/xprtrdma.h |    3 ++-
 net/sunrpc/xprtrdma/verbs.c     |    3 ---
 net/sunrpc/xprtrdma/xprt_rdma.h |    1 -
 3 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h
index 64a0a0a..10bc3cde 100644
--- a/include/linux/sunrpc/xprtrdma.h
+++ b/include/linux/sunrpc/xprtrdma.h
@@ -61,7 +61,8 @@
 
 #define RPCRDMA_INLINE_PAD_THRESH  (512)/* payload threshold to pad (bytes) */
 
-/* memory registration strategies */
+/* Memory registration strategies, by number.
+ * This is part of a kernel / user space API. Do not remove. */
 enum rpcrdma_memreg {
 	RPCRDMA_BOUNCEBUFFERS = 0,
 	RPCRDMA_REGISTER,
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 5b9c104..21b63fe 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -669,9 +669,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
 	dprintk("RPC:       %s: memory registration strategy is '%s'\n",
 		__func__, ia->ri_ops->ro_displayname);
 
-	/* Else will do memory reg/dereg for each chunk */
-	ia->ri_memreg_strategy = memreg;
-
 	rwlock_init(&ia->ri_qplock);
 	return 0;
 
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index d5aa5b4..b167e44 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -69,7 +69,6 @@ struct rpcrdma_ia {
 	int			ri_have_dma_lkey;
 	struct completion	ri_done;
 	int			ri_async_rc;
-	enum rpcrdma_memreg	ri_memreg_strategy;
 	unsigned int		ri_max_frmr_depth;
 	struct ib_device_attr	ri_devattr;
 	struct ib_qp_attr	ri_qp_attr;

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [PATCH v1 15/16] xprtrdma: Make rpcrdma_{un}map_one() into inline functions
       [not found] ` <20150313212517.22783.18364.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
                     ` (13 preceding siblings ...)
  2015-03-13 21:28   ` [PATCH v1 14/16] xprtrdma: Remove rpcrdma_ia::ri_memreg_strategy Chuck Lever
@ 2015-03-13 21:28   ` Chuck Lever
       [not found]     ` <20150313212853.22783.62285.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
  2015-03-13 21:29   ` [PATCH v1 16/16] xprtrdma: Split rb_lock Chuck Lever
  15 siblings, 1 reply; 52+ messages in thread
From: Chuck Lever @ 2015-03-13 21:28 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA

These functions are called in a loop for each page transferred via
RDMA READ or WRITE. Extract loop invariants and inline them to
reduce CPU overhead.

Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
---
 net/sunrpc/xprtrdma/fmr_ops.c      |   10 ++++++--
 net/sunrpc/xprtrdma/frwr_ops.c     |   10 ++++++--
 net/sunrpc/xprtrdma/physical_ops.c |   11 ++++++---
 net/sunrpc/xprtrdma/verbs.c        |   44 ++++++-----------------------------
 net/sunrpc/xprtrdma/xprt_rdma.h    |   45 ++++++++++++++++++++++++++++++++++--
 5 files changed, 73 insertions(+), 47 deletions(-)

diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 9c6c2e8..1404f20 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -29,14 +29,16 @@ __fmr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 	struct rpcrdma_mr_seg *seg1 = seg;
 	struct rpcrdma_mw *mw = seg1->rl_mw;
+	struct ib_device *device;
 	int rc, nsegs = seg->mr_nsegs;
 	LIST_HEAD(l);
 
 	list_add(&mw->r.fmr->list, &l);
 	rc = ib_unmap_fmr(&l);
 	read_lock(&ia->ri_qplock);
+	device = ia->ri_id->device;
 	while (seg1->mr_nsegs--)
-		rpcrdma_unmap_one(ia, seg++);
+		rpcrdma_unmap_one(device, seg++);
 	read_unlock(&ia->ri_qplock);
 	if (rc)
 		goto out_err;
@@ -150,6 +152,8 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 	   int nsegs, bool writing)
 {
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+	struct ib_device *device = ia->ri_id->device;
+	enum dma_data_direction direction = rpcrdma_data_dir(writing);
 	struct rpcrdma_mr_seg *seg1 = seg;
 	struct rpcrdma_mw *mw;
 	u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
@@ -170,7 +174,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 	if (nsegs > RPCRDMA_MAX_FMR_SGES)
 		nsegs = RPCRDMA_MAX_FMR_SGES;
 	for (i = 0; i < nsegs;) {
-		rpcrdma_map_one(ia, seg, writing);
+		rpcrdma_map_one(device, seg, direction);
 		physaddrs[i] = seg->mr_dma;
 		len += seg->mr_len;
 		++seg;
@@ -197,7 +201,7 @@ out_maperr:
 		__func__, len, (unsigned long long)seg1->mr_dma,
 		pageoff, i, rc);
 	while (i--)
-		rpcrdma_unmap_one(ia, --seg);
+		rpcrdma_unmap_one(device, --seg);
 	rpcrdma_put_mw(r_xprt, mw);
 	return rc;
 }
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index d23e064..4494668 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -55,6 +55,7 @@ __frwr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
 	struct rpcrdma_mr_seg *seg1 = seg;
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 	struct rpcrdma_mw *mw = seg1->rl_mw;
+	struct ib_device *device;
 	struct ib_send_wr invalidate_wr, *bad_wr;
 	int rc, nsegs = seg->mr_nsegs;
 
@@ -67,8 +68,9 @@ __frwr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
 	DECR_CQCOUNT(&r_xprt->rx_ep);
 
 	read_lock(&ia->ri_qplock);
+	device = ia->ri_id->device;
 	while (seg1->mr_nsegs--)
-		rpcrdma_unmap_one(ia, seg++);
+		rpcrdma_unmap_one(device, seg++);
 	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
 	read_unlock(&ia->ri_qplock);
 	if (rc)
@@ -323,6 +325,8 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 	    int nsegs, bool writing)
 {
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+	struct ib_device *device = ia->ri_id->device;
+	enum dma_data_direction direction = rpcrdma_data_dir(writing);
 	struct rpcrdma_mr_seg *seg1 = seg;
 	struct rpcrdma_frmr *frmr;
 	struct rpcrdma_mw *mw;
@@ -351,7 +355,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 		nsegs = ia->ri_max_frmr_depth;
 	frmr = &mw->r.frmr;
 	for (page_no = i = 0; i < nsegs;) {
-		rpcrdma_map_one(ia, seg, writing);
+		rpcrdma_map_one(device, seg, direction);
 		pa = seg->mr_dma;
 		for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
 			frmr->fr_pgl->page_list[page_no++] = pa;
@@ -409,7 +413,7 @@ out_senderr:
 out_err:
 	frmr->fr_state = FRMR_IS_INVALID;
 	while (i--)
-		rpcrdma_unmap_one(ia, --seg);
+		rpcrdma_unmap_one(device, --seg);
 	rpcrdma_put_mw(r_xprt, mw);
 	return rc;
 }
diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
index 0998f4f..9ce7dfc 100644
--- a/net/sunrpc/xprtrdma/physical_ops.c
+++ b/net/sunrpc/xprtrdma/physical_ops.c
@@ -64,7 +64,8 @@ physical_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 {
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 
-	rpcrdma_map_one(ia, seg, writing);
+	rpcrdma_map_one(ia->ri_id->device, seg,
+			rpcrdma_data_dir(writing));
 	seg->mr_rkey = ia->ri_bind_mem->rkey;
 	seg->mr_base = seg->mr_dma;
 	seg->mr_nsegs = 1;
@@ -77,10 +78,14 @@ static void
 physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 		  unsigned int count)
 {
+	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 	unsigned int i;
 
-	for (i = 0; i < count; i++)
-		rpcrdma_unmap_one(&r_xprt->rx_ia, &req->rl_segments[i]);
+	for (i = 0; i < count; i++) {
+		read_lock(&ia->ri_qplock);
+		rpcrdma_unmap_one(ia->ri_id->device, &req->rl_segments[i]);
+		read_unlock(&ia->ri_qplock);
+	}
 }
 
 static void
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 21b63fe..460c917 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1288,6 +1288,14 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
  * Wrappers for internal-use kmalloc memory registration, used by buffer code.
  */
 
+void
+rpcrdma_mapping_error(struct rpcrdma_mr_seg *seg)
+{
+	dprintk("RPC:       map_one: offset %p iova %llx len %zu\n",
+		seg->mr_offset,
+		(unsigned long long)seg->mr_dma, seg->mr_dmalen);
+}
+
 static int
 rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
 				struct ib_mr **mrp, struct ib_sge *iov)
@@ -1413,42 +1421,6 @@ rpcrdma_free_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
 }
 
 /*
- * Wrappers for chunk registration, shared by read/write chunk code.
- */
-
-void
-rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, bool writing)
-{
-	seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
-	seg->mr_dmalen = seg->mr_len;
-	if (seg->mr_page)
-		seg->mr_dma = ib_dma_map_page(ia->ri_id->device,
-				seg->mr_page, offset_in_page(seg->mr_offset),
-				seg->mr_dmalen, seg->mr_dir);
-	else
-		seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
-				seg->mr_offset,
-				seg->mr_dmalen, seg->mr_dir);
-	if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) {
-		dprintk("RPC:       %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n",
-			__func__,
-			(unsigned long long)seg->mr_dma,
-			seg->mr_offset, seg->mr_dmalen);
-	}
-}
-
-void
-rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
-{
-	if (seg->mr_page)
-		ib_dma_unmap_page(ia->ri_id->device,
-				seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
-	else
-		ib_dma_unmap_single(ia->ri_id->device,
-				seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
-}
-
-/*
  * Prepost any receive buffer, then post send.
  *
  * Receive buffer is donated to hardware, reclaimed upon recv completion.
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index b167e44..10c4fa5 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -423,11 +423,52 @@ void rpcrdma_free_regbuf(struct rpcrdma_ia *,
 			 struct rpcrdma_regbuf *);
 
 unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *);
-void rpcrdma_map_one(struct rpcrdma_ia *, struct rpcrdma_mr_seg *, bool);
-void rpcrdma_unmap_one(struct rpcrdma_ia *, struct rpcrdma_mr_seg *);
 void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *);
 
 /*
+ * Wrappers for chunk registration, shared by read/write chunk code.
+ */
+
+void rpcrdma_mapping_error(struct rpcrdma_mr_seg *);
+
+static inline enum dma_data_direction
+rpcrdma_data_dir(bool writing)
+{
+	return writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+}
+
+static inline void
+rpcrdma_map_one(struct ib_device *device, struct rpcrdma_mr_seg *seg,
+		enum dma_data_direction direction)
+{
+	seg->mr_dir = direction;
+	seg->mr_dmalen = seg->mr_len;
+
+	if (seg->mr_page)
+		seg->mr_dma = ib_dma_map_page(device,
+				seg->mr_page, offset_in_page(seg->mr_offset),
+				seg->mr_dmalen, seg->mr_dir);
+	else
+		seg->mr_dma = ib_dma_map_single(device,
+				seg->mr_offset,
+				seg->mr_dmalen, seg->mr_dir);
+
+	if (ib_dma_mapping_error(device, seg->mr_dma))
+		rpcrdma_mapping_error(seg);
+}
+
+static inline void
+rpcrdma_unmap_one(struct ib_device *device, struct rpcrdma_mr_seg *seg)
+{
+	if (seg->mr_page)
+		ib_dma_unmap_page(device,
+				  seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
+	else
+		ib_dma_unmap_single(device,
+				    seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
+}
+
+/*
  * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c
  */
 void rpcrdma_connect_worker(struct work_struct *);

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [PATCH v1 16/16] xprtrdma: Split rb_lock
       [not found] ` <20150313212517.22783.18364.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
                     ` (14 preceding siblings ...)
  2015-03-13 21:28   ` [PATCH v1 15/16] xprtrdma: Make rpcrdma_{un}map_one() into inline functions Chuck Lever
@ 2015-03-13 21:29   ` Chuck Lever
  15 siblings, 0 replies; 52+ messages in thread
From: Chuck Lever @ 2015-03-13 21:29 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA

/proc/lock_stat showed contention between rpcrdma_buffer_get/put
and the MR allocation functions during I/O intensive workloads.

Now that MRs are no longer allocated in rpcrdma_buffer_get(),
there's no reason the rb_mws list has to be managed using the
same lock as the send/receive buffers. Split that lock. The
new lock does not need to disable interrupts because buffer
get/put is never called in an interrupt context.

struct rpcrdma_buffer is re-arranged to ensure rb_mwlock and
rb_mws is always in a different cacheline than rb_lock and the
buffer pointers.

Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
---
 net/sunrpc/xprtrdma/fmr_ops.c   |    6 +++---
 net/sunrpc/xprtrdma/frwr_ops.c  |   28 ++++++++++++----------------
 net/sunrpc/xprtrdma/verbs.c     |    5 ++---
 net/sunrpc/xprtrdma/xprt_rdma.h |   16 +++++++++-------
 4 files changed, 26 insertions(+), 29 deletions(-)

diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 1404f20..00d362d 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -96,6 +96,7 @@ fmr_op_init(struct rpcrdma_xprt *r_xprt)
 	struct rpcrdma_mw *r;
 	int i, rc;
 
+	spin_lock_init(&buf->rb_mwlock);
 	INIT_LIST_HEAD(&buf->rb_mws);
 	INIT_LIST_HEAD(&buf->rb_all);
 
@@ -128,9 +129,8 @@ __fmr_get_mw(struct rpcrdma_xprt *r_xprt)
 {
 	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
 	struct rpcrdma_mw *mw = NULL;
-	unsigned long flags;
 
-	spin_lock_irqsave(&buf->rb_lock, flags);
+	spin_lock(&buf->rb_mwlock);
 
 	if (!list_empty(&buf->rb_mws)) {
 		mw = list_entry(buf->rb_mws.next,
@@ -140,7 +140,7 @@ __fmr_get_mw(struct rpcrdma_xprt *r_xprt)
 		pr_err("RPC:       %s: no MWs available\n", __func__);
 	}
 
-	spin_unlock_irqrestore(&buf->rb_lock, flags);
+	spin_unlock(&buf->rb_mwlock);
 	return mw;
 }
 
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 4494668..7973b94 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -196,6 +196,7 @@ frwr_op_init(struct rpcrdma_xprt *r_xprt)
 	struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
 	int i;
 
+	spin_lock_init(&buf->rb_mwlock);
 	INIT_LIST_HEAD(&buf->rb_mws);
 	INIT_LIST_HEAD(&buf->rb_all);
 
@@ -228,10 +229,9 @@ frwr_op_init(struct rpcrdma_xprt *r_xprt)
  * Redo only the ib_post_send().
  */
 static void
-__retry_local_inv(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
+__retry_local_inv(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *r)
 {
-	struct rpcrdma_xprt *r_xprt =
-				container_of(ia, struct rpcrdma_xprt, rx_ia);
+	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 	struct ib_send_wr invalidate_wr, *bad_wr;
 	int rc;
 
@@ -258,30 +258,27 @@ __retry_local_inv(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
 		dprintk("RPC:       %s: ib_post_send status %i\n",
 			__func__, rc);
 	}
+
+	rpcrdma_put_mw(r_xprt, r);
 }
 
 static void
-__retry_flushed_linv(struct rpcrdma_buffer *buf, struct list_head *stale)
+__retry_flushed_linv(struct rpcrdma_xprt *r_xprt, struct list_head *stale)
 {
-	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
 	struct list_head *pos;
 	struct rpcrdma_mw *r;
-	unsigned long flags;
 	unsigned count;
 
 	count = 0;
 	list_for_each(pos, stale) {
 		r = list_entry(pos, struct rpcrdma_mw, mw_list);
-		__retry_local_inv(ia, r);
+		list_del_init(&r->mw_list);
+		__retry_local_inv(r_xprt, r);
 		++count;
 	}
 
-	pr_warn("RPC:       %s: adding %u MRs to rb_mws\n",
+	pr_warn("RPC:       %s: added %u MRs to rb_mws\n",
 		__func__, count);
-
-	spin_lock_irqsave(&buf->rb_lock, flags);
-	list_splice_tail(stale, &buf->rb_mws);
-	spin_unlock_irqrestore(&buf->rb_lock, flags);
 }
 
 static struct rpcrdma_mw *
@@ -289,11 +286,10 @@ __frwr_get_mw(struct rpcrdma_xprt *r_xprt)
 {
 	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
 	struct rpcrdma_mw *mw;
-	unsigned long flags;
 	LIST_HEAD(stale);
 	int count;
 
-	spin_lock_irqsave(&buf->rb_lock, flags);
+	spin_lock(&buf->rb_mwlock);
 	count = 0;
 	while (!list_empty(&buf->rb_mws)) {
 		mw = list_entry(buf->rb_mws.next, struct rpcrdma_mw, mw_list);
@@ -308,11 +304,11 @@ __frwr_get_mw(struct rpcrdma_xprt *r_xprt)
 	mw = NULL;
 
 out_unlock:
-	spin_unlock_irqrestore(&buf->rb_lock, flags);
+	spin_unlock(&buf->rb_mwlock);
 	if (!list_empty(&stale)) {
 		dprintk("RPC:       %s: found %d unsuitable MWs\n",
 			__func__, count);
-		__retry_flushed_linv(buf, &stale);
+		__retry_flushed_linv(r_xprt, &stale);
 	}
 	return mw;
 }
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 460c917..d4de417 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1172,15 +1172,14 @@ void
 rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw)
 {
 	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
-	unsigned long flags;
 
 	if (!list_empty(&mw->mw_list))
 		pr_warn("RPC:       %s: mw %p still on a list!\n",
 			__func__, mw);
 
-	spin_lock_irqsave(&buf->rb_lock, flags);
+	spin_lock(&buf->rb_mwlock);
 	list_add_tail(&mw->mw_list, &buf->rb_mws);
-	spin_unlock_irqrestore(&buf->rb_lock, flags);
+	spin_unlock(&buf->rb_mwlock);
 }
 
 static void
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 10c4fa5..998f188 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -280,15 +280,17 @@ rpcr_to_rdmar(struct rpc_rqst *rqst)
  * One of these is associated with a transport instance
  */
 struct rpcrdma_buffer {
-	spinlock_t	rb_lock;	/* protects indexes */
-	u32		rb_max_requests;/* client max requests */
-	struct list_head rb_mws;	/* optional memory windows/fmrs/frmrs */
-	struct list_head rb_all;
-	int		rb_send_index;
+	spinlock_t		rb_mwlock;
+	struct list_head	rb_mws;
+	struct list_head	rb_all;
+	char			*rb_pool;
+
+	spinlock_t		rb_lock;
+	u32			rb_max_requests;
+	int			rb_send_index;
+	int			rb_recv_index;
 	struct rpcrdma_req	**rb_send_bufs;
-	int		rb_recv_index;
 	struct rpcrdma_rep	**rb_recv_bufs;
-	char		*rb_pool;
 };
 #define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
 

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 52+ messages in thread

* Re: [PATCH v1 01/16] xprtrdma: Display IPv6 addresses and port numbers correctly
       [not found]     ` <20150313212641.22783.93522.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
@ 2015-03-15  2:50       ` Sagi Grimberg
       [not found]         ` <5504F37A.20803-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
  2015-03-24  8:27       ` Devesh Sharma
  1 sibling, 1 reply; 52+ messages in thread
From: Sagi Grimberg @ 2015-03-15  2:50 UTC (permalink / raw)
  To: Chuck Lever, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On 3/13/2015 11:26 PM, Chuck Lever wrote:
> Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> ---
>   net/sunrpc/xprtrdma/transport.c |   47 ++++++++++++++++++++++++++++++++-------
>   net/sunrpc/xprtrdma/verbs.c     |   21 +++++++----------
>   2 files changed, 47 insertions(+), 21 deletions(-)
>
> diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
> index 2e192ba..26a62e7 100644
> --- a/net/sunrpc/xprtrdma/transport.c
> +++ b/net/sunrpc/xprtrdma/transport.c
> @@ -157,12 +157,47 @@ static struct ctl_table sunrpc_table[] = {
>   static struct rpc_xprt_ops xprt_rdma_procs;	/* forward reference */
>
>   static void
> +xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr *sap)
> +{
> +	struct sockaddr_in *sin = (struct sockaddr_in *)sap;
> +	char buf[20];
> +
> +	snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr));
> +	xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);
> +
> +	xprt->address_strings[RPC_DISPLAY_NETID] = "rdma";
> +}
> +
> +static void
> +xprt_rdma_format_addresses6(struct rpc_xprt *xprt, struct sockaddr *sap)
> +{
> +	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
> +	char buf[40];
> +
> +	snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr);

Don't you prefer %pIS can handle both IPv4/v6 instead of two different
routines? or maybe even %pISp can be better?

> +	xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);
> +
> +	xprt->address_strings[RPC_DISPLAY_NETID] = "rdma6";

Was RPC_DISPLAY_NETID "rdma6" before this patch?

> +}
> +
> +static void
>   xprt_rdma_format_addresses(struct rpc_xprt *xprt)
>   {
>   	struct sockaddr *sap = (struct sockaddr *)
>   					&rpcx_to_rdmad(xprt).addr;
> -	struct sockaddr_in *sin = (struct sockaddr_in *)sap;
> -	char buf[64];
> +	char buf[128];
> +
> +	switch (sap->sa_family) {
> +	case AF_INET:
> +		xprt_rdma_format_addresses4(xprt, sap);
> +		break;
> +	case AF_INET6:
> +		xprt_rdma_format_addresses6(xprt, sap);
> +		break;
> +	default:
> +		pr_err("rpcrdma: Unrecognized address family\n");
> +		return;
> +	}
>
>   	(void)rpc_ntop(sap, buf, sizeof(buf));
>   	xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL);
> @@ -170,16 +205,10 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt)
>   	snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap));
>   	xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL);
>
> -	xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
> -
> -	snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr));
> -	xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);
> -
>   	snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap));
>   	xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL);
>
> -	/* netid */
> -	xprt->address_strings[RPC_DISPLAY_NETID] = "rdma";
> +	xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
>   }
>
>   static void
> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
> index 124676c..1aa55b7 100644
> --- a/net/sunrpc/xprtrdma/verbs.c
> +++ b/net/sunrpc/xprtrdma/verbs.c
> @@ -50,6 +50,7 @@
>   #include <linux/interrupt.h>
>   #include <linux/slab.h>
>   #include <linux/prefetch.h>
> +#include <linux/sunrpc/addr.h>
>   #include <asm/bitops.h>
>
>   #include "xprt_rdma.h"
> @@ -424,7 +425,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
>   	struct rpcrdma_ia *ia = &xprt->rx_ia;
>   	struct rpcrdma_ep *ep = &xprt->rx_ep;
>   #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
> -	struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr;
> +	struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr;
>   #endif
>   	struct ib_qp_attr *attr = &ia->ri_qp_attr;
>   	struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr;
> @@ -480,9 +481,8 @@ connected:
>   		wake_up_all(&ep->rep_connect_wait);
>   		/*FALLTHROUGH*/
>   	default:
> -		dprintk("RPC:       %s: %pI4:%u (ep 0x%p): %s\n",
> -			__func__, &addr->sin_addr.s_addr,
> -			ntohs(addr->sin_port), ep,
> +		dprintk("RPC:       %s: %pIS:%u (ep 0x%p): %s\n",
> +			__func__, sap, rpc_get_port(sap), ep,
>   			CONNECTION_MSG(event->event));
>   		break;
>   	}
> @@ -491,19 +491,16 @@ connected:
>   	if (connstate == 1) {
>   		int ird = attr->max_dest_rd_atomic;
>   		int tird = ep->rep_remote_cma.responder_resources;
> -		printk(KERN_INFO "rpcrdma: connection to %pI4:%u "
> -			"on %s, memreg %d slots %d ird %d%s\n",
> -			&addr->sin_addr.s_addr,
> -			ntohs(addr->sin_port),
> +
> +		pr_info("rpcrdma: connection to %pIS:%u on %s, memreg %d slots %d ird %d%s\n",
> +			sap, rpc_get_port(sap),
>   			ia->ri_id->device->name,
>   			ia->ri_memreg_strategy,
>   			xprt->rx_buf.rb_max_requests,
>   			ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
>   	} else if (connstate < 0) {
> -		printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed (%d)\n",
> -			&addr->sin_addr.s_addr,
> -			ntohs(addr->sin_port),
> -			connstate);
> +		pr_info("rpcrdma: connection to %pIS:%u closed (%d)\n",
> +			sap, rpc_get_port(sap), connstate);
>   	}
>   #endif
>
>

Other than the minors above, Looks good.

Reviewed-by: Sagi Grimberg <sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH v1 02/16] xprtrdma: Perform a full marshal on retransmit
       [not found]     ` <20150313212650.22783.28071.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
@ 2015-03-15  3:01       ` Sagi Grimberg
  0 siblings, 0 replies; 52+ messages in thread
From: Sagi Grimberg @ 2015-03-15  3:01 UTC (permalink / raw)
  To: Chuck Lever, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On 3/13/2015 11:26 PM, Chuck Lever wrote:
> Commit 6ab59945f292 ("xprtrdma: Update rkeys after transport
> reconnect") added logic in the ->send_request path to update the
> chunk list when an RPC/RDMA request is retransmitted.
>
> Note that rpc_xdr_encode() resets and re-encodes the entire RPC
> send buffer for each retransmit of an RPC. The RPC send buffer
> is not preserved from the previous transmission of an RPC.
>
> Revert 6ab59945f292, and instead, just force each request to be
> fully marshaled every time through ->send_request. This should
> preserve the fix from 6ab59945f292, while also performing pullup
> during retransmits.
>
> Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> ---
>   net/sunrpc/xprtrdma/rpc_rdma.c  |   71 ++++++++++++++++++---------------------
>   net/sunrpc/xprtrdma/transport.c |    5 +--
>   net/sunrpc/xprtrdma/xprt_rdma.h |   10 -----
>   3 files changed, 34 insertions(+), 52 deletions(-)
>
> diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
> index 91ffde8..41456d9 100644
> --- a/net/sunrpc/xprtrdma/rpc_rdma.c
> +++ b/net/sunrpc/xprtrdma/rpc_rdma.c
> @@ -53,6 +53,14 @@
>   # define RPCDBG_FACILITY	RPCDBG_TRANS
>   #endif
>
> +enum rpcrdma_chunktype {
> +	rpcrdma_noch = 0,
> +	rpcrdma_readch,
> +	rpcrdma_areadch,
> +	rpcrdma_writech,
> +	rpcrdma_replych
> +};
> +
>   #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
>   static const char transfertypes[][12] = {
>   	"pure inline",	/* no chunks */
> @@ -284,28 +292,6 @@ out:
>   }
>
>   /*
> - * Marshal chunks. This routine returns the header length
> - * consumed by marshaling.
> - *
> - * Returns positive RPC/RDMA header size, or negative errno.
> - */
> -
> -ssize_t
> -rpcrdma_marshal_chunks(struct rpc_rqst *rqst, ssize_t result)
> -{
> -	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
> -	struct rpcrdma_msg *headerp = rdmab_to_msg(req->rl_rdmabuf);
> -
> -	if (req->rl_rtype != rpcrdma_noch)
> -		result = rpcrdma_create_chunks(rqst, &rqst->rq_snd_buf,
> -					       headerp, req->rl_rtype);
> -	else if (req->rl_wtype != rpcrdma_noch)
> -		result = rpcrdma_create_chunks(rqst, &rqst->rq_rcv_buf,
> -					       headerp, req->rl_wtype);
> -	return result;
> -}
> -
> -/*
>    * Copy write data inline.
>    * This function is used for "small" requests. Data which is passed
>    * to RPC via iovecs (or page list) is copied directly into the
> @@ -397,6 +383,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
>   	char *base;
>   	size_t rpclen, padlen;
>   	ssize_t hdrlen;
> +	enum rpcrdma_chunktype rtype, wtype;
>   	struct rpcrdma_msg *headerp;
>
>   	/*
> @@ -433,13 +420,13 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
>   	 * into pages; otherwise use reply chunks.
>   	 */
>   	if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst))
> -		req->rl_wtype = rpcrdma_noch;
> +		wtype = rpcrdma_noch;
>   	else if (rqst->rq_rcv_buf.page_len == 0)
> -		req->rl_wtype = rpcrdma_replych;
> +		wtype = rpcrdma_replych;
>   	else if (rqst->rq_rcv_buf.flags & XDRBUF_READ)
> -		req->rl_wtype = rpcrdma_writech;
> +		wtype = rpcrdma_writech;
>   	else
> -		req->rl_wtype = rpcrdma_replych;
> +		wtype = rpcrdma_replych;
>
>   	/*
>   	 * Chunks needed for arguments?
> @@ -456,16 +443,16 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
>   	 * TBD check NFSv4 setacl
>   	 */
>   	if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst))
> -		req->rl_rtype = rpcrdma_noch;
> +		rtype = rpcrdma_noch;
>   	else if (rqst->rq_snd_buf.page_len == 0)
> -		req->rl_rtype = rpcrdma_areadch;
> +		rtype = rpcrdma_areadch;
>   	else
> -		req->rl_rtype = rpcrdma_readch;
> +		rtype = rpcrdma_readch;
>
>   	/* The following simplification is not true forever */
> -	if (req->rl_rtype != rpcrdma_noch && req->rl_wtype == rpcrdma_replych)
> -		req->rl_wtype = rpcrdma_noch;
> -	if (req->rl_rtype != rpcrdma_noch && req->rl_wtype != rpcrdma_noch) {
> +	if (rtype != rpcrdma_noch && wtype == rpcrdma_replych)
> +		wtype = rpcrdma_noch;
> +	if (rtype != rpcrdma_noch && wtype != rpcrdma_noch) {
>   		dprintk("RPC:       %s: cannot marshal multiple chunk lists\n",
>   			__func__);
>   		return -EIO;
> @@ -479,7 +466,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
>   	 * When padding is in use and applies to the transfer, insert
>   	 * it and change the message type.
>   	 */
> -	if (req->rl_rtype == rpcrdma_noch) {
> +	if (rtype == rpcrdma_noch) {
>
>   		padlen = rpcrdma_inline_pullup(rqst,
>   						RPCRDMA_INLINE_PAD_VALUE(rqst));
> @@ -494,7 +481,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
>   			headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero;
>   			headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero;
>   			hdrlen += 2 * sizeof(u32); /* extra words in padhdr */
> -			if (req->rl_wtype != rpcrdma_noch) {
> +			if (wtype != rpcrdma_noch) {
>   				dprintk("RPC:       %s: invalid chunk list\n",
>   					__func__);
>   				return -EIO;
> @@ -515,18 +502,26 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
>   			 * on receive. Therefore, we request a reply chunk
>   			 * for non-writes wherever feasible and efficient.
>   			 */
> -			if (req->rl_wtype == rpcrdma_noch)
> -				req->rl_wtype = rpcrdma_replych;
> +			if (wtype == rpcrdma_noch)
> +				wtype = rpcrdma_replych;
>   		}
>   	}
>
> -	hdrlen = rpcrdma_marshal_chunks(rqst, hdrlen);
> +	if (rtype != rpcrdma_noch) {
> +		hdrlen = rpcrdma_create_chunks(rqst, &rqst->rq_snd_buf,
> +					       headerp, rtype);
> +		wtype = rtype;	/* simplify dprintk */
> +
> +	} else if (wtype != rpcrdma_noch) {
> +		hdrlen = rpcrdma_create_chunks(rqst, &rqst->rq_rcv_buf,
> +					       headerp, wtype);
> +	}
>   	if (hdrlen < 0)
>   		return hdrlen;
>
>   	dprintk("RPC:       %s: %s: hdrlen %zd rpclen %zd padlen %zd"
>   		" headerp 0x%p base 0x%p lkey 0x%x\n",
> -		__func__, transfertypes[req->rl_wtype], hdrlen, rpclen, padlen,
> +		__func__, transfertypes[wtype], hdrlen, rpclen, padlen,
>   		headerp, base, rdmab_lkey(req->rl_rdmabuf));
>
>   	/*
> diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
> index 26a62e7..271d306 100644
> --- a/net/sunrpc/xprtrdma/transport.c
> +++ b/net/sunrpc/xprtrdma/transport.c
> @@ -608,10 +608,7 @@ xprt_rdma_send_request(struct rpc_task *task)
>   	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
>   	int rc = 0;
>
> -	if (req->rl_niovs == 0)
> -		rc = rpcrdma_marshal_req(rqst);
> -	else if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_ALLPHYSICAL)
> -		rc = rpcrdma_marshal_chunks(rqst, 0);
> +	rc = rpcrdma_marshal_req(rqst);
>   	if (rc < 0)
>   		goto failed_marshal;
>
> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
> index 0a16fb6..c8afd83 100644
> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
> @@ -143,14 +143,6 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb)
>   	return (struct rpcrdma_msg *)rb->rg_base;
>   }
>
> -enum rpcrdma_chunktype {
> -	rpcrdma_noch = 0,
> -	rpcrdma_readch,
> -	rpcrdma_areadch,
> -	rpcrdma_writech,
> -	rpcrdma_replych
> -};
> -
>   /*
>    * struct rpcrdma_rep -- this structure encapsulates state required to recv
>    * and complete a reply, asychronously. It needs several pieces of
> @@ -258,7 +250,6 @@ struct rpcrdma_req {
>   	unsigned int	rl_niovs;	/* 0, 2 or 4 */
>   	unsigned int	rl_nchunks;	/* non-zero if chunks */
>   	unsigned int	rl_connect_cookie;	/* retry detection */
> -	enum rpcrdma_chunktype	rl_rtype, rl_wtype;
>   	struct rpcrdma_buffer *rl_buffer; /* home base for this structure */
>   	struct rpcrdma_rep	*rl_reply;/* holder for reply buffer */
>   	struct ib_sge	rl_send_iov[4];	/* for active requests */
> @@ -418,7 +409,6 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *);
>   /*
>    * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
>    */
> -ssize_t rpcrdma_marshal_chunks(struct rpc_rqst *, ssize_t);
>   int rpcrdma_marshal_req(struct rpc_rqst *);
>   size_t rpcrdma_max_payload(struct rpcrdma_xprt *);
>

Seems fine.

Acked-by: Sagi Grimberg <sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH v1 03/16] xprtrdma: Add vector of ops for each memory registration strategy
       [not found]     ` <20150313212659.22783.28341.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
@ 2015-03-15  3:04       ` Sagi Grimberg
  0 siblings, 0 replies; 52+ messages in thread
From: Sagi Grimberg @ 2015-03-15  3:04 UTC (permalink / raw)
  To: Chuck Lever, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On 3/13/2015 11:26 PM, Chuck Lever wrote:
> Instead of employing switch() statements, let's use the typical
> Linux kernel idiom for handling behavioral variation: virtual
> functions.
>
> Define a vector of operations for each supported memory registration
> mode, and add a source file for each mode.
>
> Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> ---
>   net/sunrpc/xprtrdma/Makefile       |    3 ++-
>   net/sunrpc/xprtrdma/fmr_ops.c      |   22 ++++++++++++++++++++++
>   net/sunrpc/xprtrdma/frwr_ops.c     |   22 ++++++++++++++++++++++
>   net/sunrpc/xprtrdma/physical_ops.c |   24 ++++++++++++++++++++++++
>   net/sunrpc/xprtrdma/verbs.c        |   11 +++++++----
>   net/sunrpc/xprtrdma/xprt_rdma.h    |   12 ++++++++++++
>   6 files changed, 89 insertions(+), 5 deletions(-)
>   create mode 100644 net/sunrpc/xprtrdma/fmr_ops.c
>   create mode 100644 net/sunrpc/xprtrdma/frwr_ops.c
>   create mode 100644 net/sunrpc/xprtrdma/physical_ops.c
>
> diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
> index da5136f..579f72b 100644
> --- a/net/sunrpc/xprtrdma/Makefile
> +++ b/net/sunrpc/xprtrdma/Makefile
> @@ -1,6 +1,7 @@
>   obj-$(CONFIG_SUNRPC_XPRT_RDMA_CLIENT) += xprtrdma.o
>
> -xprtrdma-y := transport.o rpc_rdma.o verbs.o
> +xprtrdma-y := transport.o rpc_rdma.o verbs.o \
> +	fmr_ops.o frwr_ops.o physical_ops.o
>
>   obj-$(CONFIG_SUNRPC_XPRT_RDMA_SERVER) += svcrdma.o
>
> diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
> new file mode 100644
> index 0000000..ffb7d93
> --- /dev/null
> +++ b/net/sunrpc/xprtrdma/fmr_ops.c
> @@ -0,0 +1,22 @@
> +/*
> + * Copyright (c) 2015 Oracle.  All rights reserved.
> + * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
> + */
> +
> +/* Lightweight memory registration using Fast Memory Regions (FMR).
> + * Referred to sometimes as MTHCAFMR mode.
> + *
> + * FMR uses synchronous memory registration and deregistration.
> + * FMR registration is known to be fast, but FMR deregistration
> + * can take tens of usecs to complete.
> + */
> +
> +#include "xprt_rdma.h"
> +
> +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
> +# define RPCDBG_FACILITY	RPCDBG_TRANS
> +#endif
> +
> +const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
> +	.ro_displayname			= "fmr",
> +};
> diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
> new file mode 100644
> index 0000000..79173f9
> --- /dev/null
> +++ b/net/sunrpc/xprtrdma/frwr_ops.c
> @@ -0,0 +1,22 @@
> +/*
> + * Copyright (c) 2015 Oracle.  All rights reserved.
> + * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
> + */
> +
> +/* Lightweight memory registration using Fast Registration Work
> + * Requests (FRWR). Also referred to sometimes as FRMR mode.
> + *
> + * FRWR features ordered asynchronous registration and deregistration
> + * of arbitrarily sized memory regions. This is the fastest and safest
> + * but most complex memory registration mode.
> + */
> +
> +#include "xprt_rdma.h"
> +
> +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
> +# define RPCDBG_FACILITY	RPCDBG_TRANS
> +#endif
> +
> +const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
> +	.ro_displayname			= "frwr",
> +};
> diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
> new file mode 100644
> index 0000000..b0922ac
> --- /dev/null
> +++ b/net/sunrpc/xprtrdma/physical_ops.c
> @@ -0,0 +1,24 @@
> +/*
> + * Copyright (c) 2015 Oracle.  All rights reserved.
> + * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
> + */
> +
> +/* No-op chunk preparation. All client memory is pre-registered.
> + * Sometimes referred to as ALLPHYSICAL mode.
> + *
> + * Physical registration is simple because all client memory is
> + * pre-registered and never deregistered. This mode is good for
> + * adapter bring up, but is considered not safe: the server is
> + * trusted not to abuse its access to client memory not involved
> + * in RDMA I/O.
> + */
> +
> +#include "xprt_rdma.h"
> +
> +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
> +# define RPCDBG_FACILITY	RPCDBG_TRANS
> +#endif
> +
> +const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
> +	.ro_displayname			= "physical",
> +};
> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
> index 1aa55b7..e4d9d9c 100644
> --- a/net/sunrpc/xprtrdma/verbs.c
> +++ b/net/sunrpc/xprtrdma/verbs.c
> @@ -492,10 +492,10 @@ connected:
>   		int ird = attr->max_dest_rd_atomic;
>   		int tird = ep->rep_remote_cma.responder_resources;
>
> -		pr_info("rpcrdma: connection to %pIS:%u on %s, memreg %d slots %d ird %d%s\n",
> +		pr_info("rpcrdma: connection to %pIS:%u on %s, memreg '%s', %d credits, %d responders%s\n",
>   			sap, rpc_get_port(sap),
>   			ia->ri_id->device->name,
> -			ia->ri_memreg_strategy,
> +			ia->ri_ops->ro_displayname,
>   			xprt->rx_buf.rb_max_requests,
>   			ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
>   	} else if (connstate < 0) {
> @@ -649,13 +649,16 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
>   	 */
>   	switch (memreg) {
>   	case RPCRDMA_FRMR:
> +		ia->ri_ops = &rpcrdma_frwr_memreg_ops;
>   		break;
>   	case RPCRDMA_ALLPHYSICAL:
> +		ia->ri_ops = &rpcrdma_physical_memreg_ops;
>   		mem_priv = IB_ACCESS_LOCAL_WRITE |
>   				IB_ACCESS_REMOTE_WRITE |
>   				IB_ACCESS_REMOTE_READ;
>   		goto register_setup;
>   	case RPCRDMA_MTHCAFMR:
> +		ia->ri_ops = &rpcrdma_fmr_memreg_ops;
>   		if (ia->ri_have_dma_lkey)
>   			break;
>   		mem_priv = IB_ACCESS_LOCAL_WRITE;
> @@ -675,8 +678,8 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
>   		rc = -ENOMEM;
>   		goto out3;
>   	}
> -	dprintk("RPC:       %s: memory registration strategy is %d\n",
> -		__func__, memreg);
> +	dprintk("RPC:       %s: memory registration strategy is '%s'\n",
> +		__func__, ia->ri_ops->ro_displayname);
>
>   	/* Else will do memory reg/dereg for each chunk */
>   	ia->ri_memreg_strategy = memreg;
> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
> index c8afd83..ef3cf4a 100644
> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
> @@ -60,6 +60,7 @@
>    * Interface Adapter -- one per transport instance
>    */
>   struct rpcrdma_ia {
> +	const struct rpcrdma_memreg_ops	*ri_ops;
>   	rwlock_t		ri_qplock;
>   	struct rdma_cm_id 	*ri_id;
>   	struct ib_pd		*ri_pd;
> @@ -331,6 +332,17 @@ struct rpcrdma_stats {
>   };
>
>   /*
> + * Per-registration mode operations
> + */
> +struct rpcrdma_memreg_ops {
> +	const char	*ro_displayname;
> +};
> +
> +extern const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops;
> +extern const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops;
> +extern const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops;
> +
> +/*
>    * RPCRDMA transport -- encapsulates the structures above for
>    * integration with RPC.
>    *
>

Don't know if I'd use three different files, but other than that I like
the change.

Reviewed-by: Sagi Grimberg <sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH v1 04/16] xprtrdma: Add a "max_payload" op for each memreg mode
       [not found]     ` <20150313212708.22783.70403.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
@ 2015-03-16 10:18       ` Sagi Grimberg
  0 siblings, 0 replies; 52+ messages in thread
From: Sagi Grimberg @ 2015-03-16 10:18 UTC (permalink / raw)
  To: Chuck Lever, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On 3/13/2015 11:27 PM, Chuck Lever wrote:
> The max_payload computation is generalized to ensure that the
> payload maximum is the lesser of RPC_MAX_DATA_SEGS and the number of
> data segments that can be transmitted in an inline buffer.
>
> Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> ---
>   net/sunrpc/xprtrdma/fmr_ops.c      |   13 ++++++++++
>   net/sunrpc/xprtrdma/frwr_ops.c     |   13 ++++++++++
>   net/sunrpc/xprtrdma/physical_ops.c |   10 +++++++
>   net/sunrpc/xprtrdma/transport.c    |    5 +++-
>   net/sunrpc/xprtrdma/verbs.c        |   49 +++++++++++-------------------------
>   net/sunrpc/xprtrdma/xprt_rdma.h    |    5 +++-
>   6 files changed, 59 insertions(+), 36 deletions(-)
>
> diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
> index ffb7d93..eec2660 100644
> --- a/net/sunrpc/xprtrdma/fmr_ops.c
> +++ b/net/sunrpc/xprtrdma/fmr_ops.c
> @@ -17,6 +17,19 @@
>   # define RPCDBG_FACILITY	RPCDBG_TRANS
>   #endif
>
> +/* Maximum scatter/gather per FMR */
> +#define RPCRDMA_MAX_FMR_SGES	(64)
> +
> +/* FMR mode conveys up to 64 pages of payload per chunk segment.
> + */
> +static size_t
> +fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
> +{
> +	return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
> +		     rpcrdma_max_segments(r_xprt) * RPCRDMA_MAX_FMR_SGES);
> +}
> +
>   const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
> +	.ro_maxpages			= fmr_op_maxpages,
>   	.ro_displayname			= "fmr",
>   };
> diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
> index 79173f9..73a5ac8 100644
> --- a/net/sunrpc/xprtrdma/frwr_ops.c
> +++ b/net/sunrpc/xprtrdma/frwr_ops.c
> @@ -17,6 +17,19 @@
>   # define RPCDBG_FACILITY	RPCDBG_TRANS
>   #endif
>
> +/* FRWR mode conveys a list of pages per chunk segment. The
> + * maximum length of that list is the FRWR page list depth.
> + */
> +static size_t
> +frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
> +{
> +	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
> +
> +	return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
> +		     rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth);
> +}
> +
>   const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
> +	.ro_maxpages			= frwr_op_maxpages,
>   	.ro_displayname			= "frwr",
>   };
> diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
> index b0922ac..28ade19 100644
> --- a/net/sunrpc/xprtrdma/physical_ops.c
> +++ b/net/sunrpc/xprtrdma/physical_ops.c
> @@ -19,6 +19,16 @@
>   # define RPCDBG_FACILITY	RPCDBG_TRANS
>   #endif
>
> +/* PHYSICAL memory registration conveys one page per chunk segment.
> + */
> +static size_t
> +physical_op_maxpages(struct rpcrdma_xprt *r_xprt)
> +{
> +	return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
> +		     rpcrdma_max_segments(r_xprt));
> +}
> +
>   const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
> +	.ro_maxpages			= physical_op_maxpages,
>   	.ro_displayname			= "physical",
>   };
> diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
> index 271d306..9a9da40 100644
> --- a/net/sunrpc/xprtrdma/transport.c
> +++ b/net/sunrpc/xprtrdma/transport.c
> @@ -406,7 +406,10 @@ xprt_setup_rdma(struct xprt_create *args)
>   			  xprt_rdma_connect_worker);
>
>   	xprt_rdma_format_addresses(xprt);
> -	xprt->max_payload = rpcrdma_max_payload(new_xprt);
> +	xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt);
> +	if (xprt->max_payload == 0)
> +		goto out4;
> +	xprt->max_payload <<= PAGE_SHIFT;
>   	dprintk("RPC:       %s: transport data payload maximum: %zu bytes\n",
>   		__func__, xprt->max_payload);
>
> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
> index e4d9d9c..837d4ea 100644
> --- a/net/sunrpc/xprtrdma/verbs.c
> +++ b/net/sunrpc/xprtrdma/verbs.c
> @@ -2215,43 +2215,24 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
>   	return rc;
>   }
>
> -/* Physical mapping means one Read/Write list entry per-page.
> - * All list entries must fit within an inline buffer
> - *
> - * NB: The server must return a Write list for NFS READ,
> - *     which has the same constraint. Factor in the inline
> - *     rsize as well.
> +/* How many chunk list items fit within our inline buffers?
>    */
> -static size_t
> -rpcrdma_physical_max_payload(struct rpcrdma_xprt *r_xprt)
> +unsigned int
> +rpcrdma_max_segments(struct rpcrdma_xprt *r_xprt)
>   {
>   	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
> -	unsigned int inline_size, pages;
> -
> -	inline_size = min_t(unsigned int,
> -			    cdata->inline_wsize, cdata->inline_rsize);
> -	inline_size -= RPCRDMA_HDRLEN_MIN;
> -	pages = inline_size / sizeof(struct rpcrdma_segment);
> -	return pages << PAGE_SHIFT;
> -}
> +	int bytes, segments;
>
> -static size_t
> -rpcrdma_mr_max_payload(struct rpcrdma_xprt *r_xprt)
> -{
> -	return RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT;
> -}
> -
> -size_t
> -rpcrdma_max_payload(struct rpcrdma_xprt *r_xprt)
> -{
> -	size_t result;
> -
> -	switch (r_xprt->rx_ia.ri_memreg_strategy) {
> -	case RPCRDMA_ALLPHYSICAL:
> -		result = rpcrdma_physical_max_payload(r_xprt);
> -		break;
> -	default:
> -		result = rpcrdma_mr_max_payload(r_xprt);
> +	bytes = min_t(unsigned int, cdata->inline_wsize, cdata->inline_rsize);
> +	bytes -= RPCRDMA_HDRLEN_MIN;
> +	if (bytes < sizeof(struct rpcrdma_segment) * 2) {
> +		pr_warn("RPC:       %s: inline threshold too small\n",
> +			__func__);
> +		return 0;
>   	}
> -	return result;
> +
> +	segments = 1 << (fls(bytes / sizeof(struct rpcrdma_segment)) - 1);
> +	dprintk("RPC:       %s: max chunk list size = %d segments\n",
> +		__func__, segments);
> +	return segments;
>   }
> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
> index ef3cf4a..59e627e 100644
> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
> @@ -334,7 +334,9 @@ struct rpcrdma_stats {
>   /*
>    * Per-registration mode operations
>    */
> +struct rpcrdma_xprt;
>   struct rpcrdma_memreg_ops {
> +	size_t		(*ro_maxpages)(struct rpcrdma_xprt *);
>   	const char	*ro_displayname;
>   };
>
> @@ -411,6 +413,8 @@ struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *,
>   void rpcrdma_free_regbuf(struct rpcrdma_ia *,
>   			 struct rpcrdma_regbuf *);
>
> +unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *);
> +
>   /*
>    * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c
>    */
> @@ -422,7 +426,6 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *);
>    * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
>    */
>   int rpcrdma_marshal_req(struct rpc_rqst *);
> -size_t rpcrdma_max_payload(struct rpcrdma_xprt *);
>
>   /* Temporary NFS request map cache. Created in svc_rdma.c  */
>   extern struct kmem_cache *svc_rdma_map_cachep;
>

Reviewed-by: Sagi Grimberg <sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH v1 05/16] xprtrdma: Add a "register_external" op for each memreg mode
       [not found]     ` <20150313212718.22783.10096.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
@ 2015-03-16 10:28       ` Sagi Grimberg
       [not found]         ` <5506B036.1040905-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
  0 siblings, 1 reply; 52+ messages in thread
From: Sagi Grimberg @ 2015-03-16 10:28 UTC (permalink / raw)
  To: Chuck Lever, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On 3/13/2015 11:27 PM, Chuck Lever wrote:
> There is very little common processing among the different external
> memory registration functions. Have rpcrdma_create_chunks() call
> the registration method directly. This removes a stack frame and a
> switch statement from the external registration path.
>
> Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> ---
>   net/sunrpc/xprtrdma/fmr_ops.c      |   51 +++++++++++
>   net/sunrpc/xprtrdma/frwr_ops.c     |   88 ++++++++++++++++++
>   net/sunrpc/xprtrdma/physical_ops.c |   17 ++++
>   net/sunrpc/xprtrdma/rpc_rdma.c     |    5 +
>   net/sunrpc/xprtrdma/verbs.c        |  172 +-----------------------------------
>   net/sunrpc/xprtrdma/xprt_rdma.h    |    6 +
>   6 files changed, 166 insertions(+), 173 deletions(-)
>
> diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
> index eec2660..45fb646 100644
> --- a/net/sunrpc/xprtrdma/fmr_ops.c
> +++ b/net/sunrpc/xprtrdma/fmr_ops.c
> @@ -29,7 +29,58 @@ fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
>   		     rpcrdma_max_segments(r_xprt) * RPCRDMA_MAX_FMR_SGES);
>   }
>
> +/* Use the ib_map_phys_fmr() verb to register a memory region
> + * for remote access via RDMA READ or RDMA WRITE.
> + */
> +static int
> +fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
> +	   int nsegs, bool writing)
> +{
> +	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
> +	struct rpcrdma_mr_seg *seg1 = seg;
> +	struct rpcrdma_mw *mw = seg1->rl_mw;
> +	u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
> +	int len, pageoff, i, rc;
> +
> +	pageoff = offset_in_page(seg1->mr_offset);
> +	seg1->mr_offset -= pageoff;	/* start of page */
> +	seg1->mr_len += pageoff;
> +	len = -pageoff;
> +	if (nsegs > RPCRDMA_MAX_FMR_SGES)
> +		nsegs = RPCRDMA_MAX_FMR_SGES;
> +	for (i = 0; i < nsegs;) {
> +		rpcrdma_map_one(ia, seg, writing);
> +		physaddrs[i] = seg->mr_dma;
> +		len += seg->mr_len;
> +		++seg;
> +		++i;
> +		/* Check for holes */
> +		if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
> +		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
> +			break;
> +	}
> +
> +	rc = ib_map_phys_fmr(mw->r.fmr, physaddrs, i, seg1->mr_dma);
> +	if (rc)
> +		goto out_maperr;
> +
> +	seg1->mr_rkey = mw->r.fmr->rkey;
> +	seg1->mr_base = seg1->mr_dma + pageoff;
> +	seg1->mr_nsegs = i;
> +	seg1->mr_len = len;
> +	return i;
> +
> +out_maperr:
> +	dprintk("RPC:       %s: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
> +		__func__, len, (unsigned long long)seg1->mr_dma,
> +		pageoff, i, rc);
> +	while (i--)
> +		rpcrdma_unmap_one(ia, --seg);
> +	return rc;
> +}
> +
>   const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
> +	.ro_map				= fmr_op_map,
>   	.ro_maxpages			= fmr_op_maxpages,
>   	.ro_displayname			= "fmr",
>   };
> diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
> index 73a5ac8..2b5ccb0 100644
> --- a/net/sunrpc/xprtrdma/frwr_ops.c
> +++ b/net/sunrpc/xprtrdma/frwr_ops.c
> @@ -29,7 +29,95 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
>   		     rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth);
>   }
>
> +/* Post a FAST_REG Work Request to register a memory region
> + * for remote access via RDMA READ or RDMA WRITE.
> + */
> +static int
> +frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
> +	    int nsegs, bool writing)
> +{
> +	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
> +	struct rpcrdma_mr_seg *seg1 = seg;
> +	struct rpcrdma_mw *mw = seg1->rl_mw;
> +	struct rpcrdma_frmr *frmr = &mw->r.frmr;
> +	struct ib_mr *mr = frmr->fr_mr;
> +	struct ib_send_wr fastreg_wr, *bad_wr;
> +	u8 key;
> +	int len, pageoff;
> +	int i, rc;
> +	int seg_len;
> +	u64 pa;
> +	int page_no;
> +
> +	pageoff = offset_in_page(seg1->mr_offset);
> +	seg1->mr_offset -= pageoff;	/* start of page */
> +	seg1->mr_len += pageoff;
> +	len = -pageoff;
> +	if (nsegs > ia->ri_max_frmr_depth)
> +		nsegs = ia->ri_max_frmr_depth;
> +	for (page_no = i = 0; i < nsegs;) {
> +		rpcrdma_map_one(ia, seg, writing);
> +		pa = seg->mr_dma;
> +		for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
> +			frmr->fr_pgl->page_list[page_no++] = pa;
> +			pa += PAGE_SIZE;
> +		}
> +		len += seg->mr_len;
> +		++seg;
> +		++i;
> +		/* Check for holes */
> +		if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
> +		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
> +			break;
> +	}
> +	dprintk("RPC:       %s: Using frmr %p to map %d segments\n",
> +		__func__, mw, i);
> +
> +	frmr->fr_state = FRMR_IS_VALID;
> +
> +	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
> +	fastreg_wr.wr_id = (unsigned long)(void *)mw;
> +	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
> +	fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma;
> +	fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl;
> +	fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
> +	fastreg_wr.wr.fast_reg.page_list_len = page_no;
> +	fastreg_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;

Umm, is this necessarily true? Will the region length always be full pages?
Why don't you assign fast_reg.length = len instead?
I might be missing something here...

> +	if (fastreg_wr.wr.fast_reg.length < len) {
> +		rc = -EIO;
> +		goto out_err;
> +	}
> +	fastreg_wr.wr.fast_reg.access_flags = writing ?
> +				IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
> +				IB_ACCESS_REMOTE_READ;
> +	key = (u8)(mr->rkey & 0x000000FF);
> +	ib_update_fast_reg_key(mr, ++key);
> +	fastreg_wr.wr.fast_reg.rkey = mr->rkey;
> +
> +	DECR_CQCOUNT(&r_xprt->rx_ep);
> +	rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr);
> +	if (rc)
> +		goto out_senderr;
> +
> +	seg1->mr_rkey = mr->rkey;
> +	seg1->mr_base = seg1->mr_dma + pageoff;
> +	seg1->mr_nsegs = i;
> +	seg1->mr_len = len;
> +	return i;
> +
> +out_senderr:
> +	dprintk("RPC:       %s: ib_post_send status %i\n", __func__, rc);
> +	ib_update_fast_reg_key(mr, --key);
> +
> +out_err:
> +	frmr->fr_state = FRMR_IS_INVALID;
> +	while (i--)
> +		rpcrdma_unmap_one(ia, --seg);
> +	return rc;
> +}
> +
>   const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
> +	.ro_map				= frwr_op_map,
>   	.ro_maxpages			= frwr_op_maxpages,
>   	.ro_displayname			= "frwr",
>   };
> diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
> index 28ade19..5a284ee 100644
> --- a/net/sunrpc/xprtrdma/physical_ops.c
> +++ b/net/sunrpc/xprtrdma/physical_ops.c
> @@ -28,7 +28,24 @@ physical_op_maxpages(struct rpcrdma_xprt *r_xprt)
>   		     rpcrdma_max_segments(r_xprt));
>   }
>
> +/* The client's physical memory is already exposed for
> + * remote access via RDMA READ or RDMA WRITE.
> + */
> +static int
> +physical_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
> +		int nsegs, bool writing)
> +{
> +	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
> +
> +	rpcrdma_map_one(ia, seg, writing);
> +	seg->mr_rkey = ia->ri_bind_mem->rkey;
> +	seg->mr_base = seg->mr_dma;
> +	seg->mr_nsegs = 1;
> +	return 1;
> +}
> +
>   const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
> +	.ro_map				= physical_op_map,
>   	.ro_maxpages			= physical_op_maxpages,
>   	.ro_displayname			= "physical",
>   };
> diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
> index 41456d9..6ab1d03 100644
> --- a/net/sunrpc/xprtrdma/rpc_rdma.c
> +++ b/net/sunrpc/xprtrdma/rpc_rdma.c
> @@ -187,6 +187,7 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
>   	struct rpcrdma_write_array *warray = NULL;
>   	struct rpcrdma_write_chunk *cur_wchunk = NULL;
>   	__be32 *iptr = headerp->rm_body.rm_chunks;
> +	int (*map)(struct rpcrdma_xprt *, struct rpcrdma_mr_seg *, int, bool);
>
>   	if (type == rpcrdma_readch || type == rpcrdma_areadch) {
>   		/* a read chunk - server will RDMA Read our memory */
> @@ -209,9 +210,9 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
>   	if (nsegs < 0)
>   		return nsegs;
>
> +	map = r_xprt->rx_ia.ri_ops->ro_map;
>   	do {
> -		n = rpcrdma_register_external(seg, nsegs,
> -						cur_wchunk != NULL, r_xprt);
> +		n = map(r_xprt, seg, nsegs, cur_wchunk != NULL);
>   		if (n <= 0)
>   			goto out;
>   		if (cur_rchunk) {	/* read */
> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
> index 837d4ea..851ed97 100644
> --- a/net/sunrpc/xprtrdma/verbs.c
> +++ b/net/sunrpc/xprtrdma/verbs.c
> @@ -1857,8 +1857,8 @@ rpcrdma_free_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
>    * Wrappers for chunk registration, shared by read/write chunk code.
>    */
>
> -static void
> -rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
> +void
> +rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, bool writing)
>   {
>   	seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
>   	seg->mr_dmalen = seg->mr_len;
> @@ -1878,7 +1878,7 @@ rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
>   	}
>   }
>
> -static void
> +void
>   rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
>   {
>   	if (seg->mr_page)
> @@ -1890,93 +1890,6 @@ rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
>   }
>
>   static int
> -rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
> -			int *nsegs, int writing, struct rpcrdma_ia *ia,
> -			struct rpcrdma_xprt *r_xprt)
> -{
> -	struct rpcrdma_mr_seg *seg1 = seg;
> -	struct rpcrdma_mw *mw = seg1->rl_mw;
> -	struct rpcrdma_frmr *frmr = &mw->r.frmr;
> -	struct ib_mr *mr = frmr->fr_mr;
> -	struct ib_send_wr fastreg_wr, *bad_wr;
> -	u8 key;
> -	int len, pageoff;
> -	int i, rc;
> -	int seg_len;
> -	u64 pa;
> -	int page_no;
> -
> -	pageoff = offset_in_page(seg1->mr_offset);
> -	seg1->mr_offset -= pageoff;	/* start of page */
> -	seg1->mr_len += pageoff;
> -	len = -pageoff;
> -	if (*nsegs > ia->ri_max_frmr_depth)
> -		*nsegs = ia->ri_max_frmr_depth;
> -	for (page_no = i = 0; i < *nsegs;) {
> -		rpcrdma_map_one(ia, seg, writing);
> -		pa = seg->mr_dma;
> -		for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
> -			frmr->fr_pgl->page_list[page_no++] = pa;
> -			pa += PAGE_SIZE;
> -		}
> -		len += seg->mr_len;
> -		++seg;
> -		++i;
> -		/* Check for holes */
> -		if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
> -		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
> -			break;
> -	}
> -	dprintk("RPC:       %s: Using frmr %p to map %d segments\n",
> -		__func__, mw, i);
> -
> -	frmr->fr_state = FRMR_IS_VALID;
> -
> -	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
> -	fastreg_wr.wr_id = (unsigned long)(void *)mw;
> -	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
> -	fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma;
> -	fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl;
> -	fastreg_wr.wr.fast_reg.page_list_len = page_no;
> -	fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
> -	fastreg_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
> -	if (fastreg_wr.wr.fast_reg.length < len) {
> -		rc = -EIO;
> -		goto out_err;
> -	}
> -
> -	/* Bump the key */
> -	key = (u8)(mr->rkey & 0x000000FF);
> -	ib_update_fast_reg_key(mr, ++key);
> -
> -	fastreg_wr.wr.fast_reg.access_flags = (writing ?
> -				IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
> -				IB_ACCESS_REMOTE_READ);
> -	fastreg_wr.wr.fast_reg.rkey = mr->rkey;
> -	DECR_CQCOUNT(&r_xprt->rx_ep);
> -
> -	rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr);
> -	if (rc) {
> -		dprintk("RPC:       %s: failed ib_post_send for register,"
> -			" status %i\n", __func__, rc);
> -		ib_update_fast_reg_key(mr, --key);
> -		goto out_err;
> -	} else {
> -		seg1->mr_rkey = mr->rkey;
> -		seg1->mr_base = seg1->mr_dma + pageoff;
> -		seg1->mr_nsegs = i;
> -		seg1->mr_len = len;
> -	}
> -	*nsegs = i;
> -	return 0;
> -out_err:
> -	frmr->fr_state = FRMR_IS_INVALID;
> -	while (i--)
> -		rpcrdma_unmap_one(ia, --seg);
> -	return rc;
> -}
> -
> -static int
>   rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
>   			struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
>   {
> @@ -2007,49 +1920,6 @@ rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
>   }
>
>   static int
> -rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
> -			int *nsegs, int writing, struct rpcrdma_ia *ia)
> -{
> -	struct rpcrdma_mr_seg *seg1 = seg;
> -	u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
> -	int len, pageoff, i, rc;
> -
> -	pageoff = offset_in_page(seg1->mr_offset);
> -	seg1->mr_offset -= pageoff;	/* start of page */
> -	seg1->mr_len += pageoff;
> -	len = -pageoff;
> -	if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
> -		*nsegs = RPCRDMA_MAX_DATA_SEGS;
> -	for (i = 0; i < *nsegs;) {
> -		rpcrdma_map_one(ia, seg, writing);
> -		physaddrs[i] = seg->mr_dma;
> -		len += seg->mr_len;
> -		++seg;
> -		++i;
> -		/* Check for holes */
> -		if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
> -		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
> -			break;
> -	}
> -	rc = ib_map_phys_fmr(seg1->rl_mw->r.fmr, physaddrs, i, seg1->mr_dma);
> -	if (rc) {
> -		dprintk("RPC:       %s: failed ib_map_phys_fmr "
> -			"%u@0x%llx+%i (%d)... status %i\n", __func__,
> -			len, (unsigned long long)seg1->mr_dma,
> -			pageoff, i, rc);
> -		while (i--)
> -			rpcrdma_unmap_one(ia, --seg);
> -	} else {
> -		seg1->mr_rkey = seg1->rl_mw->r.fmr->rkey;
> -		seg1->mr_base = seg1->mr_dma + pageoff;
> -		seg1->mr_nsegs = i;
> -		seg1->mr_len = len;
> -	}
> -	*nsegs = i;
> -	return rc;
> -}
> -
> -static int
>   rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
>   			struct rpcrdma_ia *ia)
>   {
> @@ -2070,42 +1940,6 @@ rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
>   }
>
>   int
> -rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
> -			int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
> -{
> -	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
> -	int rc = 0;
> -
> -	switch (ia->ri_memreg_strategy) {
> -
> -	case RPCRDMA_ALLPHYSICAL:
> -		rpcrdma_map_one(ia, seg, writing);
> -		seg->mr_rkey = ia->ri_bind_mem->rkey;
> -		seg->mr_base = seg->mr_dma;
> -		seg->mr_nsegs = 1;
> -		nsegs = 1;
> -		break;
> -
> -	/* Registration using frmr registration */
> -	case RPCRDMA_FRMR:
> -		rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
> -		break;
> -
> -	/* Registration using fmr memory registration */
> -	case RPCRDMA_MTHCAFMR:
> -		rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
> -		break;
> -
> -	default:
> -		return -EIO;
> -	}
> -	if (rc)
> -		return rc;
> -
> -	return nsegs;
> -}
> -
> -int
>   rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
>   		struct rpcrdma_xprt *r_xprt)
>   {
> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
> index 59e627e..7bf077b 100644
> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
> @@ -336,6 +336,8 @@ struct rpcrdma_stats {
>    */
>   struct rpcrdma_xprt;
>   struct rpcrdma_memreg_ops {
> +	int		(*ro_map)(struct rpcrdma_xprt *,
> +				  struct rpcrdma_mr_seg *, int, bool);
>   	size_t		(*ro_maxpages)(struct rpcrdma_xprt *);
>   	const char	*ro_displayname;
>   };
> @@ -403,8 +405,6 @@ void rpcrdma_buffer_put(struct rpcrdma_req *);
>   void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
>   void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
>
> -int rpcrdma_register_external(struct rpcrdma_mr_seg *,
> -				int, int, struct rpcrdma_xprt *);
>   int rpcrdma_deregister_external(struct rpcrdma_mr_seg *,
>   				struct rpcrdma_xprt *);
>
> @@ -414,6 +414,8 @@ void rpcrdma_free_regbuf(struct rpcrdma_ia *,
>   			 struct rpcrdma_regbuf *);
>
>   unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *);
> +void rpcrdma_map_one(struct rpcrdma_ia *, struct rpcrdma_mr_seg *, bool);
> +void rpcrdma_unmap_one(struct rpcrdma_ia *, struct rpcrdma_mr_seg *);
>
>   /*
>    * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH v1 06/16] xprtrdma: Add a "deregister_external" op for each memreg mode
       [not found]     ` <20150313212728.22783.11821.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
@ 2015-03-16 10:34       ` Sagi Grimberg
       [not found]         ` <5506B19F.80105-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
  2015-03-24 11:12       ` Devesh Sharma
  1 sibling, 1 reply; 52+ messages in thread
From: Sagi Grimberg @ 2015-03-16 10:34 UTC (permalink / raw)
  To: Chuck Lever, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On 3/13/2015 11:27 PM, Chuck Lever wrote:
> There is very little common processing among the different external
> memory deregistration functions.
>
> In addition, instead of calling the deregistration function for each
> segment, have one call release all segments for a request. This makes
> the API a little asymmetrical, but a hair faster.
>
> Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> ---
>   net/sunrpc/xprtrdma/fmr_ops.c      |   37 ++++++++++++++++
>   net/sunrpc/xprtrdma/frwr_ops.c     |   46 ++++++++++++++++++++
>   net/sunrpc/xprtrdma/physical_ops.c |   13 ++++++
>   net/sunrpc/xprtrdma/rpc_rdma.c     |    7 +--
>   net/sunrpc/xprtrdma/transport.c    |    8 +---
>   net/sunrpc/xprtrdma/verbs.c        |   81 ------------------------------------
>   net/sunrpc/xprtrdma/xprt_rdma.h    |    5 +-
>   7 files changed, 103 insertions(+), 94 deletions(-)
>
> diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
> index 45fb646..9b983b4 100644
> --- a/net/sunrpc/xprtrdma/fmr_ops.c
> +++ b/net/sunrpc/xprtrdma/fmr_ops.c
> @@ -20,6 +20,32 @@
>   /* Maximum scatter/gather per FMR */
>   #define RPCRDMA_MAX_FMR_SGES	(64)
>
> +/* Use the ib_unmap_fmr() verb to prevent further remote
> + * access via RDMA READ or RDMA WRITE.
> + */
> +static int
> +__fmr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
> +{
> +	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
> +	struct rpcrdma_mr_seg *seg1 = seg;
> +	int rc, nsegs = seg->mr_nsegs;
> +	LIST_HEAD(l);
> +
> +	list_add(&seg1->rl_mw->r.fmr->list, &l);
> +	rc = ib_unmap_fmr(&l);
> +	read_lock(&ia->ri_qplock);
> +	while (seg1->mr_nsegs--)
> +		rpcrdma_unmap_one(ia, seg++);
> +	read_unlock(&ia->ri_qplock);

So I know you are just moving things around here, but can you explain
why the read_lock is taken here? Why do you need this protection?

> +	if (rc)
> +		goto out_err;
> +	return nsegs;
> +
> +out_err:
> +	dprintk("RPC:       %s: ib_unmap_fmr status %i\n", __func__, rc);
> +	return nsegs;
> +}
> +
>   /* FMR mode conveys up to 64 pages of payload per chunk segment.
>    */
>   static size_t
> @@ -79,8 +105,19 @@ out_maperr:
>   	return rc;
>   }
>
> +static void
> +fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
> +	     unsigned int count)
> +{
> +	unsigned int i;
> +
> +	for (i = 0; count--;)
> +		i += __fmr_unmap(r_xprt, &req->rl_segments[i]);
> +}
> +
>   const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
>   	.ro_map				= fmr_op_map,
> +	.ro_unmap			= fmr_op_unmap,
>   	.ro_maxpages			= fmr_op_maxpages,
>   	.ro_displayname			= "fmr",
>   };
> diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
> index 2b5ccb0..05b5761 100644
> --- a/net/sunrpc/xprtrdma/frwr_ops.c
> +++ b/net/sunrpc/xprtrdma/frwr_ops.c
> @@ -17,6 +17,41 @@
>   # define RPCDBG_FACILITY	RPCDBG_TRANS
>   #endif
>
> +/* Post a LOCAL_INV Work Request to prevent further remote access
> + * via RDMA READ or RDMA WRITE.
> + */
> +static int
> +__frwr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
> +{
> +	struct rpcrdma_mr_seg *seg1 = seg;
> +	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
> +	struct ib_send_wr invalidate_wr, *bad_wr;
> +	int rc, nsegs = seg->mr_nsegs;
> +
> +	seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;
> +
> +	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
> +	invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw;
> +	invalidate_wr.opcode = IB_WR_LOCAL_INV;
> +	invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey;
> +	DECR_CQCOUNT(&r_xprt->rx_ep);
> +
> +	read_lock(&ia->ri_qplock);
> +	while (seg1->mr_nsegs--)
> +		rpcrdma_unmap_one(ia, seg++);
> +	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
> +	read_unlock(&ia->ri_qplock);

Same question here: you are taking the lock, but you don't really
check for anything — you just post. Again, I might be missing something.

> +	if (rc)
> +		goto out_err;
> +	return nsegs;
> +
> +out_err:
> +	/* Force rpcrdma_buffer_get() to retry */
> +	seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
> +	dprintk("RPC:       %s: ib_post_send status %i\n", __func__, rc);
> +	return nsegs;
> +}
> +
>   /* FRWR mode conveys a list of pages per chunk segment. The
>    * maximum length of that list is the FRWR page list depth.
>    */
> @@ -116,8 +151,19 @@ out_err:
>   	return rc;
>   }
>
> +static void
> +frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
> +	      unsigned int count)
> +{
> +	unsigned int i;
> +
> +	for (i = 0; count--;)
> +		i += __frwr_unmap(r_xprt, &req->rl_segments[i]);
> +}
> +
>   const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
>   	.ro_map				= frwr_op_map,
> +	.ro_unmap			= frwr_op_unmap,
>   	.ro_maxpages			= frwr_op_maxpages,
>   	.ro_displayname			= "frwr",
>   };
> diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
> index 5a284ee..f2c15be 100644
> --- a/net/sunrpc/xprtrdma/physical_ops.c
> +++ b/net/sunrpc/xprtrdma/physical_ops.c
> @@ -44,8 +44,21 @@ physical_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
>   	return 1;
>   }
>
> +/* Unmap a memory region, but leave it registered.
> + */
> +static void
> +physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
> +		  unsigned int count)
> +{
> +	unsigned int i;
> +
> +	for (i = 0; i < count; i++)
> +		rpcrdma_unmap_one(&r_xprt->rx_ia, &req->rl_segments[i]);
> +}
> +
>   const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
>   	.ro_map				= physical_op_map,
> +	.ro_unmap			= physical_op_unmap,
>   	.ro_maxpages			= physical_op_maxpages,
>   	.ro_displayname			= "physical",
>   };
> diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
> index 6ab1d03..7b51d9d 100644
> --- a/net/sunrpc/xprtrdma/rpc_rdma.c
> +++ b/net/sunrpc/xprtrdma/rpc_rdma.c
> @@ -284,11 +284,8 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
>   	return (unsigned char *)iptr - (unsigned char *)headerp;
>
>   out:
> -	if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_FRMR) {
> -		for (pos = 0; nchunks--;)
> -			pos += rpcrdma_deregister_external(
> -					&req->rl_segments[pos], r_xprt);
> -	}
> +	if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_FRMR)
> +		r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, req, nchunks);
>   	return n;
>   }
>
> diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
> index 9a9da40..c484671 100644
> --- a/net/sunrpc/xprtrdma/transport.c
> +++ b/net/sunrpc/xprtrdma/transport.c
> @@ -571,7 +571,6 @@ xprt_rdma_free(void *buffer)
>   	struct rpcrdma_req *req;
>   	struct rpcrdma_xprt *r_xprt;
>   	struct rpcrdma_regbuf *rb;
> -	int i;
>
>   	if (buffer == NULL)
>   		return;
> @@ -582,11 +581,8 @@ xprt_rdma_free(void *buffer)
>
>   	dprintk("RPC:       %s: called on 0x%p\n", __func__, req->rl_reply);
>
> -	for (i = 0; req->rl_nchunks;) {
> -		--req->rl_nchunks;
> -		i += rpcrdma_deregister_external(
> -			&req->rl_segments[i], r_xprt);
> -	}
> +	r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, req, req->rl_nchunks);
> +	req->rl_nchunks = 0;
>
>   	rpcrdma_buffer_put(req);
>   }
> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
> index 851ed97..a1621fd 100644
> --- a/net/sunrpc/xprtrdma/verbs.c
> +++ b/net/sunrpc/xprtrdma/verbs.c
> @@ -1509,7 +1509,7 @@ rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
>   	}
>   }
>
> -/* rpcrdma_unmap_one() was already done by rpcrdma_deregister_frmr_external().
> +/* rpcrdma_unmap_one() was already done during deregistration.
>    * Redo only the ib_post_send().
>    */
>   static void
> @@ -1889,85 +1889,6 @@ rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
>   				seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
>   }
>
> -static int
> -rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
> -			struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
> -{
> -	struct rpcrdma_mr_seg *seg1 = seg;
> -	struct ib_send_wr invalidate_wr, *bad_wr;
> -	int rc;
> -
> -	seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;
> -
> -	memset(&invalidate_wr, 0, sizeof invalidate_wr);
> -	invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw;
> -	invalidate_wr.opcode = IB_WR_LOCAL_INV;
> -	invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey;
> -	DECR_CQCOUNT(&r_xprt->rx_ep);
> -
> -	read_lock(&ia->ri_qplock);
> -	while (seg1->mr_nsegs--)
> -		rpcrdma_unmap_one(ia, seg++);
> -	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
> -	read_unlock(&ia->ri_qplock);
> -	if (rc) {
> -		/* Force rpcrdma_buffer_get() to retry */
> -		seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
> -		dprintk("RPC:       %s: failed ib_post_send for invalidate,"
> -			" status %i\n", __func__, rc);
> -	}
> -	return rc;
> -}
> -
> -static int
> -rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
> -			struct rpcrdma_ia *ia)
> -{
> -	struct rpcrdma_mr_seg *seg1 = seg;
> -	LIST_HEAD(l);
> -	int rc;
> -
> -	list_add(&seg1->rl_mw->r.fmr->list, &l);
> -	rc = ib_unmap_fmr(&l);
> -	read_lock(&ia->ri_qplock);
> -	while (seg1->mr_nsegs--)
> -		rpcrdma_unmap_one(ia, seg++);
> -	read_unlock(&ia->ri_qplock);
> -	if (rc)
> -		dprintk("RPC:       %s: failed ib_unmap_fmr,"
> -			" status %i\n", __func__, rc);
> -	return rc;
> -}
> -
> -int
> -rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
> -		struct rpcrdma_xprt *r_xprt)
> -{
> -	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
> -	int nsegs = seg->mr_nsegs, rc;
> -
> -	switch (ia->ri_memreg_strategy) {
> -
> -	case RPCRDMA_ALLPHYSICAL:
> -		read_lock(&ia->ri_qplock);
> -		rpcrdma_unmap_one(ia, seg);
> -		read_unlock(&ia->ri_qplock);
> -		break;
> -
> -	case RPCRDMA_FRMR:
> -		rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
> -		break;
> -
> -	case RPCRDMA_MTHCAFMR:
> -		rc = rpcrdma_deregister_fmr_external(seg, ia);
> -		break;
> -
> -	default:
> -		break;
> -	}
> -	return nsegs;
> -}
> -
>   /*
>    * Prepost any receive buffer, then post send.
>    *
> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
> index 7bf077b..3aabbb2 100644
> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
> @@ -338,6 +338,8 @@ struct rpcrdma_xprt;
>   struct rpcrdma_memreg_ops {
>   	int		(*ro_map)(struct rpcrdma_xprt *,
>   				  struct rpcrdma_mr_seg *, int, bool);
> +	void		(*ro_unmap)(struct rpcrdma_xprt *,
> +				    struct rpcrdma_req *, unsigned int);
>   	size_t		(*ro_maxpages)(struct rpcrdma_xprt *);
>   	const char	*ro_displayname;
>   };
> @@ -405,9 +407,6 @@ void rpcrdma_buffer_put(struct rpcrdma_req *);
>   void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
>   void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
>
> -int rpcrdma_deregister_external(struct rpcrdma_mr_seg *,
> -				struct rpcrdma_xprt *);
> -
>   struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *,
>   					    size_t, gfp_t);
>   void rpcrdma_free_regbuf(struct rpcrdma_ia *,
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH v1 07/16] xprtrdma: Add "init MRs" memreg op
       [not found]     ` <20150313212738.22783.34521.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
@ 2015-03-16 10:36       ` Sagi Grimberg
  0 siblings, 0 replies; 52+ messages in thread
From: Sagi Grimberg @ 2015-03-16 10:36 UTC (permalink / raw)
  To: Chuck Lever, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On 3/13/2015 11:27 PM, Chuck Lever wrote:
> This method is used when setting up a new transport instance to
> create a pool of Memory Region objects that will be used to register
> memory during operation.
>
> Memory Regions are not needed for "physical" registration, since
> ->prepare and ->release are basically no-ops for that mode.
>
> Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> ---
>   net/sunrpc/xprtrdma/fmr_ops.c      |   42 +++++++++++++++
>   net/sunrpc/xprtrdma/frwr_ops.c     |   66 +++++++++++++++++++++++
>   net/sunrpc/xprtrdma/physical_ops.c |    7 ++
>   net/sunrpc/xprtrdma/verbs.c        |  104 +-----------------------------------
>   net/sunrpc/xprtrdma/xprt_rdma.h    |    1
>   5 files changed, 119 insertions(+), 101 deletions(-)
>
> diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
> index 9b983b4..1501db0 100644
> --- a/net/sunrpc/xprtrdma/fmr_ops.c
> +++ b/net/sunrpc/xprtrdma/fmr_ops.c
> @@ -55,6 +55,47 @@ fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
>   		     rpcrdma_max_segments(r_xprt) * RPCRDMA_MAX_FMR_SGES);
>   }
>
> +static int
> +fmr_op_init(struct rpcrdma_xprt *r_xprt)
> +{
> +	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
> +	int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ;
> +	struct ib_fmr_attr fmr_attr = {
> +		.max_pages	= RPCRDMA_MAX_FMR_SGES,
> +		.max_maps	= 1,
> +		.page_shift	= PAGE_SHIFT
> +	};
> +	struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
> +	struct rpcrdma_mw *r;
> +	int i, rc;
> +
> +	INIT_LIST_HEAD(&buf->rb_mws);
> +	INIT_LIST_HEAD(&buf->rb_all);
> +
> +	i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
> +	dprintk("RPC:       %s: initializing %d FMRs\n", __func__, i);
> +
> +	while (i--) {
> +		r = kzalloc(sizeof(*r), GFP_KERNEL);
> +		if (!r)
> +			return -ENOMEM;
> +
> +		r->r.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr);
> +		if (IS_ERR(r->r.fmr))
> +			goto out_fmr_err;
> +
> +		list_add(&r->mw_list, &buf->rb_mws);
> +		list_add(&r->mw_all, &buf->rb_all);
> +	}
> +	return 0;
> +
> +out_fmr_err:
> +	rc = PTR_ERR(r->r.fmr);
> +	dprintk("RPC:       %s: ib_alloc_fmr status %i\n", __func__, rc);
> +	kfree(r);
> +	return rc;
> +}
> +
>   /* Use the ib_map_phys_fmr() verb to register a memory region
>    * for remote access via RDMA READ or RDMA WRITE.
>    */
> @@ -119,5 +160,6 @@ const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
>   	.ro_map				= fmr_op_map,
>   	.ro_unmap			= fmr_op_unmap,
>   	.ro_maxpages			= fmr_op_maxpages,
> +	.ro_init			= fmr_op_init,
>   	.ro_displayname			= "fmr",
>   };
> diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
> index 05b5761..975372c 100644
> --- a/net/sunrpc/xprtrdma/frwr_ops.c
> +++ b/net/sunrpc/xprtrdma/frwr_ops.c
> @@ -17,6 +17,35 @@
>   # define RPCDBG_FACILITY	RPCDBG_TRANS
>   #endif
>
> +static int
> +__frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device,
> +	    unsigned int depth)
> +{
> +	struct rpcrdma_frmr *f = &r->r.frmr;
> +	int rc;
> +
> +	f->fr_mr = ib_alloc_fast_reg_mr(pd, depth);
> +	if (IS_ERR(f->fr_mr))
> +		goto out_mr_err;
> +	f->fr_pgl = ib_alloc_fast_reg_page_list(device, depth);
> +	if (IS_ERR(f->fr_pgl))
> +		goto out_list_err;
> +	return 0;
> +
> +out_mr_err:
> +	rc = PTR_ERR(f->fr_mr);
> +	dprintk("RPC:       %s: ib_alloc_fast_reg_mr status %i\n",
> +		__func__, rc);
> +	return rc;
> +
> +out_list_err:
> +	rc = PTR_ERR(f->fr_pgl);
> +	dprintk("RPC:       %s: ib_alloc_fast_reg_page_list status %i\n",
> +		__func__, rc);
> +	ib_dereg_mr(f->fr_mr);
> +	return rc;
> +}
> +
>   /* Post a LOCAL_INV Work Request to prevent further remote access
>    * via RDMA READ or RDMA WRITE.
>    */
> @@ -64,6 +93,42 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
>   		     rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth);
>   }
>
> +static int
> +frwr_op_init(struct rpcrdma_xprt *r_xprt)
> +{
> +	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
> +	struct ib_device *device = r_xprt->rx_ia.ri_id->device;
> +	unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth;
> +	struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
> +	int i;
> +
> +	INIT_LIST_HEAD(&buf->rb_mws);
> +	INIT_LIST_HEAD(&buf->rb_all);
> +
> +	i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
> +	dprintk("RPC:       %s: initializing %d FRMRs\n", __func__, i);
> +
> +	while (i--) {
> +		struct rpcrdma_mw *r;
> +		int rc;
> +
> +		r = kzalloc(sizeof(*r), GFP_KERNEL);
> +		if (!r)
> +			return -ENOMEM;
> +
> +		rc = __frwr_init(r, pd, device, depth);
> +		if (rc) {
> +			kfree(r);
> +			return rc;
> +		}
> +
> +		list_add(&r->mw_list, &buf->rb_mws);
> +		list_add(&r->mw_all, &buf->rb_all);
> +	}
> +
> +	return 0;
> +}
> +
>   /* Post a FAST_REG Work Request to register a memory region
>    * for remote access via RDMA READ or RDMA WRITE.
>    */
> @@ -165,5 +230,6 @@ const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
>   	.ro_map				= frwr_op_map,
>   	.ro_unmap			= frwr_op_unmap,
>   	.ro_maxpages			= frwr_op_maxpages,
> +	.ro_init			= frwr_op_init,
>   	.ro_displayname			= "frwr",
>   };
> diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
> index f2c15be..ae2b0bc 100644
> --- a/net/sunrpc/xprtrdma/physical_ops.c
> +++ b/net/sunrpc/xprtrdma/physical_ops.c
> @@ -28,6 +28,12 @@ physical_op_maxpages(struct rpcrdma_xprt *r_xprt)
>   		     rpcrdma_max_segments(r_xprt));
>   }
>
> +static int
> +physical_op_init(struct rpcrdma_xprt *r_xprt)
> +{
> +	return 0;
> +}
> +
>   /* The client's physical memory is already exposed for
>    * remote access via RDMA READ or RDMA WRITE.
>    */
> @@ -60,5 +66,6 @@ const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
>   	.ro_map				= physical_op_map,
>   	.ro_unmap			= physical_op_unmap,
>   	.ro_maxpages			= physical_op_maxpages,
> +	.ro_init			= physical_op_init,
>   	.ro_displayname			= "physical",
>   };
> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
> index a1621fd..d7810d6 100644
> --- a/net/sunrpc/xprtrdma/verbs.c
> +++ b/net/sunrpc/xprtrdma/verbs.c
> @@ -1123,91 +1123,6 @@ out:
>   	return ERR_PTR(rc);
>   }
>
> -static int
> -rpcrdma_init_fmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
> -{
> -	int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ;
> -	struct ib_fmr_attr fmr_attr = {
> -		.max_pages	= RPCRDMA_MAX_DATA_SEGS,
> -		.max_maps	= 1,
> -		.page_shift	= PAGE_SHIFT
> -	};
> -	struct rpcrdma_mw *r;
> -	int i, rc;
> -
> -	i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
> -	dprintk("RPC:       %s: initializing %d FMRs\n", __func__, i);
> -
> -	while (i--) {
> -		r = kzalloc(sizeof(*r), GFP_KERNEL);
> -		if (r == NULL)
> -			return -ENOMEM;
> -
> -		r->r.fmr = ib_alloc_fmr(ia->ri_pd, mr_access_flags, &fmr_attr);
> -		if (IS_ERR(r->r.fmr)) {
> -			rc = PTR_ERR(r->r.fmr);
> -			dprintk("RPC:       %s: ib_alloc_fmr failed %i\n",
> -				__func__, rc);
> -			goto out_free;
> -		}
> -
> -		list_add(&r->mw_list, &buf->rb_mws);
> -		list_add(&r->mw_all, &buf->rb_all);
> -	}
> -	return 0;
> -
> -out_free:
> -	kfree(r);
> -	return rc;
> -}
> -
> -static int
> -rpcrdma_init_frmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
> -{
> -	struct rpcrdma_frmr *f;
> -	struct rpcrdma_mw *r;
> -	int i, rc;
> -
> -	i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
> -	dprintk("RPC:       %s: initializing %d FRMRs\n", __func__, i);
> -
> -	while (i--) {
> -		r = kzalloc(sizeof(*r), GFP_KERNEL);
> -		if (r == NULL)
> -			return -ENOMEM;
> -		f = &r->r.frmr;
> -
> -		f->fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
> -						ia->ri_max_frmr_depth);
> -		if (IS_ERR(f->fr_mr)) {
> -			rc = PTR_ERR(f->fr_mr);
> -			dprintk("RPC:       %s: ib_alloc_fast_reg_mr "
> -				"failed %i\n", __func__, rc);
> -			goto out_free;
> -		}
> -
> -		f->fr_pgl = ib_alloc_fast_reg_page_list(ia->ri_id->device,
> -							ia->ri_max_frmr_depth);
> -		if (IS_ERR(f->fr_pgl)) {
> -			rc = PTR_ERR(f->fr_pgl);
> -			dprintk("RPC:       %s: ib_alloc_fast_reg_page_list "
> -				"failed %i\n", __func__, rc);
> -
> -			ib_dereg_mr(f->fr_mr);
> -			goto out_free;
> -		}
> -
> -		list_add(&r->mw_list, &buf->rb_mws);
> -		list_add(&r->mw_all, &buf->rb_all);
> -	}
> -
> -	return 0;
> -
> -out_free:
> -	kfree(r);
> -	return rc;
> -}
> -
>   int
>   rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
>   {
> @@ -1244,22 +1159,9 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
>   	buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
>   	p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];
>
> -	INIT_LIST_HEAD(&buf->rb_mws);
> -	INIT_LIST_HEAD(&buf->rb_all);
> -	switch (ia->ri_memreg_strategy) {
> -	case RPCRDMA_FRMR:
> -		rc = rpcrdma_init_frmrs(ia, buf);
> -		if (rc)
> -			goto out;
> -		break;
> -	case RPCRDMA_MTHCAFMR:
> -		rc = rpcrdma_init_fmrs(ia, buf);
> -		if (rc)
> -			goto out;
> -		break;
> -	default:
> -		break;
> -	}
> +	rc = ia->ri_ops->ro_init(r_xprt);
> +	if (rc)
> +		goto out;
>
>   	for (i = 0; i < buf->rb_max_requests; i++) {
>   		struct rpcrdma_req *req;
> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
> index 3aabbb2..4fe3c38 100644
> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
> @@ -341,6 +341,7 @@ struct rpcrdma_memreg_ops {
>   	void		(*ro_unmap)(struct rpcrdma_xprt *,
>   				    struct rpcrdma_req *, unsigned int);
>   	size_t		(*ro_maxpages)(struct rpcrdma_xprt *);
> +	int		(*ro_init)(struct rpcrdma_xprt *);
>   	const char	*ro_displayname;
>   };
>
>

Looks good.

Reviewed-by: Sagi Grimberg <sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH v1 08/16] xprtrdma: Add "reset MRs" memreg op
       [not found]     ` <20150313212747.22783.98300.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
@ 2015-03-16 10:38       ` Sagi Grimberg
  2015-03-24 11:27       ` Devesh Sharma
  1 sibling, 0 replies; 52+ messages in thread
From: Sagi Grimberg @ 2015-03-16 10:38 UTC (permalink / raw)
  To: Chuck Lever, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On 3/13/2015 11:27 PM, Chuck Lever wrote:
> This method is invoked when a transport instance is about to be
> reconnected. Each Memory Region object is reset to its initial
> state.
>
> Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> ---
>   net/sunrpc/xprtrdma/fmr_ops.c      |   23 ++++++++
>   net/sunrpc/xprtrdma/frwr_ops.c     |   46 ++++++++++++++++
>   net/sunrpc/xprtrdma/physical_ops.c |    6 ++
>   net/sunrpc/xprtrdma/verbs.c        |  103 +-----------------------------------
>   net/sunrpc/xprtrdma/xprt_rdma.h    |    1
>   5 files changed, 78 insertions(+), 101 deletions(-)
>
> diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
> index 1501db0..1ccb3de 100644
> --- a/net/sunrpc/xprtrdma/fmr_ops.c
> +++ b/net/sunrpc/xprtrdma/fmr_ops.c
> @@ -156,10 +156,33 @@ fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
>   		i += __fmr_unmap(r_xprt, &req->rl_segments[i]);
>   }
>
> +/* After a disconnect, unmap all FMRs.
> + *
> + * This is invoked only in the transport connect worker in order
> + * to serialize with rpcrdma_register_fmr_external().
> + */
> +static void
> +fmr_op_reset(struct rpcrdma_xprt *r_xprt)
> +{
> +	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
> +	struct rpcrdma_mw *r;
> +	LIST_HEAD(list);
> +	int rc;
> +
> +	list_for_each_entry(r, &buf->rb_all, mw_all)
> +		list_add(&r->r.fmr->list, &list);
> +
> +	rc = ib_unmap_fmr(&list);
> +	if (rc)
> +		dprintk("RPC:       %s: ib_unmap_fmr failed %i\n",
> +			__func__, rc);
> +}
> +
>   const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
>   	.ro_map				= fmr_op_map,
>   	.ro_unmap			= fmr_op_unmap,
>   	.ro_maxpages			= fmr_op_maxpages,
>   	.ro_init			= fmr_op_init,
> +	.ro_reset			= fmr_op_reset,
>   	.ro_displayname			= "fmr",
>   };
> diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
> index 975372c..b4ce0e5 100644
> --- a/net/sunrpc/xprtrdma/frwr_ops.c
> +++ b/net/sunrpc/xprtrdma/frwr_ops.c
> @@ -81,6 +81,18 @@ out_err:
>   	return nsegs;
>   }
>
> +static void
> +__frwr_release(struct rpcrdma_mw *r)
> +{
> +	int rc;
> +
> +	rc = ib_dereg_mr(r->r.frmr.fr_mr);
> +	if (rc)
> +		dprintk("RPC:       %s: ib_dereg_mr status %i\n",
> +			__func__, rc);
> +	ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
> +}
> +
>   /* FRWR mode conveys a list of pages per chunk segment. The
>    * maximum length of that list is the FRWR page list depth.
>    */
> @@ -226,10 +238,44 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
>   		i += __frwr_unmap(r_xprt, &req->rl_segments[i]);
>   }
>
> +/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
> + * an unusable state. Find FRMRs in this state and dereg / reg
> + * each.  FRMRs that are VALID and attached to an rpcrdma_req are
> + * also torn down.
> + *
> + * This gives all in-use FRMRs a fresh rkey and leaves them INVALID.
> + *
> + * This is invoked only in the transport connect worker in order
> + * to serialize with rpcrdma_register_frmr_external().
> + */
> +static void
> +frwr_op_reset(struct rpcrdma_xprt *r_xprt)
> +{
> +	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
> +	struct ib_device *device = r_xprt->rx_ia.ri_id->device;
> +	unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth;
> +	struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
> +	struct rpcrdma_mw *r;
> +	int rc;
> +
> +	list_for_each_entry(r, &buf->rb_all, mw_all) {
> +		if (r->r.frmr.fr_state == FRMR_IS_INVALID)
> +			continue;
> +
> +		__frwr_release(r);
> +		rc = __frwr_init(r, pd, device, depth);
> +		if (rc)
> +			continue;
> +
> +		r->r.frmr.fr_state = FRMR_IS_INVALID;
> +	}
> +}
> +
>   const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
>   	.ro_map				= frwr_op_map,
>   	.ro_unmap			= frwr_op_unmap,
>   	.ro_maxpages			= frwr_op_maxpages,
>   	.ro_init			= frwr_op_init,
> +	.ro_reset			= frwr_op_reset,
>   	.ro_displayname			= "frwr",
>   };
> diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
> index ae2b0bc..0afc691 100644
> --- a/net/sunrpc/xprtrdma/physical_ops.c
> +++ b/net/sunrpc/xprtrdma/physical_ops.c
> @@ -62,10 +62,16 @@ physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
>   		rpcrdma_unmap_one(&r_xprt->rx_ia, &req->rl_segments[i]);
>   }
>
> +static void
> +physical_op_reset(struct rpcrdma_xprt *r_xprt)
> +{
> +}
> +
>   const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
>   	.ro_map				= physical_op_map,
>   	.ro_unmap			= physical_op_unmap,
>   	.ro_maxpages			= physical_op_maxpages,
>   	.ro_init			= physical_op_init,
> +	.ro_reset			= physical_op_reset,
>   	.ro_displayname			= "physical",
>   };
> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
> index d7810d6..e17d91a 100644
> --- a/net/sunrpc/xprtrdma/verbs.c
> +++ b/net/sunrpc/xprtrdma/verbs.c
> @@ -63,9 +63,6 @@
>   # define RPCDBG_FACILITY	RPCDBG_TRANS
>   #endif
>
> -static void rpcrdma_reset_frmrs(struct rpcrdma_ia *);
> -static void rpcrdma_reset_fmrs(struct rpcrdma_ia *);
> -
>   /*
>    * internal functions
>    */
> @@ -944,21 +941,9 @@ retry:
>   		rpcrdma_ep_disconnect(ep, ia);
>   		rpcrdma_flush_cqs(ep);
>
> -		switch (ia->ri_memreg_strategy) {
> -		case RPCRDMA_FRMR:
> -			rpcrdma_reset_frmrs(ia);
> -			break;
> -		case RPCRDMA_MTHCAFMR:
> -			rpcrdma_reset_fmrs(ia);
> -			break;
> -		case RPCRDMA_ALLPHYSICAL:
> -			break;
> -		default:
> -			rc = -EIO;
> -			goto out;
> -		}
> -
>   		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
> +		ia->ri_ops->ro_reset(xprt);
> +
>   		id = rpcrdma_create_id(xprt, ia,
>   				(struct sockaddr *)&xprt->rx_data.addr);
>   		if (IS_ERR(id)) {
> @@ -1288,90 +1273,6 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
>   	kfree(buf->rb_pool);
>   }
>
> -/* After a disconnect, unmap all FMRs.
> - *
> - * This is invoked only in the transport connect worker in order
> - * to serialize with rpcrdma_register_fmr_external().
> - */
> -static void
> -rpcrdma_reset_fmrs(struct rpcrdma_ia *ia)
> -{
> -	struct rpcrdma_xprt *r_xprt =
> -				container_of(ia, struct rpcrdma_xprt, rx_ia);
> -	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
> -	struct list_head *pos;
> -	struct rpcrdma_mw *r;
> -	LIST_HEAD(l);
> -	int rc;
> -
> -	list_for_each(pos, &buf->rb_all) {
> -		r = list_entry(pos, struct rpcrdma_mw, mw_all);
> -
> -		INIT_LIST_HEAD(&l);
> -		list_add(&r->r.fmr->list, &l);
> -		rc = ib_unmap_fmr(&l);
> -		if (rc)
> -			dprintk("RPC:       %s: ib_unmap_fmr failed %i\n",
> -				__func__, rc);
> -	}
> -}
> -
> -/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
> - * an unusable state. Find FRMRs in this state and dereg / reg
> - * each.  FRMRs that are VALID and attached to an rpcrdma_req are
> - * also torn down.
> - *
> - * This gives all in-use FRMRs a fresh rkey and leaves them INVALID.
> - *
> - * This is invoked only in the transport connect worker in order
> - * to serialize with rpcrdma_register_frmr_external().
> - */
> -static void
> -rpcrdma_reset_frmrs(struct rpcrdma_ia *ia)
> -{
> -	struct rpcrdma_xprt *r_xprt =
> -				container_of(ia, struct rpcrdma_xprt, rx_ia);
> -	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
> -	struct list_head *pos;
> -	struct rpcrdma_mw *r;
> -	int rc;
> -
> -	list_for_each(pos, &buf->rb_all) {
> -		r = list_entry(pos, struct rpcrdma_mw, mw_all);
> -
> -		if (r->r.frmr.fr_state == FRMR_IS_INVALID)
> -			continue;
> -
> -		rc = ib_dereg_mr(r->r.frmr.fr_mr);
> -		if (rc)
> -			dprintk("RPC:       %s: ib_dereg_mr failed %i\n",
> -				__func__, rc);
> -		ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
> -
> -		r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
> -					ia->ri_max_frmr_depth);
> -		if (IS_ERR(r->r.frmr.fr_mr)) {
> -			rc = PTR_ERR(r->r.frmr.fr_mr);
> -			dprintk("RPC:       %s: ib_alloc_fast_reg_mr"
> -				" failed %i\n", __func__, rc);
> -			continue;
> -		}
> -		r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
> -					ia->ri_id->device,
> -					ia->ri_max_frmr_depth);
> -		if (IS_ERR(r->r.frmr.fr_pgl)) {
> -			rc = PTR_ERR(r->r.frmr.fr_pgl);
> -			dprintk("RPC:       %s: "
> -				"ib_alloc_fast_reg_page_list "
> -				"failed %i\n", __func__, rc);
> -
> -			ib_dereg_mr(r->r.frmr.fr_mr);
> -			continue;
> -		}
> -		r->r.frmr.fr_state = FRMR_IS_INVALID;
> -	}
> -}
> -
>   /* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
>    * some req segments uninitialized.
>    */
> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
> index 4fe3c38..cdf6763 100644
> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
> @@ -342,6 +342,7 @@ struct rpcrdma_memreg_ops {
>   				    struct rpcrdma_req *, unsigned int);
>   	size_t		(*ro_maxpages)(struct rpcrdma_xprt *);
>   	int		(*ro_init)(struct rpcrdma_xprt *);
> +	void		(*ro_reset)(struct rpcrdma_xprt *);
>   	const char	*ro_displayname;
>   };
>
>


Reviewed-by: Sagi Grimberg <sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH v1 09/16] xprtrdma: Add "destroy MRs" memreg op
       [not found]     ` <20150313212758.22783.67493.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
@ 2015-03-16 10:46       ` Sagi Grimberg
       [not found]         ` <5506B48F.6050902-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
  0 siblings, 1 reply; 52+ messages in thread
From: Sagi Grimberg @ 2015-03-16 10:46 UTC (permalink / raw)
  To: Chuck Lever, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On 3/13/2015 11:27 PM, Chuck Lever wrote:
> Memory Region objects associated with a transport instance are
> destroyed before the instance is shutdown and destroyed.
>
> Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> ---
>   net/sunrpc/xprtrdma/fmr_ops.c      |   21 +++++++++++++++
>   net/sunrpc/xprtrdma/frwr_ops.c     |   17 ++++++++++++
>   net/sunrpc/xprtrdma/physical_ops.c |    6 ++++
>   net/sunrpc/xprtrdma/verbs.c        |   52 +-----------------------------------
>   net/sunrpc/xprtrdma/xprt_rdma.h    |    1 +
>   5 files changed, 46 insertions(+), 51 deletions(-)
>
> diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
> index 1ccb3de..3115e4b 100644
> --- a/net/sunrpc/xprtrdma/fmr_ops.c
> +++ b/net/sunrpc/xprtrdma/fmr_ops.c
> @@ -178,11 +178,32 @@ fmr_op_reset(struct rpcrdma_xprt *r_xprt)
>   			__func__, rc);
>   }
>
> +static void
> +fmr_op_destroy(struct rpcrdma_buffer *buf)
> +{
> +	struct rpcrdma_mw *r;
> +	int rc;
> +
> +	while (!list_empty(&buf->rb_all)) {
> +		r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
> +		list_del(&r->mw_all);
> +		list_del(&r->mw_list);

Again, I understand this patch is just moving code, but is it
guaranteed that mr_list is in rb_mws at this point?

> +
> +		rc = ib_dealloc_fmr(r->r.fmr);
> +		if (rc)
> +			dprintk("RPC:       %s: ib_dealloc_fmr failed %i\n",
> +				__func__, rc);
> +
> +		kfree(r);
> +	}
> +}
> +
>   const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
>   	.ro_map				= fmr_op_map,
>   	.ro_unmap			= fmr_op_unmap,
>   	.ro_maxpages			= fmr_op_maxpages,
>   	.ro_init			= fmr_op_init,
>   	.ro_reset			= fmr_op_reset,
> +	.ro_destroy			= fmr_op_destroy,
>   	.ro_displayname			= "fmr",
>   };
> diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
> index b4ce0e5..fc3a228 100644
> --- a/net/sunrpc/xprtrdma/frwr_ops.c
> +++ b/net/sunrpc/xprtrdma/frwr_ops.c
> @@ -271,11 +271,28 @@ frwr_op_reset(struct rpcrdma_xprt *r_xprt)
>   	}
>   }
>
> +static void
> +frwr_op_destroy(struct rpcrdma_buffer *buf)
> +{
> +	struct rpcrdma_mw *r;
> +
> +	while (!list_empty(&buf->rb_all)) {
> +		r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
> +		list_del(&r->mw_all);
> +		list_del(&r->mw_list);
> +
> +		__frwr_release(r);
> +
> +		kfree(r);
> +	}
> +}
> +
>   const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
>   	.ro_map				= frwr_op_map,
>   	.ro_unmap			= frwr_op_unmap,
>   	.ro_maxpages			= frwr_op_maxpages,
>   	.ro_init			= frwr_op_init,
>   	.ro_reset			= frwr_op_reset,
> +	.ro_destroy			= frwr_op_destroy,
>   	.ro_displayname			= "frwr",
>   };
> diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
> index 0afc691..f8da8c4 100644
> --- a/net/sunrpc/xprtrdma/physical_ops.c
> +++ b/net/sunrpc/xprtrdma/physical_ops.c
> @@ -67,11 +67,17 @@ physical_op_reset(struct rpcrdma_xprt *r_xprt)
>   {
>   }
>
> +static void
> +physical_op_destroy(struct rpcrdma_buffer *buf)
> +{
> +}
> +
>   const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
>   	.ro_map				= physical_op_map,
>   	.ro_unmap			= physical_op_unmap,
>   	.ro_maxpages			= physical_op_maxpages,
>   	.ro_init			= physical_op_init,
>   	.ro_reset			= physical_op_reset,
> +	.ro_destroy			= physical_op_destroy,
>   	.ro_displayname			= "physical",
>   };
> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
> index e17d91a..dcbc736 100644
> --- a/net/sunrpc/xprtrdma/verbs.c
> +++ b/net/sunrpc/xprtrdma/verbs.c
> @@ -1198,47 +1198,6 @@ rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
>   	kfree(req);
>   }
>
> -static void
> -rpcrdma_destroy_fmrs(struct rpcrdma_buffer *buf)
> -{
> -	struct rpcrdma_mw *r;
> -	int rc;
> -
> -	while (!list_empty(&buf->rb_all)) {
> -		r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
> -		list_del(&r->mw_all);
> -		list_del(&r->mw_list);
> -
> -		rc = ib_dealloc_fmr(r->r.fmr);
> -		if (rc)
> -			dprintk("RPC:       %s: ib_dealloc_fmr failed %i\n",
> -				__func__, rc);
> -
> -		kfree(r);
> -	}
> -}
> -
> -static void
> -rpcrdma_destroy_frmrs(struct rpcrdma_buffer *buf)
> -{
> -	struct rpcrdma_mw *r;
> -	int rc;
> -
> -	while (!list_empty(&buf->rb_all)) {
> -		r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
> -		list_del(&r->mw_all);
> -		list_del(&r->mw_list);
> -
> -		rc = ib_dereg_mr(r->r.frmr.fr_mr);
> -		if (rc)
> -			dprintk("RPC:       %s: ib_dereg_mr failed %i\n",
> -				__func__, rc);
> -		ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
> -
> -		kfree(r);
> -	}
> -}
> -
>   void
>   rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
>   {
> @@ -1259,16 +1218,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
>   			rpcrdma_destroy_req(ia, buf->rb_send_bufs[i]);
>   	}
>
> -	switch (ia->ri_memreg_strategy) {
> -	case RPCRDMA_FRMR:
> -		rpcrdma_destroy_frmrs(buf);
> -		break;
> -	case RPCRDMA_MTHCAFMR:
> -		rpcrdma_destroy_fmrs(buf);
> -		break;
> -	default:
> -		break;
> -	}
> +	ia->ri_ops->ro_destroy(buf);
>
>   	kfree(buf->rb_pool);
>   }
> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
> index cdf6763..a0e3c3e 100644
> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
> @@ -343,6 +343,7 @@ struct rpcrdma_memreg_ops {
>   	size_t		(*ro_maxpages)(struct rpcrdma_xprt *);
>   	int		(*ro_init)(struct rpcrdma_xprt *);
>   	void		(*ro_reset)(struct rpcrdma_xprt *);
> +	void		(*ro_destroy)(struct rpcrdma_buffer *);
>   	const char	*ro_displayname;
>   };
>
>

Other than that,

Reviewed-by: Sagi Grimberg <sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH v1 11/16] xprtrdma: Handle non-SEND completions via a callout
       [not found]     ` <20150313212816.22783.49677.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
@ 2015-03-16 10:53       ` Sagi Grimberg
  0 siblings, 0 replies; 52+ messages in thread
From: Sagi Grimberg @ 2015-03-16 10:53 UTC (permalink / raw)
  To: Chuck Lever, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On 3/13/2015 11:28 PM, Chuck Lever wrote:
> Allow each memory registration mode to plug in a callout that handles
> the completion of a memory registration operation.
>
> Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> ---
>   net/sunrpc/xprtrdma/frwr_ops.c  |   17 +++++++++++++++++
>   net/sunrpc/xprtrdma/verbs.c     |   14 +++++---------
>   net/sunrpc/xprtrdma/xprt_rdma.h |    5 +++++
>   3 files changed, 27 insertions(+), 9 deletions(-)
>
> diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
> index 9bb4b2d..6e6d8ba 100644
> --- a/net/sunrpc/xprtrdma/frwr_ops.c
> +++ b/net/sunrpc/xprtrdma/frwr_ops.c
> @@ -164,6 +164,22 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
>   		     rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth);
>   }
>
> +/* If FAST_REG or LOCAL_INV failed, indicate the frmr needs to be reset. */
> +static void
> +frwr_sendcompletion(struct ib_wc *wc)
> +{
> +	struct rpcrdma_mw *r;
> +
> +	if (likely(wc->status == IB_WC_SUCCESS))
> +		return;
> +
> +	/* WARNING: Only wr_id and status are reliable at this point */
> +	r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
> +	dprintk("RPC:       %s: frmr %p (stale), status %d\n",
> +		__func__, r, wc->status);
> +	r->r.frmr.fr_state = FRMR_IS_STALE;
> +}
> +
>   static int
>   frwr_op_init(struct rpcrdma_xprt *r_xprt)
>   {
> @@ -195,6 +211,7 @@ frwr_op_init(struct rpcrdma_xprt *r_xprt)
>
>   		list_add(&r->mw_list, &buf->rb_mws);
>   		list_add(&r->mw_all, &buf->rb_all);
> +		r->mw_sendcompletion = frwr_sendcompletion;
>   	}
>
>   	return 0;
> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
> index 17b2a29..f108a57 100644
> --- a/net/sunrpc/xprtrdma/verbs.c
> +++ b/net/sunrpc/xprtrdma/verbs.c
> @@ -204,21 +204,17 @@ static const char * const wc_status[] = {
>   static void
>   rpcrdma_sendcq_process_wc(struct ib_wc *wc)
>   {
> -	if (likely(wc->status == IB_WC_SUCCESS))
> -		return;
> -
>   	/* WARNING: Only wr_id and status are reliable at this point */
> -	if (wc->wr_id == 0ULL) {
> -		if (wc->status != IB_WC_WR_FLUSH_ERR)
> +	if (wc->wr_id == RPCRDMA_IGNORE_COMPLETION) {
> +		if (wc->status != IB_WC_SUCCESS &&
> +		    wc->status != IB_WC_WR_FLUSH_ERR)
>   			pr_err("RPC:       %s: SEND: %s\n",
>   			       __func__, COMPLETION_MSG(wc->status));
>   	} else {
>   		struct rpcrdma_mw *r;
>
>   		r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
> -		r->r.frmr.fr_state = FRMR_IS_STALE;
> -		pr_err("RPC:       %s: frmr %p (stale): %s\n",
> -		       __func__, r, COMPLETION_MSG(wc->status));
> +		r->mw_sendcompletion(wc);
>   	}
>   }
>
> @@ -1616,7 +1612,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
>   	}
>
>   	send_wr.next = NULL;
> -	send_wr.wr_id = 0ULL;	/* no send cookie */
> +	send_wr.wr_id = RPCRDMA_IGNORE_COMPLETION;
>   	send_wr.sg_list = req->rl_send_iov;
>   	send_wr.num_sge = req->rl_niovs;
>   	send_wr.opcode = IB_WR_SEND;
> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
> index a53a564..40a0ee8 100644
> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
> @@ -106,6 +106,10 @@ struct rpcrdma_ep {
>   #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
>   #define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount)
>
> +/* Force completion handler to ignore the signal
> + */
> +#define RPCRDMA_IGNORE_COMPLETION	(0ULL)
> +
>   /* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV
>    *
>    * The below structure appears at the front of a large region of kmalloc'd
> @@ -206,6 +210,7 @@ struct rpcrdma_mw {
>   		struct ib_fmr		*fmr;
>   		struct rpcrdma_frmr	frmr;
>   	} r;
> +	void			(*mw_sendcompletion)(struct ib_wc *);
>   	struct list_head	mw_list;
>   	struct list_head	mw_all;
>   };
>

Reviewed-by: Sagi Grimberg <sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH v1 12/16] xprtrdma: Acquire FMRs in rpcrdma_fmr_register_external()
       [not found]     ` <20150313212825.22783.51384.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
@ 2015-03-16 12:28       ` Sagi Grimberg
       [not found]         ` <5506CC6C.8090106-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
  0 siblings, 1 reply; 52+ messages in thread
From: Sagi Grimberg @ 2015-03-16 12:28 UTC (permalink / raw)
  To: Chuck Lever, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On 3/13/2015 11:28 PM, Chuck Lever wrote:
> Acquiring 64 FMRs in rpcrdma_buffer_get() while holding the buffer
> pool lock is expensive, and unnecessary because FMR mode can
> transfer up to a 1MB payload using just a single ib_fmr.
>
> Instead, acquire ib_fmrs one-at-a-time as chunks are registered, and
> return them to rb_mws immediately during deregistration.
>
> Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> ---
>   net/sunrpc/xprtrdma/fmr_ops.c   |   41 ++++++++++++++++++++++++++++++++++++---
>   net/sunrpc/xprtrdma/verbs.c     |   41 ++++++++++++++-------------------------
>   net/sunrpc/xprtrdma/xprt_rdma.h |    1 +
>   3 files changed, 54 insertions(+), 29 deletions(-)
>
> diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
> index 96e6cd3..9c6c2e8 100644
> --- a/net/sunrpc/xprtrdma/fmr_ops.c
> +++ b/net/sunrpc/xprtrdma/fmr_ops.c
> @@ -28,10 +28,11 @@ __fmr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
>   {
>   	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
>   	struct rpcrdma_mr_seg *seg1 = seg;
> +	struct rpcrdma_mw *mw = seg1->rl_mw;
>   	int rc, nsegs = seg->mr_nsegs;
>   	LIST_HEAD(l);
>
> -	list_add(&seg1->rl_mw->r.fmr->list, &l);
> +	list_add(&mw->r.fmr->list, &l);
>   	rc = ib_unmap_fmr(&l);
>   	read_lock(&ia->ri_qplock);
>   	while (seg1->mr_nsegs--)
> @@ -39,11 +40,14 @@ __fmr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
>   	read_unlock(&ia->ri_qplock);
>   	if (rc)
>   		goto out_err;
> +out:
> +	seg1->rl_mw = NULL;
> +	rpcrdma_put_mw(r_xprt, mw);
>   	return nsegs;
>
>   out_err:
>   	dprintk("RPC:       %s: ib_unmap_fmr status %i\n", __func__, rc);
> -	return nsegs;
> +	goto out;
>   }
>
>   static int
> @@ -117,6 +121,27 @@ out_fmr_err:
>   	return rc;
>   }
>
> +static struct rpcrdma_mw *
> +__fmr_get_mw(struct rpcrdma_xprt *r_xprt)

This introduces an asymmetric approach where you have fmr/frwr get_mw
routines but have a single rpcrdma_put_mw. I noticed that the
frwr_get_mw (next patch) is almost completely different - but I wonder
if that should really be that different?

Just raising the question.

> +{
> +	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
> +	struct rpcrdma_mw *mw = NULL;
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&buf->rb_lock, flags);
> +
> +	if (!list_empty(&buf->rb_mws)) {
> +		mw = list_entry(buf->rb_mws.next,
> +				struct rpcrdma_mw, mw_list);
> +		list_del_init(&mw->mw_list);
> +	} else {
> +		pr_err("RPC:       %s: no MWs available\n", __func__);
> +	}
> +
> +	spin_unlock_irqrestore(&buf->rb_lock, flags);
> +	return mw;
> +}
> +
>   /* Use the ib_map_phys_fmr() verb to register a memory region
>    * for remote access via RDMA READ or RDMA WRITE.
>    */
> @@ -126,10 +151,18 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
>   {
>   	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
>   	struct rpcrdma_mr_seg *seg1 = seg;
> -	struct rpcrdma_mw *mw = seg1->rl_mw;
> +	struct rpcrdma_mw *mw;
>   	u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
>   	int len, pageoff, i, rc;
>
> +	mw = __fmr_get_mw(r_xprt);
> +	if (!mw)
> +		return -ENOMEM;
> +	if (seg1->rl_mw) {
> +		rpcrdma_put_mw(r_xprt, seg1->rl_mw);
> +		seg1->rl_mw = NULL;
> +	}
> +

How can this happen? getting to op_map with existing rl_mw? and
wouldn't it be better to use rl_mw instead of getting a new mw and
putting seg1->rl_mw?

>   	pageoff = offset_in_page(seg1->mr_offset);
>   	seg1->mr_offset -= pageoff;	/* start of page */
>   	seg1->mr_len += pageoff;
> @@ -152,6 +185,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
>   	if (rc)
>   		goto out_maperr;
>
> +	seg1->rl_mw = mw;
>   	seg1->mr_rkey = mw->r.fmr->rkey;
>   	seg1->mr_base = seg1->mr_dma + pageoff;
>   	seg1->mr_nsegs = i;
> @@ -164,6 +198,7 @@ out_maperr:
>   		pageoff, i, rc);
>   	while (i--)
>   		rpcrdma_unmap_one(ia, --seg);
> +	rpcrdma_put_mw(r_xprt, mw);
>   	return rc;
>   }
>
> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
> index f108a57..f2316d8 100644
> --- a/net/sunrpc/xprtrdma/verbs.c
> +++ b/net/sunrpc/xprtrdma/verbs.c
> @@ -1171,6 +1171,21 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
>   	kfree(buf->rb_pool);
>   }
>
> +void
> +rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw)
> +{
> +	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
> +	unsigned long flags;
> +
> +	if (!list_empty(&mw->mw_list))
> +		pr_warn("RPC:       %s: mw %p still on a list!\n",
> +			__func__, mw);
> +
> +	spin_lock_irqsave(&buf->rb_lock, flags);
> +	list_add_tail(&mw->mw_list, &buf->rb_mws);
> +	spin_unlock_irqrestore(&buf->rb_lock, flags);
> +}
> +
>   /* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
>    * some req segments uninitialized.
>    */
> @@ -1292,28 +1307,6 @@ rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
>   	return NULL;
>   }
>
> -static struct rpcrdma_req *
> -rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
> -{
> -	struct rpcrdma_mw *r;
> -	int i;
> -
> -	i = RPCRDMA_MAX_SEGS - 1;
> -	while (!list_empty(&buf->rb_mws)) {
> -		r = list_entry(buf->rb_mws.next,
> -			       struct rpcrdma_mw, mw_list);
> -		list_del(&r->mw_list);
> -		req->rl_segments[i].rl_mw = r;
> -		if (unlikely(i-- == 0))
> -			return req;	/* Success */
> -	}
> -
> -	/* Not enough entries on rb_mws for this req */
> -	rpcrdma_buffer_put_sendbuf(req, buf);
> -	rpcrdma_buffer_put_mrs(req, buf);
> -	return NULL;
> -}
> -
>   /*
>    * Get a set of request/reply buffers.
>    *
> @@ -1355,9 +1348,6 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
>   	case RPCRDMA_FRMR:
>   		req = rpcrdma_buffer_get_frmrs(req, buffers, &stale);
>   		break;
> -	case RPCRDMA_MTHCAFMR:
> -		req = rpcrdma_buffer_get_fmrs(req, buffers);
> -		break;
>   	default:
>   		break;
>   	}
> @@ -1382,7 +1372,6 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
>   	rpcrdma_buffer_put_sendbuf(req, buffers);
>   	switch (ia->ri_memreg_strategy) {
>   	case RPCRDMA_FRMR:
> -	case RPCRDMA_MTHCAFMR:
>   		rpcrdma_buffer_put_mrs(req, buffers);
>   		break;
>   	default:
> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
> index 40a0ee8..d5aa5b4 100644
> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
> @@ -426,6 +426,7 @@ void rpcrdma_free_regbuf(struct rpcrdma_ia *,
>   unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *);
>   void rpcrdma_map_one(struct rpcrdma_ia *, struct rpcrdma_mr_seg *, bool);
>   void rpcrdma_unmap_one(struct rpcrdma_ia *, struct rpcrdma_mr_seg *);
> +void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *);
>
>   /*
>    * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH v1 13/16] xprtrdma: Acquire MRs in rpcrdma_register_external()
       [not found]     ` <20150313212835.22783.12326.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
@ 2015-03-16 12:44       ` Sagi Grimberg
       [not found]         ` <5506D02B.5050602-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
  0 siblings, 1 reply; 52+ messages in thread
From: Sagi Grimberg @ 2015-03-16 12:44 UTC (permalink / raw)
  To: Chuck Lever, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On 3/13/2015 11:28 PM, Chuck Lever wrote:
> Acquiring 64 MRs in rpcrdma_buffer_get() while holding the buffer
> pool lock is expensive, and unnecessary because most modern adapters
> can transfer 100s of KBs of payload using just a single MR.
>
> Instead, acquire MRs one-at-a-time as chunks are registered, and
> return them to rb_mws immediately during deregistration.
>
> Note: commit 539431a437d2 ("xprtrdma: Don't invalidate FRMRs if
> registration fails") is reverted: There is now a valid case where
> registration can fail (with -ENOMEM) but the QP is still in RTS.
>
> Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> ---
>   net/sunrpc/xprtrdma/frwr_ops.c |  126 ++++++++++++++++++++++++++++++++++++---
>   net/sunrpc/xprtrdma/rpc_rdma.c |    3 -
>   net/sunrpc/xprtrdma/verbs.c    |  130 ----------------------------------------
>   3 files changed, 120 insertions(+), 139 deletions(-)
>
> diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
> index 6e6d8ba..d23e064 100644
> --- a/net/sunrpc/xprtrdma/frwr_ops.c
> +++ b/net/sunrpc/xprtrdma/frwr_ops.c
> @@ -54,15 +54,16 @@ __frwr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
>   {
>   	struct rpcrdma_mr_seg *seg1 = seg;
>   	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
> +	struct rpcrdma_mw *mw = seg1->rl_mw;
>   	struct ib_send_wr invalidate_wr, *bad_wr;
>   	int rc, nsegs = seg->mr_nsegs;
>
> -	seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;
> +	mw->r.frmr.fr_state = FRMR_IS_INVALID;
>
>   	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
> -	invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw;
> +	invalidate_wr.wr_id = (unsigned long)(void *)mw;
>   	invalidate_wr.opcode = IB_WR_LOCAL_INV;
> -	invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey;
> +	invalidate_wr.ex.invalidate_rkey = mw->r.frmr.fr_mr->rkey;
>   	DECR_CQCOUNT(&r_xprt->rx_ep);
>
>   	read_lock(&ia->ri_qplock);
> @@ -72,13 +73,17 @@ __frwr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
>   	read_unlock(&ia->ri_qplock);
>   	if (rc)
>   		goto out_err;
> +
> +out:
> +	seg1->rl_mw = NULL;
> +	rpcrdma_put_mw(r_xprt, mw);
>   	return nsegs;
>
>   out_err:
>   	/* Force rpcrdma_buffer_get() to retry */
> -	seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
> +	mw->r.frmr.fr_state = FRMR_IS_STALE;
>   	dprintk("RPC:       %s: ib_post_send status %i\n", __func__, rc);
> -	return nsegs;
> +	goto out;
>   }
>
>   static void
> @@ -217,6 +222,99 @@ frwr_op_init(struct rpcrdma_xprt *r_xprt)
>   	return 0;
>   }
>
> +/* rpcrdma_unmap_one() was already done by rpcrdma_frwr_releasesge().
> + * Redo only the ib_post_send().
> + */
> +static void
> +__retry_local_inv(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
> +{
> +	struct rpcrdma_xprt *r_xprt =
> +				container_of(ia, struct rpcrdma_xprt, rx_ia);
> +	struct ib_send_wr invalidate_wr, *bad_wr;
> +	int rc;
> +
> +	pr_warn("RPC:       %s: FRMR %p is stale\n", __func__, r);
> +
> +	/* When this FRMR is re-inserted into rb_mws, it is no longer stale */
> +	r->r.frmr.fr_state = FRMR_IS_INVALID;
> +
> +	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
> +	invalidate_wr.wr_id = (unsigned long)(void *)r;
> +	invalidate_wr.opcode = IB_WR_LOCAL_INV;
> +	invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey;
> +	DECR_CQCOUNT(&r_xprt->rx_ep);
> +
> +	pr_warn("RPC:       %s: frmr %p invalidating rkey %08x\n",
> +		__func__, r, r->r.frmr.fr_mr->rkey);
> +
> +	read_lock(&ia->ri_qplock);
> +	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
> +	read_unlock(&ia->ri_qplock);
> +	if (rc) {
> +		/* Force __frwr_get_mw() to retry */
> +		r->r.frmr.fr_state = FRMR_IS_STALE;

Why should you retry this? Why should it succeed next time around?
I would think that if you didn't succeed in post_send just destroy and
recreate the mr (in another context or just outside a spinning lock).

> +		dprintk("RPC:       %s: ib_post_send status %i\n",
> +			__func__, rc);
> +	}
> +}
> +
> +static void
> +__retry_flushed_linv(struct rpcrdma_buffer *buf, struct list_head *stale)
> +{
> +	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
> +	struct list_head *pos;
> +	struct rpcrdma_mw *r;
> +	unsigned long flags;
> +	unsigned count;
> +
> +	count = 0;
> +	list_for_each(pos, stale) {
> +		r = list_entry(pos, struct rpcrdma_mw, mw_list);
> +		__retry_local_inv(ia, r);
> +		++count;
> +	}
> +
> +	pr_warn("RPC:       %s: adding %u MRs to rb_mws\n",
> +		__func__, count);
> +
> +	spin_lock_irqsave(&buf->rb_lock, flags);
> +	list_splice_tail(stale, &buf->rb_mws);
> +	spin_unlock_irqrestore(&buf->rb_lock, flags);
> +}
> +
> +static struct rpcrdma_mw *
> +__frwr_get_mw(struct rpcrdma_xprt *r_xprt)
> +{
> +	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
> +	struct rpcrdma_mw *mw;
> +	unsigned long flags;
> +	LIST_HEAD(stale);
> +	int count;
> +
> +	spin_lock_irqsave(&buf->rb_lock, flags);
> +	count = 0;
> +	while (!list_empty(&buf->rb_mws)) {
> +		mw = list_entry(buf->rb_mws.next, struct rpcrdma_mw, mw_list);
> +		list_del_init(&mw->mw_list);
> +		if (mw->r.frmr.fr_state == FRMR_IS_INVALID)
> +			goto out_unlock;
> +		list_add_tail(&mw->mw_list, &stale);

So one thing I don't understand is how you can get a valid frmr in the
list? I think this might be making things a bit too complicated.

I imagine that mws are added back after they are successfully
invalidated, but are they added also for the unsuccessful local_inv?

I think that by keeping only usable mws in rb_mws would make things a
lot more simple.

the logic would be:
- take frmr from rb_mws (it is invalid)
- use it
- when done - do local invalidate
   - if success return to list
   - if not: reset it (free + alloc the mr) and return it to the list

If this logic is possible then it would save you from handling stales
and invalidate retries. and it would maybe allow to use a single
rpcrdma_get_mw() which would simplify it even more...

But I might not have the full picture here...

And again, sorry for commenting on logic that wasn't introduced by the
patch itself...

> +		count++;
> +	}
> +
> +	pr_err("RPC:       %s: no INVALID MWs available\n", __func__);
> +	mw = NULL;
> +
> +out_unlock:
> +	spin_unlock_irqrestore(&buf->rb_lock, flags);
> +	if (!list_empty(&stale)) {
> +		dprintk("RPC:       %s: found %d unsuitable MWs\n",
> +			__func__, count);
> +		__retry_flushed_linv(buf, &stale);
> +	}
> +	return mw;
> +}
> +
>   /* Post a FAST_REG Work Request to register a memory region
>    * for remote access via RDMA READ or RDMA WRITE.
>    */
> @@ -226,9 +324,9 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
>   {
>   	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
>   	struct rpcrdma_mr_seg *seg1 = seg;
> -	struct rpcrdma_mw *mw = seg1->rl_mw;
> -	struct rpcrdma_frmr *frmr = &mw->r.frmr;
> -	struct ib_mr *mr = frmr->fr_mr;
> +	struct rpcrdma_frmr *frmr;
> +	struct rpcrdma_mw *mw;
> +	struct ib_mr *mr;
>   	struct ib_send_wr fastreg_wr, *bad_wr;
>   	u8 key;
>   	int len, pageoff;
> @@ -237,12 +335,21 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
>   	u64 pa;
>   	int page_no;
>
> +	mw = __frwr_get_mw(r_xprt);
> +	if (!mw)
> +		return -ENOMEM;
> +	if (seg1->rl_mw) {
> +		rpcrdma_put_mw(r_xprt, seg1->rl_mw);
> +		seg1->rl_mw = NULL;
> +	}
> +
>   	pageoff = offset_in_page(seg1->mr_offset);
>   	seg1->mr_offset -= pageoff;	/* start of page */
>   	seg1->mr_len += pageoff;
>   	len = -pageoff;
>   	if (nsegs > ia->ri_max_frmr_depth)
>   		nsegs = ia->ri_max_frmr_depth;
> +	frmr = &mw->r.frmr;
>   	for (page_no = i = 0; i < nsegs;) {
>   		rpcrdma_map_one(ia, seg, writing);
>   		pa = seg->mr_dma;
> @@ -278,6 +385,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
>   	fastreg_wr.wr.fast_reg.access_flags = writing ?
>   				IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
>   				IB_ACCESS_REMOTE_READ;
> +	mr = frmr->fr_mr;
>   	key = (u8)(mr->rkey & 0x000000FF);
>   	ib_update_fast_reg_key(mr, ++key);
>   	fastreg_wr.wr.fast_reg.rkey = mr->rkey;
> @@ -287,6 +395,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
>   	if (rc)
>   		goto out_senderr;
>
> +	seg1->rl_mw = mw;
>   	seg1->mr_rkey = mr->rkey;
>   	seg1->mr_base = seg1->mr_dma + pageoff;
>   	seg1->mr_nsegs = i;
> @@ -301,6 +410,7 @@ out_err:
>   	frmr->fr_state = FRMR_IS_INVALID;
>   	while (i--)
>   		rpcrdma_unmap_one(ia, --seg);
> +	rpcrdma_put_mw(r_xprt, mw);
>   	return rc;
>   }
>
> diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
> index 7b51d9d..b9689ca 100644
> --- a/net/sunrpc/xprtrdma/rpc_rdma.c
> +++ b/net/sunrpc/xprtrdma/rpc_rdma.c
> @@ -284,8 +284,7 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
>   	return (unsigned char *)iptr - (unsigned char *)headerp;
>
>   out:
> -	if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_FRMR)
> -		r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, req, nchunks);
> +	r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, req, nchunks);
>   	return n;
>   }
>
> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
> index f2316d8..5b9c104 100644
> --- a/net/sunrpc/xprtrdma/verbs.c
> +++ b/net/sunrpc/xprtrdma/verbs.c
> @@ -1186,33 +1186,6 @@ rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw)
>   	spin_unlock_irqrestore(&buf->rb_lock, flags);
>   }
>
> -/* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
> - * some req segments uninitialized.
> - */
> -static void
> -rpcrdma_buffer_put_mr(struct rpcrdma_mw **mw, struct rpcrdma_buffer *buf)
> -{
> -	if (*mw) {
> -		list_add_tail(&(*mw)->mw_list, &buf->rb_mws);
> -		*mw = NULL;
> -	}
> -}
> -
> -/* Cycle mw's back in reverse order, and "spin" them.
> - * This delays and scrambles reuse as much as possible.
> - */
> -static void
> -rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
> -{
> -	struct rpcrdma_mr_seg *seg = req->rl_segments;
> -	struct rpcrdma_mr_seg *seg1 = seg;
> -	int i;
> -
> -	for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++)
> -		rpcrdma_buffer_put_mr(&seg->rl_mw, buf);
> -	rpcrdma_buffer_put_mr(&seg1->rl_mw, buf);
> -}
> -
>   static void
>   rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
>   {
> @@ -1225,88 +1198,6 @@ rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
>   	}
>   }
>
> -/* rpcrdma_unmap_one() was already done during deregistration.
> - * Redo only the ib_post_send().
> - */
> -static void
> -rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia)
> -{
> -	struct rpcrdma_xprt *r_xprt =
> -				container_of(ia, struct rpcrdma_xprt, rx_ia);
> -	struct ib_send_wr invalidate_wr, *bad_wr;
> -	int rc;
> -
> -	dprintk("RPC:       %s: FRMR %p is stale\n", __func__, r);
> -
> -	/* When this FRMR is re-inserted into rb_mws, it is no longer stale */
> -	r->r.frmr.fr_state = FRMR_IS_INVALID;
> -
> -	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
> -	invalidate_wr.wr_id = (unsigned long)(void *)r;
> -	invalidate_wr.opcode = IB_WR_LOCAL_INV;
> -	invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey;
> -	DECR_CQCOUNT(&r_xprt->rx_ep);
> -
> -	dprintk("RPC:       %s: frmr %p invalidating rkey %08x\n",
> -		__func__, r, r->r.frmr.fr_mr->rkey);
> -
> -	read_lock(&ia->ri_qplock);
> -	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
> -	read_unlock(&ia->ri_qplock);
> -	if (rc) {
> -		/* Force rpcrdma_buffer_get() to retry */
> -		r->r.frmr.fr_state = FRMR_IS_STALE;
> -		dprintk("RPC:       %s: ib_post_send failed, %i\n",
> -			__func__, rc);
> -	}
> -}
> -
> -static void
> -rpcrdma_retry_flushed_linv(struct list_head *stale,
> -			   struct rpcrdma_buffer *buf)
> -{
> -	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
> -	struct list_head *pos;
> -	struct rpcrdma_mw *r;
> -	unsigned long flags;
> -
> -	list_for_each(pos, stale) {
> -		r = list_entry(pos, struct rpcrdma_mw, mw_list);
> -		rpcrdma_retry_local_inv(r, ia);
> -	}
> -
> -	spin_lock_irqsave(&buf->rb_lock, flags);
> -	list_splice_tail(stale, &buf->rb_mws);
> -	spin_unlock_irqrestore(&buf->rb_lock, flags);
> -}
> -
> -static struct rpcrdma_req *
> -rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
> -			 struct list_head *stale)
> -{
> -	struct rpcrdma_mw *r;
> -	int i;
> -
> -	i = RPCRDMA_MAX_SEGS - 1;
> -	while (!list_empty(&buf->rb_mws)) {
> -		r = list_entry(buf->rb_mws.next,
> -			       struct rpcrdma_mw, mw_list);
> -		list_del(&r->mw_list);
> -		if (r->r.frmr.fr_state == FRMR_IS_STALE) {
> -			list_add(&r->mw_list, stale);
> -			continue;
> -		}
> -		req->rl_segments[i].rl_mw = r;
> -		if (unlikely(i-- == 0))
> -			return req;	/* Success */
> -	}
> -
> -	/* Not enough entries on rb_mws for this req */
> -	rpcrdma_buffer_put_sendbuf(req, buf);
> -	rpcrdma_buffer_put_mrs(req, buf);
> -	return NULL;
> -}
> -
>   /*
>    * Get a set of request/reply buffers.
>    *
> @@ -1319,12 +1210,11 @@ rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
>   struct rpcrdma_req *
>   rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
>   {
> -	struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
> -	struct list_head stale;
>   	struct rpcrdma_req *req;
>   	unsigned long flags;
>
>   	spin_lock_irqsave(&buffers->rb_lock, flags);
> +
>   	if (buffers->rb_send_index == buffers->rb_max_requests) {
>   		spin_unlock_irqrestore(&buffers->rb_lock, flags);
>   		dprintk("RPC:       %s: out of request buffers\n", __func__);
> @@ -1343,17 +1233,7 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
>   	}
>   	buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
>
> -	INIT_LIST_HEAD(&stale);
> -	switch (ia->ri_memreg_strategy) {
> -	case RPCRDMA_FRMR:
> -		req = rpcrdma_buffer_get_frmrs(req, buffers, &stale);
> -		break;
> -	default:
> -		break;
> -	}
>   	spin_unlock_irqrestore(&buffers->rb_lock, flags);
> -	if (!list_empty(&stale))
> -		rpcrdma_retry_flushed_linv(&stale, buffers);
>   	return req;
>   }
>
> @@ -1365,18 +1245,10 @@ void
>   rpcrdma_buffer_put(struct rpcrdma_req *req)
>   {
>   	struct rpcrdma_buffer *buffers = req->rl_buffer;
> -	struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
>   	unsigned long flags;
>
>   	spin_lock_irqsave(&buffers->rb_lock, flags);
>   	rpcrdma_buffer_put_sendbuf(req, buffers);
> -	switch (ia->ri_memreg_strategy) {
> -	case RPCRDMA_FRMR:
> -		rpcrdma_buffer_put_mrs(req, buffers);
> -		break;
> -	default:
> -		break;
> -	}
>   	spin_unlock_irqrestore(&buffers->rb_lock, flags);
>   }
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH v1 14/16] xprtrdma: Remove rpcrdma_ia::ri_memreg_strategy
       [not found]     ` <20150313212844.22783.1438.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
@ 2015-03-16 12:45       ` Sagi Grimberg
  0 siblings, 0 replies; 52+ messages in thread
From: Sagi Grimberg @ 2015-03-16 12:45 UTC (permalink / raw)
  To: Chuck Lever, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On 3/13/2015 11:28 PM, Chuck Lever wrote:
> Clean up: This field is no longer used.
>
> Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> ---
>   include/linux/sunrpc/xprtrdma.h |    3 ++-
>   net/sunrpc/xprtrdma/verbs.c     |    3 ---
>   net/sunrpc/xprtrdma/xprt_rdma.h |    1 -
>   3 files changed, 2 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h
> index 64a0a0a..10bc3cde 100644
> --- a/include/linux/sunrpc/xprtrdma.h
> +++ b/include/linux/sunrpc/xprtrdma.h
> @@ -61,7 +61,8 @@
>
>   #define RPCRDMA_INLINE_PAD_THRESH  (512)/* payload threshold to pad (bytes) */
>
> -/* memory registration strategies */
> +/* Memory registration strategies, by number.
> + * This is part of a kernel / user space API. Do not remove. */
>   enum rpcrdma_memreg {
>   	RPCRDMA_BOUNCEBUFFERS = 0,
>   	RPCRDMA_REGISTER,
> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
> index 5b9c104..21b63fe 100644
> --- a/net/sunrpc/xprtrdma/verbs.c
> +++ b/net/sunrpc/xprtrdma/verbs.c
> @@ -669,9 +669,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
>   	dprintk("RPC:       %s: memory registration strategy is '%s'\n",
>   		__func__, ia->ri_ops->ro_displayname);
>
> -	/* Else will do memory reg/dereg for each chunk */
> -	ia->ri_memreg_strategy = memreg;
> -
>   	rwlock_init(&ia->ri_qplock);
>   	return 0;
>
> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
> index d5aa5b4..b167e44 100644
> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
> @@ -69,7 +69,6 @@ struct rpcrdma_ia {
>   	int			ri_have_dma_lkey;
>   	struct completion	ri_done;
>   	int			ri_async_rc;
> -	enum rpcrdma_memreg	ri_memreg_strategy;
>   	unsigned int		ri_max_frmr_depth;
>   	struct ib_device_attr	ri_devattr;
>   	struct ib_qp_attr	ri_qp_attr;
>

looks good,

Reviewed-by: Sagi Grimberg <sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH v1 15/16] xprtrdma: Make rpcrdma_{un}map_one() into inline functions
       [not found]     ` <20150313212853.22783.62285.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
@ 2015-03-16 12:45       ` Sagi Grimberg
  0 siblings, 0 replies; 52+ messages in thread
From: Sagi Grimberg @ 2015-03-16 12:45 UTC (permalink / raw)
  To: Chuck Lever, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On 3/13/2015 11:28 PM, Chuck Lever wrote:
> These functions are called in a loop for each page transferred via
> RDMA READ or WRITE. Extract loop invariants and inline them to
> reduce CPU overhead.
>
> Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> ---
>   net/sunrpc/xprtrdma/fmr_ops.c      |   10 ++++++--
>   net/sunrpc/xprtrdma/frwr_ops.c     |   10 ++++++--
>   net/sunrpc/xprtrdma/physical_ops.c |   11 ++++++---
>   net/sunrpc/xprtrdma/verbs.c        |   44 ++++++-----------------------------
>   net/sunrpc/xprtrdma/xprt_rdma.h    |   45 ++++++++++++++++++++++++++++++++++--
>   5 files changed, 73 insertions(+), 47 deletions(-)
>
> diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
> index 9c6c2e8..1404f20 100644
> --- a/net/sunrpc/xprtrdma/fmr_ops.c
> +++ b/net/sunrpc/xprtrdma/fmr_ops.c
> @@ -29,14 +29,16 @@ __fmr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
>   	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
>   	struct rpcrdma_mr_seg *seg1 = seg;
>   	struct rpcrdma_mw *mw = seg1->rl_mw;
> +	struct ib_device *device;
>   	int rc, nsegs = seg->mr_nsegs;
>   	LIST_HEAD(l);
>
>   	list_add(&mw->r.fmr->list, &l);
>   	rc = ib_unmap_fmr(&l);
>   	read_lock(&ia->ri_qplock);
> +	device = ia->ri_id->device;
>   	while (seg1->mr_nsegs--)
> -		rpcrdma_unmap_one(ia, seg++);
> +		rpcrdma_unmap_one(device, seg++);
>   	read_unlock(&ia->ri_qplock);
>   	if (rc)
>   		goto out_err;
> @@ -150,6 +152,8 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
>   	   int nsegs, bool writing)
>   {
>   	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
> +	struct ib_device *device = ia->ri_id->device;
> +	enum dma_data_direction direction = rpcrdma_data_dir(writing);
>   	struct rpcrdma_mr_seg *seg1 = seg;
>   	struct rpcrdma_mw *mw;
>   	u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
> @@ -170,7 +174,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
>   	if (nsegs > RPCRDMA_MAX_FMR_SGES)
>   		nsegs = RPCRDMA_MAX_FMR_SGES;
>   	for (i = 0; i < nsegs;) {
> -		rpcrdma_map_one(ia, seg, writing);
> +		rpcrdma_map_one(device, seg, direction);
>   		physaddrs[i] = seg->mr_dma;
>   		len += seg->mr_len;
>   		++seg;
> @@ -197,7 +201,7 @@ out_maperr:
>   		__func__, len, (unsigned long long)seg1->mr_dma,
>   		pageoff, i, rc);
>   	while (i--)
> -		rpcrdma_unmap_one(ia, --seg);
> +		rpcrdma_unmap_one(device, --seg);
>   	rpcrdma_put_mw(r_xprt, mw);
>   	return rc;
>   }
> diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
> index d23e064..4494668 100644
> --- a/net/sunrpc/xprtrdma/frwr_ops.c
> +++ b/net/sunrpc/xprtrdma/frwr_ops.c
> @@ -55,6 +55,7 @@ __frwr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
>   	struct rpcrdma_mr_seg *seg1 = seg;
>   	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
>   	struct rpcrdma_mw *mw = seg1->rl_mw;
> +	struct ib_device *device;
>   	struct ib_send_wr invalidate_wr, *bad_wr;
>   	int rc, nsegs = seg->mr_nsegs;
>
> @@ -67,8 +68,9 @@ __frwr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
>   	DECR_CQCOUNT(&r_xprt->rx_ep);
>
>   	read_lock(&ia->ri_qplock);
> +	device = ia->ri_id->device;
>   	while (seg1->mr_nsegs--)
> -		rpcrdma_unmap_one(ia, seg++);
> +		rpcrdma_unmap_one(device, seg++);
>   	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
>   	read_unlock(&ia->ri_qplock);
>   	if (rc)
> @@ -323,6 +325,8 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
>   	    int nsegs, bool writing)
>   {
>   	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
> +	struct ib_device *device = ia->ri_id->device;
> +	enum dma_data_direction direction = rpcrdma_data_dir(writing);
>   	struct rpcrdma_mr_seg *seg1 = seg;
>   	struct rpcrdma_frmr *frmr;
>   	struct rpcrdma_mw *mw;
> @@ -351,7 +355,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
>   		nsegs = ia->ri_max_frmr_depth;
>   	frmr = &mw->r.frmr;
>   	for (page_no = i = 0; i < nsegs;) {
> -		rpcrdma_map_one(ia, seg, writing);
> +		rpcrdma_map_one(device, seg, direction);
>   		pa = seg->mr_dma;
>   		for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
>   			frmr->fr_pgl->page_list[page_no++] = pa;
> @@ -409,7 +413,7 @@ out_senderr:
>   out_err:
>   	frmr->fr_state = FRMR_IS_INVALID;
>   	while (i--)
> -		rpcrdma_unmap_one(ia, --seg);
> +		rpcrdma_unmap_one(device, --seg);
>   	rpcrdma_put_mw(r_xprt, mw);
>   	return rc;
>   }
> diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
> index 0998f4f..9ce7dfc 100644
> --- a/net/sunrpc/xprtrdma/physical_ops.c
> +++ b/net/sunrpc/xprtrdma/physical_ops.c
> @@ -64,7 +64,8 @@ physical_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
>   {
>   	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
>
> -	rpcrdma_map_one(ia, seg, writing);
> +	rpcrdma_map_one(ia->ri_id->device, seg,
> +			rpcrdma_data_dir(writing));
>   	seg->mr_rkey = ia->ri_bind_mem->rkey;
>   	seg->mr_base = seg->mr_dma;
>   	seg->mr_nsegs = 1;
> @@ -77,10 +78,14 @@ static void
>   physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
>   		  unsigned int count)
>   {
> +	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
>   	unsigned int i;
>
> -	for (i = 0; i < count; i++)
> -		rpcrdma_unmap_one(&r_xprt->rx_ia, &req->rl_segments[i]);
> +	for (i = 0; i < count; i++) {
> +		read_lock(&ia->ri_qplock);
> +		rpcrdma_unmap_one(ia->ri_id->device, &req->rl_segments[i]);
> +		read_unlock(&ia->ri_qplock);
> +	}
>   }
>
>   static void
> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
> index 21b63fe..460c917 100644
> --- a/net/sunrpc/xprtrdma/verbs.c
> +++ b/net/sunrpc/xprtrdma/verbs.c
> @@ -1288,6 +1288,14 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
>    * Wrappers for internal-use kmalloc memory registration, used by buffer code.
>    */
>
> +void
> +rpcrdma_mapping_error(struct rpcrdma_mr_seg *seg)
> +{
> +	dprintk("RPC:       map_one: offset %p iova %llx len %zu\n",
> +		seg->mr_offset,
> +		(unsigned long long)seg->mr_dma, seg->mr_dmalen);
> +}
> +
>   static int
>   rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
>   				struct ib_mr **mrp, struct ib_sge *iov)
> @@ -1413,42 +1421,6 @@ rpcrdma_free_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
>   }
>
>   /*
> - * Wrappers for chunk registration, shared by read/write chunk code.
> - */
> -
> -void
> -rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, bool writing)
> -{
> -	seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
> -	seg->mr_dmalen = seg->mr_len;
> -	if (seg->mr_page)
> -		seg->mr_dma = ib_dma_map_page(ia->ri_id->device,
> -				seg->mr_page, offset_in_page(seg->mr_offset),
> -				seg->mr_dmalen, seg->mr_dir);
> -	else
> -		seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
> -				seg->mr_offset,
> -				seg->mr_dmalen, seg->mr_dir);
> -	if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) {
> -		dprintk("RPC:       %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n",
> -			__func__,
> -			(unsigned long long)seg->mr_dma,
> -			seg->mr_offset, seg->mr_dmalen);
> -	}
> -}
> -
> -void
> -rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
> -{
> -	if (seg->mr_page)
> -		ib_dma_unmap_page(ia->ri_id->device,
> -				seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
> -	else
> -		ib_dma_unmap_single(ia->ri_id->device,
> -				seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
> -}
> -
> -/*
>    * Prepost any receive buffer, then post send.
>    *
>    * Receive buffer is donated to hardware, reclaimed upon recv completion.
> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
> index b167e44..10c4fa5 100644
> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
> @@ -423,11 +423,52 @@ void rpcrdma_free_regbuf(struct rpcrdma_ia *,
>   			 struct rpcrdma_regbuf *);
>
>   unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *);
> -void rpcrdma_map_one(struct rpcrdma_ia *, struct rpcrdma_mr_seg *, bool);
> -void rpcrdma_unmap_one(struct rpcrdma_ia *, struct rpcrdma_mr_seg *);
>   void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *);
>
>   /*
> + * Wrappers for chunk registration, shared by read/write chunk code.
> + */
> +
> +void rpcrdma_mapping_error(struct rpcrdma_mr_seg *);
> +
> +static inline enum dma_data_direction
> +rpcrdma_data_dir(bool writing)
> +{
> +	return writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
> +}
> +
> +static inline void
> +rpcrdma_map_one(struct ib_device *device, struct rpcrdma_mr_seg *seg,
> +		enum dma_data_direction direction)
> +{
> +	seg->mr_dir = direction;
> +	seg->mr_dmalen = seg->mr_len;
> +
> +	if (seg->mr_page)
> +		seg->mr_dma = ib_dma_map_page(device,
> +				seg->mr_page, offset_in_page(seg->mr_offset),
> +				seg->mr_dmalen, seg->mr_dir);
> +	else
> +		seg->mr_dma = ib_dma_map_single(device,
> +				seg->mr_offset,
> +				seg->mr_dmalen, seg->mr_dir);
> +
> +	if (ib_dma_mapping_error(device, seg->mr_dma))
> +		rpcrdma_mapping_error(seg);
> +}
> +
> +static inline void
> +rpcrdma_unmap_one(struct ib_device *device, struct rpcrdma_mr_seg *seg)
> +{
> +	if (seg->mr_page)
> +		ib_dma_unmap_page(device,
> +				  seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
> +	else
> +		ib_dma_unmap_single(device,
> +				    seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
> +}
> +
> +/*
>    * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c
>    */
>   void rpcrdma_connect_worker(struct work_struct *);
>

Looks good,

Reviewed-by: Sagi Grimberg <sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH v1 01/16] xprtrdma: Display IPv6 addresses and port numbers correctly
       [not found]         ` <5504F37A.20803-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
@ 2015-03-16 16:34           ` Chuck Lever
  0 siblings, 0 replies; 52+ messages in thread
From: Chuck Lever @ 2015-03-16 16:34 UTC (permalink / raw)
  To: Sagi Grimberg; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA


On Mar 14, 2015, at 7:50 PM, Sagi Grimberg <sagig-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org> wrote:

> On 3/13/2015 11:26 PM, Chuck Lever wrote:
>> Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
>> ---
>>  net/sunrpc/xprtrdma/transport.c |   47 ++++++++++++++++++++++++++++++++-------
>>  net/sunrpc/xprtrdma/verbs.c     |   21 +++++++----------
>>  2 files changed, 47 insertions(+), 21 deletions(-)
>> 
>> diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
>> index 2e192ba..26a62e7 100644
>> --- a/net/sunrpc/xprtrdma/transport.c
>> +++ b/net/sunrpc/xprtrdma/transport.c
>> @@ -157,12 +157,47 @@ static struct ctl_table sunrpc_table[] = {
>>  static struct rpc_xprt_ops xprt_rdma_procs;	/* forward reference */
>> 
>>  static void
>> +xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr *sap)
>> +{
>> +	struct sockaddr_in *sin = (struct sockaddr_in *)sap;
>> +	char buf[20];
>> +
>> +	snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr));
>> +	xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);
>> +
>> +	xprt->address_strings[RPC_DISPLAY_NETID] = "rdma";
>> +}
>> +
>> +static void
>> +xprt_rdma_format_addresses6(struct rpc_xprt *xprt, struct sockaddr *sap)
>> +{
>> +	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
>> +	char buf[40];
>> +
>> +	snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr);
> 
> Don't you prefer %pIS can handle both IPv4/v6 instead of two different
> routines? or maybe even %pISp can be better?
> 
>> +	xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);

The string does not contain a presentation address, which is what
we’d get from %pIS. These are non-standard hexadecimal representations
of the addresses. So, need separate functions for IPv4 and IPv6.

>> +
>> +	xprt->address_strings[RPC_DISPLAY_NETID] = "rdma6";
> 
> Was RPC_DISPLAY_NETID "rdma6" before this patch?

Section 12 of RFC 5666 defines NETIDs for RPC/RDMA with IPv4 and with
IPv6 addressing. I can cite that section in a comment.

Actually, I should define these in include/linux/sunrpc/msg_prot.h
with the other netids, and use the macro instead.

> 
>> +}
>> +
>> +static void
>>  xprt_rdma_format_addresses(struct rpc_xprt *xprt)
>>  {
>>  	struct sockaddr *sap = (struct sockaddr *)
>>  					&rpcx_to_rdmad(xprt).addr;
>> -	struct sockaddr_in *sin = (struct sockaddr_in *)sap;
>> -	char buf[64];
>> +	char buf[128];
>> +
>> +	switch (sap->sa_family) {
>> +	case AF_INET:
>> +		xprt_rdma_format_addresses4(xprt, sap);
>> +		break;
>> +	case AF_INET6:
>> +		xprt_rdma_format_addresses6(xprt, sap);
>> +		break;
>> +	default:
>> +		pr_err("rpcrdma: Unrecognized address family\n");
>> +		return;
>> +	}
>> 
>>  	(void)rpc_ntop(sap, buf, sizeof(buf));
>>  	xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL);
>> @@ -170,16 +205,10 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt)
>>  	snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap));
>>  	xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL);
>> 
>> -	xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
>> -
>> -	snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr));
>> -	xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);
>> -
>>  	snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap));
>>  	xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL);
>> 
>> -	/* netid */
>> -	xprt->address_strings[RPC_DISPLAY_NETID] = "rdma";
>> +	xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
>>  }
>> 
>>  static void
>> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
>> index 124676c..1aa55b7 100644
>> --- a/net/sunrpc/xprtrdma/verbs.c
>> +++ b/net/sunrpc/xprtrdma/verbs.c
>> @@ -50,6 +50,7 @@
>>  #include <linux/interrupt.h>
>>  #include <linux/slab.h>
>>  #include <linux/prefetch.h>
>> +#include <linux/sunrpc/addr.h>
>>  #include <asm/bitops.h>
>> 
>>  #include "xprt_rdma.h"
>> @@ -424,7 +425,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
>>  	struct rpcrdma_ia *ia = &xprt->rx_ia;
>>  	struct rpcrdma_ep *ep = &xprt->rx_ep;
>>  #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
>> -	struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr;
>> +	struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr;
>>  #endif
>>  	struct ib_qp_attr *attr = &ia->ri_qp_attr;
>>  	struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr;
>> @@ -480,9 +481,8 @@ connected:
>>  		wake_up_all(&ep->rep_connect_wait);
>>  		/*FALLTHROUGH*/
>>  	default:
>> -		dprintk("RPC:       %s: %pI4:%u (ep 0x%p): %s\n",
>> -			__func__, &addr->sin_addr.s_addr,
>> -			ntohs(addr->sin_port), ep,
>> +		dprintk("RPC:       %s: %pIS:%u (ep 0x%p): %s\n",
>> +			__func__, sap, rpc_get_port(sap), ep,
>>  			CONNECTION_MSG(event->event));
>>  		break;
>>  	}
>> @@ -491,19 +491,16 @@ connected:
>>  	if (connstate == 1) {
>>  		int ird = attr->max_dest_rd_atomic;
>>  		int tird = ep->rep_remote_cma.responder_resources;
>> -		printk(KERN_INFO "rpcrdma: connection to %pI4:%u "
>> -			"on %s, memreg %d slots %d ird %d%s\n",
>> -			&addr->sin_addr.s_addr,
>> -			ntohs(addr->sin_port),
>> +
>> +		pr_info("rpcrdma: connection to %pIS:%u on %s, memreg %d slots %d ird %d%s\n",
>> +			sap, rpc_get_port(sap),
>>  			ia->ri_id->device->name,
>>  			ia->ri_memreg_strategy,
>>  			xprt->rx_buf.rb_max_requests,
>>  			ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
>>  	} else if (connstate < 0) {
>> -		printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed (%d)\n",
>> -			&addr->sin_addr.s_addr,
>> -			ntohs(addr->sin_port),
>> -			connstate);
>> +		pr_info("rpcrdma: connection to %pIS:%u closed (%d)\n",
>> +			sap, rpc_get_port(sap), connstate);
>>  	}
>>  #endif
>> 
>> 
> 
> Other than the minors above, Looks good.
> 
> Reviewed-by: Sagi Grimberg <sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com



--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH v1 05/16] xprtrdma: Add a "register_external" op for each memreg mode
       [not found]         ` <5506B036.1040905-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
@ 2015-03-16 16:48           ` Chuck Lever
       [not found]             ` <982A021D-1B85-4AAF-89A3-302A21CF2B36-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 52+ messages in thread
From: Chuck Lever @ 2015-03-16 16:48 UTC (permalink / raw)
  To: Sagi Grimberg; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA


On Mar 16, 2015, at 3:28 AM, Sagi Grimberg <sagig-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org> wrote:

> On 3/13/2015 11:27 PM, Chuck Lever wrote:
>> There is very little common processing among the different external
>> memory registration functions. Have rpcrdma_create_chunks() call
>> the registration method directly. This removes a stack frame and a
>> switch statement from the external registration path.
>> 
>> Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
>> ---
>>  net/sunrpc/xprtrdma/fmr_ops.c      |   51 +++++++++++
>>  net/sunrpc/xprtrdma/frwr_ops.c     |   88 ++++++++++++++++++
>>  net/sunrpc/xprtrdma/physical_ops.c |   17 ++++
>>  net/sunrpc/xprtrdma/rpc_rdma.c     |    5 +
>>  net/sunrpc/xprtrdma/verbs.c        |  172 +-----------------------------------
>>  net/sunrpc/xprtrdma/xprt_rdma.h    |    6 +
>>  6 files changed, 166 insertions(+), 173 deletions(-)
>> 
>> diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
>> index eec2660..45fb646 100644
>> --- a/net/sunrpc/xprtrdma/fmr_ops.c
>> +++ b/net/sunrpc/xprtrdma/fmr_ops.c
>> @@ -29,7 +29,58 @@ fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
>>  		     rpcrdma_max_segments(r_xprt) * RPCRDMA_MAX_FMR_SGES);
>>  }
>> 
>> +/* Use the ib_map_phys_fmr() verb to register a memory region
>> + * for remote access via RDMA READ or RDMA WRITE.
>> + */
>> +static int
>> +fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
>> +	   int nsegs, bool writing)
>> +{
>> +	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
>> +	struct rpcrdma_mr_seg *seg1 = seg;
>> +	struct rpcrdma_mw *mw = seg1->rl_mw;
>> +	u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
>> +	int len, pageoff, i, rc;
>> +
>> +	pageoff = offset_in_page(seg1->mr_offset);
>> +	seg1->mr_offset -= pageoff;	/* start of page */
>> +	seg1->mr_len += pageoff;
>> +	len = -pageoff;
>> +	if (nsegs > RPCRDMA_MAX_FMR_SGES)
>> +		nsegs = RPCRDMA_MAX_FMR_SGES;
>> +	for (i = 0; i < nsegs;) {
>> +		rpcrdma_map_one(ia, seg, writing);
>> +		physaddrs[i] = seg->mr_dma;
>> +		len += seg->mr_len;
>> +		++seg;
>> +		++i;
>> +		/* Check for holes */
>> +		if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
>> +		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
>> +			break;
>> +	}
>> +
>> +	rc = ib_map_phys_fmr(mw->r.fmr, physaddrs, i, seg1->mr_dma);
>> +	if (rc)
>> +		goto out_maperr;
>> +
>> +	seg1->mr_rkey = mw->r.fmr->rkey;
>> +	seg1->mr_base = seg1->mr_dma + pageoff;
>> +	seg1->mr_nsegs = i;
>> +	seg1->mr_len = len;
>> +	return i;
>> +
>> +out_maperr:
>> +	dprintk("RPC:       %s: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
>> +		__func__, len, (unsigned long long)seg1->mr_dma,
>> +		pageoff, i, rc);
>> +	while (i--)
>> +		rpcrdma_unmap_one(ia, --seg);
>> +	return rc;
>> +}
>> +
>>  const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
>> +	.ro_map				= fmr_op_map,
>>  	.ro_maxpages			= fmr_op_maxpages,
>>  	.ro_displayname			= "fmr",
>>  };
>> diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
>> index 73a5ac8..2b5ccb0 100644
>> --- a/net/sunrpc/xprtrdma/frwr_ops.c
>> +++ b/net/sunrpc/xprtrdma/frwr_ops.c
>> @@ -29,7 +29,95 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
>>  		     rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth);
>>  }
>> 
>> +/* Post a FAST_REG Work Request to register a memory region
>> + * for remote access via RDMA READ or RDMA WRITE.
>> + */
>> +static int
>> +frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
>> +	    int nsegs, bool writing)
>> +{
>> +	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
>> +	struct rpcrdma_mr_seg *seg1 = seg;
>> +	struct rpcrdma_mw *mw = seg1->rl_mw;
>> +	struct rpcrdma_frmr *frmr = &mw->r.frmr;
>> +	struct ib_mr *mr = frmr->fr_mr;
>> +	struct ib_send_wr fastreg_wr, *bad_wr;
>> +	u8 key;
>> +	int len, pageoff;
>> +	int i, rc;
>> +	int seg_len;
>> +	u64 pa;
>> +	int page_no;
>> +
>> +	pageoff = offset_in_page(seg1->mr_offset);
>> +	seg1->mr_offset -= pageoff;	/* start of page */
>> +	seg1->mr_len += pageoff;
>> +	len = -pageoff;
>> +	if (nsegs > ia->ri_max_frmr_depth)
>> +		nsegs = ia->ri_max_frmr_depth;
>> +	for (page_no = i = 0; i < nsegs;) {
>> +		rpcrdma_map_one(ia, seg, writing);
>> +		pa = seg->mr_dma;
>> +		for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
>> +			frmr->fr_pgl->page_list[page_no++] = pa;
>> +			pa += PAGE_SIZE;
>> +		}
>> +		len += seg->mr_len;
>> +		++seg;
>> +		++i;
>> +		/* Check for holes */
>> +		if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
>> +		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
>> +			break;
>> +	}
>> +	dprintk("RPC:       %s: Using frmr %p to map %d segments\n",
>> +		__func__, mw, i);
>> +
>> +	frmr->fr_state = FRMR_IS_VALID;
>> +
>> +	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
>> +	fastreg_wr.wr_id = (unsigned long)(void *)mw;
>> +	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
>> +	fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma;
>> +	fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl;
>> +	fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
>> +	fastreg_wr.wr.fast_reg.page_list_len = page_no;
>> +	fastreg_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
> 
> Umm, is this necessarily true? Will the region length be full pages?
> Why don't you assign fast_reg.length = len?
> I might be missing something here…

Don’t know. This goes back to when FRWR was introduced in this code,
with commit 3197d309 in 2008.


> 
>> +	if (fastreg_wr.wr.fast_reg.length < len) {
>> +		rc = -EIO;
>> +		goto out_err;
>> +	}
>> +	fastreg_wr.wr.fast_reg.access_flags = writing ?
>> +				IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
>> +				IB_ACCESS_REMOTE_READ;
>> +	key = (u8)(mr->rkey & 0x000000FF);
>> +	ib_update_fast_reg_key(mr, ++key);
>> +	fastreg_wr.wr.fast_reg.rkey = mr->rkey;
>> +
>> +	DECR_CQCOUNT(&r_xprt->rx_ep);
>> +	rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr);
>> +	if (rc)
>> +		goto out_senderr;
>> +
>> +	seg1->mr_rkey = mr->rkey;
>> +	seg1->mr_base = seg1->mr_dma + pageoff;
>> +	seg1->mr_nsegs = i;
>> +	seg1->mr_len = len;
>> +	return i;
>> +
>> +out_senderr:
>> +	dprintk("RPC:       %s: ib_post_send status %i\n", __func__, rc);
>> +	ib_update_fast_reg_key(mr, --key);
>> +
>> +out_err:
>> +	frmr->fr_state = FRMR_IS_INVALID;
>> +	while (i--)
>> +		rpcrdma_unmap_one(ia, --seg);
>> +	return rc;
>> +}
>> +
>>  const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
>> +	.ro_map				= frwr_op_map,
>>  	.ro_maxpages			= frwr_op_maxpages,
>>  	.ro_displayname			= "frwr",
>>  };
>> diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
>> index 28ade19..5a284ee 100644
>> --- a/net/sunrpc/xprtrdma/physical_ops.c
>> +++ b/net/sunrpc/xprtrdma/physical_ops.c
>> @@ -28,7 +28,24 @@ physical_op_maxpages(struct rpcrdma_xprt *r_xprt)
>>  		     rpcrdma_max_segments(r_xprt));
>>  }
>> 
>> +/* The client's physical memory is already exposed for
>> + * remote access via RDMA READ or RDMA WRITE.
>> + */
>> +static int
>> +physical_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
>> +		int nsegs, bool writing)
>> +{
>> +	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
>> +
>> +	rpcrdma_map_one(ia, seg, writing);
>> +	seg->mr_rkey = ia->ri_bind_mem->rkey;
>> +	seg->mr_base = seg->mr_dma;
>> +	seg->mr_nsegs = 1;
>> +	return 1;
>> +}
>> +
>>  const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
>> +	.ro_map				= physical_op_map,
>>  	.ro_maxpages			= physical_op_maxpages,
>>  	.ro_displayname			= "physical",
>>  };
>> diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
>> index 41456d9..6ab1d03 100644
>> --- a/net/sunrpc/xprtrdma/rpc_rdma.c
>> +++ b/net/sunrpc/xprtrdma/rpc_rdma.c
>> @@ -187,6 +187,7 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
>>  	struct rpcrdma_write_array *warray = NULL;
>>  	struct rpcrdma_write_chunk *cur_wchunk = NULL;
>>  	__be32 *iptr = headerp->rm_body.rm_chunks;
>> +	int (*map)(struct rpcrdma_xprt *, struct rpcrdma_mr_seg *, int, bool);
>> 
>>  	if (type == rpcrdma_readch || type == rpcrdma_areadch) {
>>  		/* a read chunk - server will RDMA Read our memory */
>> @@ -209,9 +210,9 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
>>  	if (nsegs < 0)
>>  		return nsegs;
>> 
>> +	map = r_xprt->rx_ia.ri_ops->ro_map;
>>  	do {
>> -		n = rpcrdma_register_external(seg, nsegs,
>> -						cur_wchunk != NULL, r_xprt);
>> +		n = map(r_xprt, seg, nsegs, cur_wchunk != NULL);
>>  		if (n <= 0)
>>  			goto out;
>>  		if (cur_rchunk) {	/* read */
>> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
>> index 837d4ea..851ed97 100644
>> --- a/net/sunrpc/xprtrdma/verbs.c
>> +++ b/net/sunrpc/xprtrdma/verbs.c
>> @@ -1857,8 +1857,8 @@ rpcrdma_free_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
>>   * Wrappers for chunk registration, shared by read/write chunk code.
>>   */
>> 
>> -static void
>> -rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
>> +void
>> +rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, bool writing)
>>  {
>>  	seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
>>  	seg->mr_dmalen = seg->mr_len;
>> @@ -1878,7 +1878,7 @@ rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
>>  	}
>>  }
>> 
>> -static void
>> +void
>>  rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
>>  {
>>  	if (seg->mr_page)
>> @@ -1890,93 +1890,6 @@ rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
>>  }
>> 
>>  static int
>> -rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
>> -			int *nsegs, int writing, struct rpcrdma_ia *ia,
>> -			struct rpcrdma_xprt *r_xprt)
>> -{
>> -	struct rpcrdma_mr_seg *seg1 = seg;
>> -	struct rpcrdma_mw *mw = seg1->rl_mw;
>> -	struct rpcrdma_frmr *frmr = &mw->r.frmr;
>> -	struct ib_mr *mr = frmr->fr_mr;
>> -	struct ib_send_wr fastreg_wr, *bad_wr;
>> -	u8 key;
>> -	int len, pageoff;
>> -	int i, rc;
>> -	int seg_len;
>> -	u64 pa;
>> -	int page_no;
>> -
>> -	pageoff = offset_in_page(seg1->mr_offset);
>> -	seg1->mr_offset -= pageoff;	/* start of page */
>> -	seg1->mr_len += pageoff;
>> -	len = -pageoff;
>> -	if (*nsegs > ia->ri_max_frmr_depth)
>> -		*nsegs = ia->ri_max_frmr_depth;
>> -	for (page_no = i = 0; i < *nsegs;) {
>> -		rpcrdma_map_one(ia, seg, writing);
>> -		pa = seg->mr_dma;
>> -		for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
>> -			frmr->fr_pgl->page_list[page_no++] = pa;
>> -			pa += PAGE_SIZE;
>> -		}
>> -		len += seg->mr_len;
>> -		++seg;
>> -		++i;
>> -		/* Check for holes */
>> -		if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
>> -		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
>> -			break;
>> -	}
>> -	dprintk("RPC:       %s: Using frmr %p to map %d segments\n",
>> -		__func__, mw, i);
>> -
>> -	frmr->fr_state = FRMR_IS_VALID;
>> -
>> -	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
>> -	fastreg_wr.wr_id = (unsigned long)(void *)mw;
>> -	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
>> -	fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma;
>> -	fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl;
>> -	fastreg_wr.wr.fast_reg.page_list_len = page_no;
>> -	fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
>> -	fastreg_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
>> -	if (fastreg_wr.wr.fast_reg.length < len) {
>> -		rc = -EIO;
>> -		goto out_err;
>> -	}
>> -
>> -	/* Bump the key */
>> -	key = (u8)(mr->rkey & 0x000000FF);
>> -	ib_update_fast_reg_key(mr, ++key);
>> -
>> -	fastreg_wr.wr.fast_reg.access_flags = (writing ?
>> -				IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
>> -				IB_ACCESS_REMOTE_READ);
>> -	fastreg_wr.wr.fast_reg.rkey = mr->rkey;
>> -	DECR_CQCOUNT(&r_xprt->rx_ep);
>> -
>> -	rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr);
>> -	if (rc) {
>> -		dprintk("RPC:       %s: failed ib_post_send for register,"
>> -			" status %i\n", __func__, rc);
>> -		ib_update_fast_reg_key(mr, --key);
>> -		goto out_err;
>> -	} else {
>> -		seg1->mr_rkey = mr->rkey;
>> -		seg1->mr_base = seg1->mr_dma + pageoff;
>> -		seg1->mr_nsegs = i;
>> -		seg1->mr_len = len;
>> -	}
>> -	*nsegs = i;
>> -	return 0;
>> -out_err:
>> -	frmr->fr_state = FRMR_IS_INVALID;
>> -	while (i--)
>> -		rpcrdma_unmap_one(ia, --seg);
>> -	return rc;
>> -}
>> -
>> -static int
>>  rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
>>  			struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
>>  {
>> @@ -2007,49 +1920,6 @@ rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
>>  }
>> 
>>  static int
>> -rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
>> -			int *nsegs, int writing, struct rpcrdma_ia *ia)
>> -{
>> -	struct rpcrdma_mr_seg *seg1 = seg;
>> -	u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
>> -	int len, pageoff, i, rc;
>> -
>> -	pageoff = offset_in_page(seg1->mr_offset);
>> -	seg1->mr_offset -= pageoff;	/* start of page */
>> -	seg1->mr_len += pageoff;
>> -	len = -pageoff;
>> -	if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
>> -		*nsegs = RPCRDMA_MAX_DATA_SEGS;
>> -	for (i = 0; i < *nsegs;) {
>> -		rpcrdma_map_one(ia, seg, writing);
>> -		physaddrs[i] = seg->mr_dma;
>> -		len += seg->mr_len;
>> -		++seg;
>> -		++i;
>> -		/* Check for holes */
>> -		if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
>> -		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
>> -			break;
>> -	}
>> -	rc = ib_map_phys_fmr(seg1->rl_mw->r.fmr, physaddrs, i, seg1->mr_dma);
>> -	if (rc) {
>> -		dprintk("RPC:       %s: failed ib_map_phys_fmr "
>> -			"%u@0x%llx+%i (%d)... status %i\n", __func__,
>> -			len, (unsigned long long)seg1->mr_dma,
>> -			pageoff, i, rc);
>> -		while (i--)
>> -			rpcrdma_unmap_one(ia, --seg);
>> -	} else {
>> -		seg1->mr_rkey = seg1->rl_mw->r.fmr->rkey;
>> -		seg1->mr_base = seg1->mr_dma + pageoff;
>> -		seg1->mr_nsegs = i;
>> -		seg1->mr_len = len;
>> -	}
>> -	*nsegs = i;
>> -	return rc;
>> -}
>> -
>> -static int
>>  rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
>>  			struct rpcrdma_ia *ia)
>>  {
>> @@ -2070,42 +1940,6 @@ rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
>>  }
>> 
>>  int
>> -rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
>> -			int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
>> -{
>> -	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
>> -	int rc = 0;
>> -
>> -	switch (ia->ri_memreg_strategy) {
>> -
>> -	case RPCRDMA_ALLPHYSICAL:
>> -		rpcrdma_map_one(ia, seg, writing);
>> -		seg->mr_rkey = ia->ri_bind_mem->rkey;
>> -		seg->mr_base = seg->mr_dma;
>> -		seg->mr_nsegs = 1;
>> -		nsegs = 1;
>> -		break;
>> -
>> -	/* Registration using frmr registration */
>> -	case RPCRDMA_FRMR:
>> -		rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
>> -		break;
>> -
>> -	/* Registration using fmr memory registration */
>> -	case RPCRDMA_MTHCAFMR:
>> -		rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
>> -		break;
>> -
>> -	default:
>> -		return -EIO;
>> -	}
>> -	if (rc)
>> -		return rc;
>> -
>> -	return nsegs;
>> -}
>> -
>> -int
>>  rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
>>  		struct rpcrdma_xprt *r_xprt)
>>  {
>> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
>> index 59e627e..7bf077b 100644
>> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
>> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
>> @@ -336,6 +336,8 @@ struct rpcrdma_stats {
>>   */
>>  struct rpcrdma_xprt;
>>  struct rpcrdma_memreg_ops {
>> +	int		(*ro_map)(struct rpcrdma_xprt *,
>> +				  struct rpcrdma_mr_seg *, int, bool);
>>  	size_t		(*ro_maxpages)(struct rpcrdma_xprt *);
>>  	const char	*ro_displayname;
>>  };
>> @@ -403,8 +405,6 @@ void rpcrdma_buffer_put(struct rpcrdma_req *);
>>  void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
>>  void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
>> 
>> -int rpcrdma_register_external(struct rpcrdma_mr_seg *,
>> -				int, int, struct rpcrdma_xprt *);
>>  int rpcrdma_deregister_external(struct rpcrdma_mr_seg *,
>>  				struct rpcrdma_xprt *);
>> 
>> @@ -414,6 +414,8 @@ void rpcrdma_free_regbuf(struct rpcrdma_ia *,
>>  			 struct rpcrdma_regbuf *);
>> 
>>  unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *);
>> +void rpcrdma_map_one(struct rpcrdma_ia *, struct rpcrdma_mr_seg *, bool);
>> +void rpcrdma_unmap_one(struct rpcrdma_ia *, struct rpcrdma_mr_seg *);
>> 
>>  /*
>>   * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c
>> 
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
>> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com



--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH v1 06/16] xprtrdma: Add a "deregister_external" op for each memreg mode
       [not found]         ` <5506B19F.80105-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
@ 2015-03-16 16:57           ` Chuck Lever
  0 siblings, 0 replies; 52+ messages in thread
From: Chuck Lever @ 2015-03-16 16:57 UTC (permalink / raw)
  To: Sagi Grimberg; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA


On Mar 16, 2015, at 3:34 AM, Sagi Grimberg <sagig-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org> wrote:

> On 3/13/2015 11:27 PM, Chuck Lever wrote:
>> There is very little common processing among the different external
>> memory deregistration functions.
>> 
>> In addition, instead of calling the deregistration function for each
>> segment, have one call release all segments for a request. This makes
>> the API a little asymmetrical, but a hair faster.
>> 
>> Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
>> ---
>>  net/sunrpc/xprtrdma/fmr_ops.c      |   37 ++++++++++++++++
>>  net/sunrpc/xprtrdma/frwr_ops.c     |   46 ++++++++++++++++++++
>>  net/sunrpc/xprtrdma/physical_ops.c |   13 ++++++
>>  net/sunrpc/xprtrdma/rpc_rdma.c     |    7 +--
>>  net/sunrpc/xprtrdma/transport.c    |    8 +---
>>  net/sunrpc/xprtrdma/verbs.c        |   81 ------------------------------------
>>  net/sunrpc/xprtrdma/xprt_rdma.h    |    5 +-
>>  7 files changed, 103 insertions(+), 94 deletions(-)
>> 
>> diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
>> index 45fb646..9b983b4 100644
>> --- a/net/sunrpc/xprtrdma/fmr_ops.c
>> +++ b/net/sunrpc/xprtrdma/fmr_ops.c
>> @@ -20,6 +20,32 @@
>>  /* Maximum scatter/gather per FMR */
>>  #define RPCRDMA_MAX_FMR_SGES	(64)
>> 
>> +/* Use the ib_unmap_fmr() verb to prevent further remote
>> + * access via RDMA READ or RDMA WRITE.
>> + */
>> +static int
>> +__fmr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
>> +{
>> +	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
>> +	struct rpcrdma_mr_seg *seg1 = seg;
>> +	int rc, nsegs = seg->mr_nsegs;
>> +	LIST_HEAD(l);
>> +
>> +	list_add(&seg1->rl_mw->r.fmr->list, &l);
>> +	rc = ib_unmap_fmr(&l);
>> +	read_lock(&ia->ri_qplock);
>> +	while (seg1->mr_nsegs--)
>> +		rpcrdma_unmap_one(ia, seg++);
>> +	read_unlock(&ia->ri_qplock);
> 
> So I know you are just moving things around here, but can you explain
> why the read_lock is taken here? Why do you need this protection?

The deregistration method is not serialized with transport reconnect.
That means ->ri_id and ->qp can be NULL in some corner cases when the
upper layer invokes this code path.

rpcrdma_unmap_one() dereferences ->ri_id, so it has to be protected.

The expedient fix in commit 73806c88 was to ensure ->qp was always
able to be dereferenced by adding the rwlock here.

The longer term fix is what we discussed last week: use a work queue
sentinel to co-ordinate the destruction of the QP with callers like
this one. That would also protect our upcall code, and we could remove
the rwlock.

> 
>> +	if (rc)
>> +		goto out_err;
>> +	return nsegs;
>> +
>> +out_err:
>> +	dprintk("RPC:       %s: ib_unmap_fmr status %i\n", __func__, rc);
>> +	return nsegs;
>> +}
>> +
>>  /* FMR mode conveys up to 64 pages of payload per chunk segment.
>>   */
>>  static size_t
>> @@ -79,8 +105,19 @@ out_maperr:
>>  	return rc;
>>  }
>> 
>> +static void
>> +fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
>> +	     unsigned int count)
>> +{
>> +	unsigned int i;
>> +
>> +	for (i = 0; count--;)
>> +		i += __fmr_unmap(r_xprt, &req->rl_segments[i]);
>> +}
>> +
>>  const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
>>  	.ro_map				= fmr_op_map,
>> +	.ro_unmap			= fmr_op_unmap,
>>  	.ro_maxpages			= fmr_op_maxpages,
>>  	.ro_displayname			= "fmr",
>>  };
>> diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
>> index 2b5ccb0..05b5761 100644
>> --- a/net/sunrpc/xprtrdma/frwr_ops.c
>> +++ b/net/sunrpc/xprtrdma/frwr_ops.c
>> @@ -17,6 +17,41 @@
>>  # define RPCDBG_FACILITY	RPCDBG_TRANS
>>  #endif
>> 
>> +/* Post a LOCAL_INV Work Request to prevent further remote access
>> + * via RDMA READ or RDMA WRITE.
>> + */
>> +static int
>> +__frwr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
>> +{
>> +	struct rpcrdma_mr_seg *seg1 = seg;
>> +	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
>> +	struct ib_send_wr invalidate_wr, *bad_wr;
>> +	int rc, nsegs = seg->mr_nsegs;
>> +
>> +	seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;
>> +
>> +	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
>> +	invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw;
>> +	invalidate_wr.opcode = IB_WR_LOCAL_INV;
>> +	invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey;
>> +	DECR_CQCOUNT(&r_xprt->rx_ep);
>> +
>> +	read_lock(&ia->ri_qplock);
>> +	while (seg1->mr_nsegs--)
>> +		rpcrdma_unmap_one(ia, seg++);
>> +	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
>> +	read_unlock(&ia->ri_qplock);
> 
> Same question here, you are taking the lock but you don't really
> check for anything and just post. Again, I might be missing something.
> 
>> +	if (rc)
>> +		goto out_err;
>> +	return nsegs;
>> +
>> +out_err:
>> +	/* Force rpcrdma_buffer_get() to retry */
>> +	seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
>> +	dprintk("RPC:       %s: ib_post_send status %i\n", __func__, rc);
>> +	return nsegs;
>> +}
>> +
>>  /* FRWR mode conveys a list of pages per chunk segment. The
>>   * maximum length of that list is the FRWR page list depth.
>>   */
>> @@ -116,8 +151,19 @@ out_err:
>>  	return rc;
>>  }
>> 
>> +static void
>> +frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
>> +	      unsigned int count)
>> +{
>> +	unsigned int i;
>> +
>> +	for (i = 0; count--;)
>> +		i += __frwr_unmap(r_xprt, &req->rl_segments[i]);
>> +}
>> +
>>  const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
>>  	.ro_map				= frwr_op_map,
>> +	.ro_unmap			= frwr_op_unmap,
>>  	.ro_maxpages			= frwr_op_maxpages,
>>  	.ro_displayname			= "frwr",
>>  };
>> diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
>> index 5a284ee..f2c15be 100644
>> --- a/net/sunrpc/xprtrdma/physical_ops.c
>> +++ b/net/sunrpc/xprtrdma/physical_ops.c
>> @@ -44,8 +44,21 @@ physical_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
>>  	return 1;
>>  }
>> 
>> +/* Unmap a memory region, but leave it registered.
>> + */
>> +static void
>> +physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
>> +		  unsigned int count)
>> +{
>> +	unsigned int i;
>> +
>> +	for (i = 0; i < count; i++)
>> +		rpcrdma_unmap_one(&r_xprt->rx_ia, &req->rl_segments[i]);
>> +}
>> +
>>  const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
>>  	.ro_map				= physical_op_map,
>> +	.ro_unmap			= physical_op_unmap,
>>  	.ro_maxpages			= physical_op_maxpages,
>>  	.ro_displayname			= "physical",
>>  };
>> diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
>> index 6ab1d03..7b51d9d 100644
>> --- a/net/sunrpc/xprtrdma/rpc_rdma.c
>> +++ b/net/sunrpc/xprtrdma/rpc_rdma.c
>> @@ -284,11 +284,8 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
>>  	return (unsigned char *)iptr - (unsigned char *)headerp;
>> 
>>  out:
>> -	if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_FRMR) {
>> -		for (pos = 0; nchunks--;)
>> -			pos += rpcrdma_deregister_external(
>> -					&req->rl_segments[pos], r_xprt);
>> -	}
>> +	if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_FRMR)
>> +		r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, req, nchunks);
>>  	return n;
>>  }
>> 
>> diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
>> index 9a9da40..c484671 100644
>> --- a/net/sunrpc/xprtrdma/transport.c
>> +++ b/net/sunrpc/xprtrdma/transport.c
>> @@ -571,7 +571,6 @@ xprt_rdma_free(void *buffer)
>>  	struct rpcrdma_req *req;
>>  	struct rpcrdma_xprt *r_xprt;
>>  	struct rpcrdma_regbuf *rb;
>> -	int i;
>> 
>>  	if (buffer == NULL)
>>  		return;
>> @@ -582,11 +581,8 @@ xprt_rdma_free(void *buffer)
>> 
>>  	dprintk("RPC:       %s: called on 0x%p\n", __func__, req->rl_reply);
>> 
>> -	for (i = 0; req->rl_nchunks;) {
>> -		--req->rl_nchunks;
>> -		i += rpcrdma_deregister_external(
>> -			&req->rl_segments[i], r_xprt);
>> -	}
>> +	r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, req, req->rl_nchunks);
>> +	req->rl_nchunks = 0;
>> 
>>  	rpcrdma_buffer_put(req);
>>  }
>> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
>> index 851ed97..a1621fd 100644
>> --- a/net/sunrpc/xprtrdma/verbs.c
>> +++ b/net/sunrpc/xprtrdma/verbs.c
>> @@ -1509,7 +1509,7 @@ rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
>>  	}
>>  }
>> 
>> -/* rpcrdma_unmap_one() was already done by rpcrdma_deregister_frmr_external().
>> +/* rpcrdma_unmap_one() was already done during deregistration.
>>   * Redo only the ib_post_send().
>>   */
>>  static void
>> @@ -1889,85 +1889,6 @@ rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
>>  				seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
>>  }
>> 
>> -static int
>> -rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
>> -			struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
>> -{
>> -	struct rpcrdma_mr_seg *seg1 = seg;
>> -	struct ib_send_wr invalidate_wr, *bad_wr;
>> -	int rc;
>> -
>> -	seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;
>> -
>> -	memset(&invalidate_wr, 0, sizeof invalidate_wr);
>> -	invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw;
>> -	invalidate_wr.opcode = IB_WR_LOCAL_INV;
>> -	invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey;
>> -	DECR_CQCOUNT(&r_xprt->rx_ep);
>> -
>> -	read_lock(&ia->ri_qplock);
>> -	while (seg1->mr_nsegs--)
>> -		rpcrdma_unmap_one(ia, seg++);
>> -	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
>> -	read_unlock(&ia->ri_qplock);
>> -	if (rc) {
>> -		/* Force rpcrdma_buffer_get() to retry */
>> -		seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
>> -		dprintk("RPC:       %s: failed ib_post_send for invalidate,"
>> -			" status %i\n", __func__, rc);
>> -	}
>> -	return rc;
>> -}
>> -
>> -static int
>> -rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
>> -			struct rpcrdma_ia *ia)
>> -{
>> -	struct rpcrdma_mr_seg *seg1 = seg;
>> -	LIST_HEAD(l);
>> -	int rc;
>> -
>> -	list_add(&seg1->rl_mw->r.fmr->list, &l);
>> -	rc = ib_unmap_fmr(&l);
>> -	read_lock(&ia->ri_qplock);
>> -	while (seg1->mr_nsegs--)
>> -		rpcrdma_unmap_one(ia, seg++);
>> -	read_unlock(&ia->ri_qplock);
>> -	if (rc)
>> -		dprintk("RPC:       %s: failed ib_unmap_fmr,"
>> -			" status %i\n", __func__, rc);
>> -	return rc;
>> -}
>> -
>> -int
>> -rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
>> -		struct rpcrdma_xprt *r_xprt)
>> -{
>> -	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
>> -	int nsegs = seg->mr_nsegs, rc;
>> -
>> -	switch (ia->ri_memreg_strategy) {
>> -
>> -	case RPCRDMA_ALLPHYSICAL:
>> -		read_lock(&ia->ri_qplock);
>> -		rpcrdma_unmap_one(ia, seg);
>> -		read_unlock(&ia->ri_qplock);
>> -		break;
>> -
>> -	case RPCRDMA_FRMR:
>> -		rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
>> -		break;
>> -
>> -	case RPCRDMA_MTHCAFMR:
>> -		rc = rpcrdma_deregister_fmr_external(seg, ia);
>> -		break;
>> -
>> -	default:
>> -		break;
>> -	}
>> -	return nsegs;
>> -}
>> -
>>  /*
>>   * Prepost any receive buffer, then post send.
>>   *
>> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
>> index 7bf077b..3aabbb2 100644
>> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
>> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
>> @@ -338,6 +338,8 @@ struct rpcrdma_xprt;
>>  struct rpcrdma_memreg_ops {
>>  	int		(*ro_map)(struct rpcrdma_xprt *,
>>  				  struct rpcrdma_mr_seg *, int, bool);
>> +	void		(*ro_unmap)(struct rpcrdma_xprt *,
>> +				    struct rpcrdma_req *, unsigned int);
>>  	size_t		(*ro_maxpages)(struct rpcrdma_xprt *);
>>  	const char	*ro_displayname;
>>  };
>> @@ -405,9 +407,6 @@ void rpcrdma_buffer_put(struct rpcrdma_req *);
>>  void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
>>  void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
>> 
>> -int rpcrdma_deregister_external(struct rpcrdma_mr_seg *,
>> -				struct rpcrdma_xprt *);
>> -
>>  struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *,
>>  					    size_t, gfp_t);
>>  void rpcrdma_free_regbuf(struct rpcrdma_ia *,
>> 
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
>> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> 
> 

--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com



--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH v1 09/16] xprtrdma: Add "destroy MRs" memreg op
       [not found]         ` <5506B48F.6050902-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
@ 2015-03-16 16:59           ` Chuck Lever
  0 siblings, 0 replies; 52+ messages in thread
From: Chuck Lever @ 2015-03-16 16:59 UTC (permalink / raw)
  To: Sagi Grimberg; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA


On Mar 16, 2015, at 3:46 AM, Sagi Grimberg <sagig-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org> wrote:

> On 3/13/2015 11:27 PM, Chuck Lever wrote:
>> Memory Region objects associated with a transport instance are
>> destroyed before the instance is shutdown and destroyed.
>> 
>> Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
>> ---
>>  net/sunrpc/xprtrdma/fmr_ops.c      |   21 +++++++++++++++
>>  net/sunrpc/xprtrdma/frwr_ops.c     |   17 ++++++++++++
>>  net/sunrpc/xprtrdma/physical_ops.c |    6 ++++
>>  net/sunrpc/xprtrdma/verbs.c        |   52 +-----------------------------------
>>  net/sunrpc/xprtrdma/xprt_rdma.h    |    1 +
>>  5 files changed, 46 insertions(+), 51 deletions(-)
>> 
>> diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
>> index 1ccb3de..3115e4b 100644
>> --- a/net/sunrpc/xprtrdma/fmr_ops.c
>> +++ b/net/sunrpc/xprtrdma/fmr_ops.c
>> @@ -178,11 +178,32 @@ fmr_op_reset(struct rpcrdma_xprt *r_xprt)
>>  			__func__, rc);
>>  }
>> 
>> +static void
>> +fmr_op_destroy(struct rpcrdma_buffer *buf)
>> +{
>> +	struct rpcrdma_mw *r;
>> +	int rc;
>> +
>> +	while (!list_empty(&buf->rb_all)) {
>> +		r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
>> +		list_del(&r->mw_all);
>> +		list_del(&r->mw_list);
> 
> Again, I understand this patch is just moving code, but is it
> guaranteed that mw_list is in rb_mws at this point?

Good call, there probably is no such guarantee in the current code base.
Since the transport is being destroyed anyway, I think I can just remove
that list_del(&r->mw_list).


> 
>> +
>> +		rc = ib_dealloc_fmr(r->r.fmr);
>> +		if (rc)
>> +			dprintk("RPC:       %s: ib_dealloc_fmr failed %i\n",
>> +				__func__, rc);
>> +
>> +		kfree(r);
>> +	}
>> +}
>> +
>>  const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
>>  	.ro_map				= fmr_op_map,
>>  	.ro_unmap			= fmr_op_unmap,
>>  	.ro_maxpages			= fmr_op_maxpages,
>>  	.ro_init			= fmr_op_init,
>>  	.ro_reset			= fmr_op_reset,
>> +	.ro_destroy			= fmr_op_destroy,
>>  	.ro_displayname			= "fmr",
>>  };
>> diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
>> index b4ce0e5..fc3a228 100644
>> --- a/net/sunrpc/xprtrdma/frwr_ops.c
>> +++ b/net/sunrpc/xprtrdma/frwr_ops.c
>> @@ -271,11 +271,28 @@ frwr_op_reset(struct rpcrdma_xprt *r_xprt)
>>  	}
>>  }
>> 
>> +static void
>> +frwr_op_destroy(struct rpcrdma_buffer *buf)
>> +{
>> +	struct rpcrdma_mw *r;
>> +
>> +	while (!list_empty(&buf->rb_all)) {
>> +		r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
>> +		list_del(&r->mw_all);
>> +		list_del(&r->mw_list);
>> +
>> +		__frwr_release(r);
>> +
>> +		kfree(r);
>> +	}
>> +}
>> +
>>  const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
>>  	.ro_map				= frwr_op_map,
>>  	.ro_unmap			= frwr_op_unmap,
>>  	.ro_maxpages			= frwr_op_maxpages,
>>  	.ro_init			= frwr_op_init,
>>  	.ro_reset			= frwr_op_reset,
>> +	.ro_destroy			= frwr_op_destroy,
>>  	.ro_displayname			= "frwr",
>>  };
>> diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
>> index 0afc691..f8da8c4 100644
>> --- a/net/sunrpc/xprtrdma/physical_ops.c
>> +++ b/net/sunrpc/xprtrdma/physical_ops.c
>> @@ -67,11 +67,17 @@ physical_op_reset(struct rpcrdma_xprt *r_xprt)
>>  {
>>  }
>> 
>> +static void
>> +physical_op_destroy(struct rpcrdma_buffer *buf)
>> +{
>> +}
>> +
>>  const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
>>  	.ro_map				= physical_op_map,
>>  	.ro_unmap			= physical_op_unmap,
>>  	.ro_maxpages			= physical_op_maxpages,
>>  	.ro_init			= physical_op_init,
>>  	.ro_reset			= physical_op_reset,
>> +	.ro_destroy			= physical_op_destroy,
>>  	.ro_displayname			= "physical",
>>  };
>> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
>> index e17d91a..dcbc736 100644
>> --- a/net/sunrpc/xprtrdma/verbs.c
>> +++ b/net/sunrpc/xprtrdma/verbs.c
>> @@ -1198,47 +1198,6 @@ rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
>>  	kfree(req);
>>  }
>> 
>> -static void
>> -rpcrdma_destroy_fmrs(struct rpcrdma_buffer *buf)
>> -{
>> -	struct rpcrdma_mw *r;
>> -	int rc;
>> -
>> -	while (!list_empty(&buf->rb_all)) {
>> -		r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
>> -		list_del(&r->mw_all);
>> -		list_del(&r->mw_list);
>> -
>> -		rc = ib_dealloc_fmr(r->r.fmr);
>> -		if (rc)
>> -			dprintk("RPC:       %s: ib_dealloc_fmr failed %i\n",
>> -				__func__, rc);
>> -
>> -		kfree(r);
>> -	}
>> -}
>> -
>> -static void
>> -rpcrdma_destroy_frmrs(struct rpcrdma_buffer *buf)
>> -{
>> -	struct rpcrdma_mw *r;
>> -	int rc;
>> -
>> -	while (!list_empty(&buf->rb_all)) {
>> -		r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
>> -		list_del(&r->mw_all);
>> -		list_del(&r->mw_list);
>> -
>> -		rc = ib_dereg_mr(r->r.frmr.fr_mr);
>> -		if (rc)
>> -			dprintk("RPC:       %s: ib_dereg_mr failed %i\n",
>> -				__func__, rc);
>> -		ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
>> -
>> -		kfree(r);
>> -	}
>> -}
>> -
>>  void
>>  rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
>>  {
>> @@ -1259,16 +1218,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
>>  			rpcrdma_destroy_req(ia, buf->rb_send_bufs[i]);
>>  	}
>> 
>> -	switch (ia->ri_memreg_strategy) {
>> -	case RPCRDMA_FRMR:
>> -		rpcrdma_destroy_frmrs(buf);
>> -		break;
>> -	case RPCRDMA_MTHCAFMR:
>> -		rpcrdma_destroy_fmrs(buf);
>> -		break;
>> -	default:
>> -		break;
>> -	}
>> +	ia->ri_ops->ro_destroy(buf);
>> 
>>  	kfree(buf->rb_pool);
>>  }
>> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
>> index cdf6763..a0e3c3e 100644
>> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
>> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
>> @@ -343,6 +343,7 @@ struct rpcrdma_memreg_ops {
>>  	size_t		(*ro_maxpages)(struct rpcrdma_xprt *);
>>  	int		(*ro_init)(struct rpcrdma_xprt *);
>>  	void		(*ro_reset)(struct rpcrdma_xprt *);
>> +	void		(*ro_destroy)(struct rpcrdma_buffer *);
>>  	const char	*ro_displayname;
>>  };
>> 
>> 
> 
> Other than that,
> 
> Reviewed-by: Sagi Grimberg <sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com



--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH v1 12/16] xprtrdma: Acquire FMRs in rpcrdma_fmr_register_external()
       [not found]         ` <5506CC6C.8090106-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
@ 2015-03-16 17:03           ` Chuck Lever
  0 siblings, 0 replies; 52+ messages in thread
From: Chuck Lever @ 2015-03-16 17:03 UTC (permalink / raw)
  To: Sagi Grimberg; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA


On Mar 16, 2015, at 5:28 AM, Sagi Grimberg <sagig-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org> wrote:

> On 3/13/2015 11:28 PM, Chuck Lever wrote:
>> Acquiring 64 FMRs in rpcrdma_buffer_get() while holding the buffer
>> pool lock is expensive, and unnecessary because FMR mode can
>> transfer up to a 1MB payload using just a single ib_fmr.
>> 
>> Instead, acquire ib_fmrs one-at-a-time as chunks are registered, and
>> return them to rb_mws immediately during deregistration.
>> 
>> Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
>> ---
>>  net/sunrpc/xprtrdma/fmr_ops.c   |   41 ++++++++++++++++++++++++++++++++++++---
>>  net/sunrpc/xprtrdma/verbs.c     |   41 ++++++++++++++-------------------------
>>  net/sunrpc/xprtrdma/xprt_rdma.h |    1 +
>>  3 files changed, 54 insertions(+), 29 deletions(-)
>> 
>> diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
>> index 96e6cd3..9c6c2e8 100644
>> --- a/net/sunrpc/xprtrdma/fmr_ops.c
>> +++ b/net/sunrpc/xprtrdma/fmr_ops.c
>> @@ -28,10 +28,11 @@ __fmr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
>>  {
>>  	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
>>  	struct rpcrdma_mr_seg *seg1 = seg;
>> +	struct rpcrdma_mw *mw = seg1->rl_mw;
>>  	int rc, nsegs = seg->mr_nsegs;
>>  	LIST_HEAD(l);
>> 
>> -	list_add(&seg1->rl_mw->r.fmr->list, &l);
>> +	list_add(&mw->r.fmr->list, &l);
>>  	rc = ib_unmap_fmr(&l);
>>  	read_lock(&ia->ri_qplock);
>>  	while (seg1->mr_nsegs--)
>> @@ -39,11 +40,14 @@ __fmr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
>>  	read_unlock(&ia->ri_qplock);
>>  	if (rc)
>>  		goto out_err;
>> +out:
>> +	seg1->rl_mw = NULL;
>> +	rpcrdma_put_mw(r_xprt, mw);
>>  	return nsegs;
>> 
>>  out_err:
>>  	dprintk("RPC:       %s: ib_unmap_fmr status %i\n", __func__, rc);
>> -	return nsegs;
>> +	goto out;
>>  }
>> 
>>  static int
>> @@ -117,6 +121,27 @@ out_fmr_err:
>>  	return rc;
>>  }
>> 
>> +static struct rpcrdma_mw *
>> +__fmr_get_mw(struct rpcrdma_xprt *r_xprt)
> 
> This introduces an asymmetric approach where you have fmr/frwr get_mw
> routines but have a single rpcrdma_put_mw. I noticed that the
> frwr_get_mw (next patch) is almost completely different - but I wonder
> if that should really be that different?

FMR doesn’t need to deal with asynchronous LOCAL_INV getting flushed
when the transport disconnects.

I will explain this further in response to 13/16.

> Just raising the question.
> 
>> +{
>> +	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
>> +	struct rpcrdma_mw *mw = NULL;
>> +	unsigned long flags;
>> +
>> +	spin_lock_irqsave(&buf->rb_lock, flags);
>> +
>> +	if (!list_empty(&buf->rb_mws)) {
>> +		mw = list_entry(buf->rb_mws.next,
>> +				struct rpcrdma_mw, mw_list);
>> +		list_del_init(&mw->mw_list);
>> +	} else {
>> +		pr_err("RPC:       %s: no MWs available\n", __func__);
>> +	}
>> +
>> +	spin_unlock_irqrestore(&buf->rb_lock, flags);
>> +	return mw;
>> +}
>> +
>>  /* Use the ib_map_phys_fmr() verb to register a memory region
>>   * for remote access via RDMA READ or RDMA WRITE.
>>   */
>> @@ -126,10 +151,18 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
>>  {
>>  	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
>>  	struct rpcrdma_mr_seg *seg1 = seg;
>> -	struct rpcrdma_mw *mw = seg1->rl_mw;
>> +	struct rpcrdma_mw *mw;
>>  	u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
>>  	int len, pageoff, i, rc;
>> 
>> +	mw = __fmr_get_mw(r_xprt);
>> +	if (!mw)
>> +		return -ENOMEM;
>> +	if (seg1->rl_mw) {
>> +		rpcrdma_put_mw(r_xprt, seg1->rl_mw);
>> +		seg1->rl_mw = NULL;
>> +	}
>> +
> 
> How can this happen? getting to op_map with existing rl_mw? and
> wouldn't it be better to use rl_mw instead of getting a new mw and
> putting seg1->rl_mw?
> 
>>  	pageoff = offset_in_page(seg1->mr_offset);
>>  	seg1->mr_offset -= pageoff;	/* start of page */
>>  	seg1->mr_len += pageoff;
>> @@ -152,6 +185,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
>>  	if (rc)
>>  		goto out_maperr;
>> 
>> +	seg1->rl_mw = mw;
>>  	seg1->mr_rkey = mw->r.fmr->rkey;
>>  	seg1->mr_base = seg1->mr_dma + pageoff;
>>  	seg1->mr_nsegs = i;
>> @@ -164,6 +198,7 @@ out_maperr:
>>  		pageoff, i, rc);
>>  	while (i--)
>>  		rpcrdma_unmap_one(ia, --seg);
>> +	rpcrdma_put_mw(r_xprt, mw);
>>  	return rc;
>>  }
>> 
>> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
>> index f108a57..f2316d8 100644
>> --- a/net/sunrpc/xprtrdma/verbs.c
>> +++ b/net/sunrpc/xprtrdma/verbs.c
>> @@ -1171,6 +1171,21 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
>>  	kfree(buf->rb_pool);
>>  }
>> 
>> +void
>> +rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw)
>> +{
>> +	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
>> +	unsigned long flags;
>> +
>> +	if (!list_empty(&mw->mw_list))
>> +		pr_warn("RPC:       %s: mw %p still on a list!\n",
>> +			__func__, mw);
>> +
>> +	spin_lock_irqsave(&buf->rb_lock, flags);
>> +	list_add_tail(&mw->mw_list, &buf->rb_mws);
>> +	spin_unlock_irqrestore(&buf->rb_lock, flags);
>> +}
>> +
>>  /* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
>>   * some req segments uninitialized.
>>   */
>> @@ -1292,28 +1307,6 @@ rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
>>  	return NULL;
>>  }
>> 
>> -static struct rpcrdma_req *
>> -rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
>> -{
>> -	struct rpcrdma_mw *r;
>> -	int i;
>> -
>> -	i = RPCRDMA_MAX_SEGS - 1;
>> -	while (!list_empty(&buf->rb_mws)) {
>> -		r = list_entry(buf->rb_mws.next,
>> -			       struct rpcrdma_mw, mw_list);
>> -		list_del(&r->mw_list);
>> -		req->rl_segments[i].rl_mw = r;
>> -		if (unlikely(i-- == 0))
>> -			return req;	/* Success */
>> -	}
>> -
>> -	/* Not enough entries on rb_mws for this req */
>> -	rpcrdma_buffer_put_sendbuf(req, buf);
>> -	rpcrdma_buffer_put_mrs(req, buf);
>> -	return NULL;
>> -}
>> -
>>  /*
>>   * Get a set of request/reply buffers.
>>   *
>> @@ -1355,9 +1348,6 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
>>  	case RPCRDMA_FRMR:
>>  		req = rpcrdma_buffer_get_frmrs(req, buffers, &stale);
>>  		break;
>> -	case RPCRDMA_MTHCAFMR:
>> -		req = rpcrdma_buffer_get_fmrs(req, buffers);
>> -		break;
>>  	default:
>>  		break;
>>  	}
>> @@ -1382,7 +1372,6 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
>>  	rpcrdma_buffer_put_sendbuf(req, buffers);
>>  	switch (ia->ri_memreg_strategy) {
>>  	case RPCRDMA_FRMR:
>> -	case RPCRDMA_MTHCAFMR:
>>  		rpcrdma_buffer_put_mrs(req, buffers);
>>  		break;
>>  	default:
>> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
>> index 40a0ee8..d5aa5b4 100644
>> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
>> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
>> @@ -426,6 +426,7 @@ void rpcrdma_free_regbuf(struct rpcrdma_ia *,
>>  unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *);
>>  void rpcrdma_map_one(struct rpcrdma_ia *, struct rpcrdma_mr_seg *, bool);
>>  void rpcrdma_unmap_one(struct rpcrdma_ia *, struct rpcrdma_mr_seg *);
>> +void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *);
>> 
>>  /*
>>   * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c
>> 
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
>> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> 
> 

--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com



--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH v1 13/16] xprtrdma: Acquire MRs in rpcrdma_register_external()
       [not found]         ` <5506D02B.5050602-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
@ 2015-03-16 18:14           ` Chuck Lever
  0 siblings, 0 replies; 52+ messages in thread
From: Chuck Lever @ 2015-03-16 18:14 UTC (permalink / raw)
  To: Sagi Grimberg; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA


On Mar 16, 2015, at 5:44 AM, Sagi Grimberg <sagig-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org> wrote:

> On 3/13/2015 11:28 PM, Chuck Lever wrote:
>> Acquiring 64 MRs in rpcrdma_buffer_get() while holding the buffer
>> pool lock is expensive, and unnecessary because most modern adapters
>> can transfer 100s of KBs of payload using just a single MR.
>> 
>> Instead, acquire MRs one-at-a-time as chunks are registered, and
>> return them to rb_mws immediately during deregistration.
>> 
>> Note: commit 539431a437d2 ("xprtrdma: Don't invalidate FRMRs if
>> registration fails") is reverted: There is now a valid case where
>> registration can fail (with -ENOMEM) but the QP is still in RTS.
>> 
>> Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
>> ---
>>  net/sunrpc/xprtrdma/frwr_ops.c |  126 ++++++++++++++++++++++++++++++++++++---
>>  net/sunrpc/xprtrdma/rpc_rdma.c |    3 -
>>  net/sunrpc/xprtrdma/verbs.c    |  130 ----------------------------------------
>>  3 files changed, 120 insertions(+), 139 deletions(-)
>> 
>> diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
>> index 6e6d8ba..d23e064 100644
>> --- a/net/sunrpc/xprtrdma/frwr_ops.c
>> +++ b/net/sunrpc/xprtrdma/frwr_ops.c
>> @@ -54,15 +54,16 @@ __frwr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
>>  {
>>  	struct rpcrdma_mr_seg *seg1 = seg;
>>  	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
>> +	struct rpcrdma_mw *mw = seg1->rl_mw;
>>  	struct ib_send_wr invalidate_wr, *bad_wr;
>>  	int rc, nsegs = seg->mr_nsegs;
>> 
>> -	seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;
>> +	mw->r.frmr.fr_state = FRMR_IS_INVALID;
>> 
>>  	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
>> -	invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw;
>> +	invalidate_wr.wr_id = (unsigned long)(void *)mw;
>>  	invalidate_wr.opcode = IB_WR_LOCAL_INV;
>> -	invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey;
>> +	invalidate_wr.ex.invalidate_rkey = mw->r.frmr.fr_mr->rkey;
>>  	DECR_CQCOUNT(&r_xprt->rx_ep);
>> 
>>  	read_lock(&ia->ri_qplock);
>> @@ -72,13 +73,17 @@ __frwr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
>>  	read_unlock(&ia->ri_qplock);
>>  	if (rc)
>>  		goto out_err;
>> +
>> +out:
>> +	seg1->rl_mw = NULL;
>> +	rpcrdma_put_mw(r_xprt, mw);
>>  	return nsegs;
>> 
>>  out_err:
>>  	/* Force rpcrdma_buffer_get() to retry */
>> -	seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
>> +	mw->r.frmr.fr_state = FRMR_IS_STALE;
>>  	dprintk("RPC:       %s: ib_post_send status %i\n", __func__, rc);
>> -	return nsegs;
>> +	goto out;
>>  }
>> 
>>  static void
>> @@ -217,6 +222,99 @@ frwr_op_init(struct rpcrdma_xprt *r_xprt)
>>  	return 0;
>>  }
>> 
>> +/* rpcrdma_unmap_one() was already done by rpcrdma_frwr_releasesge().
>> + * Redo only the ib_post_send().
>> + */
>> +static void
>> +__retry_local_inv(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
>> +{
>> +	struct rpcrdma_xprt *r_xprt =
>> +				container_of(ia, struct rpcrdma_xprt, rx_ia);
>> +	struct ib_send_wr invalidate_wr, *bad_wr;
>> +	int rc;
>> +
>> +	pr_warn("RPC:       %s: FRMR %p is stale\n", __func__, r);
>> +
>> +	/* When this FRMR is re-inserted into rb_mws, it is no longer stale */
>> +	r->r.frmr.fr_state = FRMR_IS_INVALID;
>> +
>> +	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
>> +	invalidate_wr.wr_id = (unsigned long)(void *)r;
>> +	invalidate_wr.opcode = IB_WR_LOCAL_INV;
>> +	invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey;
>> +	DECR_CQCOUNT(&r_xprt->rx_ep);
>> +
>> +	pr_warn("RPC:       %s: frmr %p invalidating rkey %08x\n",
>> +		__func__, r, r->r.frmr.fr_mr->rkey);
>> +
>> +	read_lock(&ia->ri_qplock);
>> +	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
>> +	read_unlock(&ia->ri_qplock);
>> +	if (rc) {
>> +		/* Force __frwr_get_mw() to retry */
>> +		r->r.frmr.fr_state = FRMR_IS_STALE;
> 
> Why should you retry this? Why should it succeed next time around?
> I would think that if you didn't succeed in post_send just destroy and
> recreate the mr (in another context or just outside a spinning lock).

This context can’t sleep. We have to recycle the MR in a context that
can sleep.

> 
>> +		dprintk("RPC:       %s: ib_post_send status %i\n",
>> +			__func__, rc);
>> +	}
>> +}
>> +
>> +static void
>> +__retry_flushed_linv(struct rpcrdma_buffer *buf, struct list_head *stale)
>> +{
>> +	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
>> +	struct list_head *pos;
>> +	struct rpcrdma_mw *r;
>> +	unsigned long flags;
>> +	unsigned count;
>> +
>> +	count = 0;
>> +	list_for_each(pos, stale) {
>> +		r = list_entry(pos, struct rpcrdma_mw, mw_list);
>> +		__retry_local_inv(ia, r);
>> +		++count;
>> +	}
>> +
>> +	pr_warn("RPC:       %s: adding %u MRs to rb_mws\n",
>> +		__func__, count);
>> +
>> +	spin_lock_irqsave(&buf->rb_lock, flags);
>> +	list_splice_tail(stale, &buf->rb_mws);
>> +	spin_unlock_irqrestore(&buf->rb_lock, flags);
>> +}
>> +
>> +static struct rpcrdma_mw *
>> +__frwr_get_mw(struct rpcrdma_xprt *r_xprt)
>> +{
>> +	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
>> +	struct rpcrdma_mw *mw;
>> +	unsigned long flags;
>> +	LIST_HEAD(stale);
>> +	int count;
>> +
>> +	spin_lock_irqsave(&buf->rb_lock, flags);
>> +	count = 0;
>> +	while (!list_empty(&buf->rb_mws)) {
>> +		mw = list_entry(buf->rb_mws.next, struct rpcrdma_mw, mw_list);
>> +		list_del_init(&mw->mw_list);
>> +		if (mw->r.frmr.fr_state == FRMR_IS_INVALID)
>> +			goto out_unlock;
>> +		list_add_tail(&mw->mw_list, &stale);
> 
> So one thing I don't understand is how can you get a valid frmr in the
> list?

Probably only STALE or INVALID MRs appear in the mw_list these days.

> I think this might be making things a bit too complicated.

Yes, quite possibly.

> I imagine that mws are added back after they are successfully
> invalidated, but are they added also for the unsuccessful local_inv?
> 
> I think that by keeping only usable mws in rb_mws would make things a
> lot more simple.

That was my original design, but it was not adequate. Maybe
I’m smarter now and can think this through again.

> the logic would be:
> - take frmr from rb_mws (it is invalid)
> - use it
> - when done - do local invalidate
>  - if success return to list
>  - if not: reset it (free + alloc the mr) and return it to the list

At one point I thought I could get rid of frmr_state, and just use the
free list for normal operation.

> If this logic is possible then it would save you from handling stales
> and invalidate retries. and it would maybe allow to use a single
> rpcrdma_get_mw() which would simplify it even more...
> 
> But I might not have the full picture here..

When a transport disconnect occurs, LOCAL_INV WRs are flushed. We can’t
sleep in here, so “free + alloc” has to be left to another context.

The transport reconnect context is allowed to sleep, so it attempts
to find all STALE MRs, and recycle them as you describe above, before
reconnecting.

This is not a 100% perfect solution, however. There are some rare cases
during transport disconnect where FASTREG completes but flushes anyway.
And a second disconnect immediately after connecting is sometimes a
problem. My memory is hazy about exactly what the problematic failure
modes were.

I think if it can be guaranteed that all MRs are reset before transport
is reconnected, the extra recovery logic can be tossed. I’ll look into it.

> And again, sorry for commenting on logic that wasn't introduced by the
> patch itself...

Actually, I’m very happy to have additional review.


>> +		count++;
>> +	}
>> +
>> +	pr_err("RPC:       %s: no INVALID MWs available\n", __func__);
>> +	mw = NULL;
>> +
>> +out_unlock:
>> +	spin_unlock_irqrestore(&buf->rb_lock, flags);
>> +	if (!list_empty(&stale)) {
>> +		dprintk("RPC:       %s: found %d unsuitable MWs\n",
>> +			__func__, count);
>> +		__retry_flushed_linv(buf, &stale);
>> +	}
>> +	return mw;
>> +}
>> +
>>  /* Post a FAST_REG Work Request to register a memory region
>>   * for remote access via RDMA READ or RDMA WRITE.
>>   */
>> @@ -226,9 +324,9 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
>>  {
>>  	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
>>  	struct rpcrdma_mr_seg *seg1 = seg;
>> -	struct rpcrdma_mw *mw = seg1->rl_mw;
>> -	struct rpcrdma_frmr *frmr = &mw->r.frmr;
>> -	struct ib_mr *mr = frmr->fr_mr;
>> +	struct rpcrdma_frmr *frmr;
>> +	struct rpcrdma_mw *mw;
>> +	struct ib_mr *mr;
>>  	struct ib_send_wr fastreg_wr, *bad_wr;
>>  	u8 key;
>>  	int len, pageoff;
>> @@ -237,12 +335,21 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
>>  	u64 pa;
>>  	int page_no;
>> 
>> +	mw = __frwr_get_mw(r_xprt);
>> +	if (!mw)
>> +		return -ENOMEM;
>> +	if (seg1->rl_mw) {
>> +		rpcrdma_put_mw(r_xprt, seg1->rl_mw);
>> +		seg1->rl_mw = NULL;
>> +	}
>> +
>>  	pageoff = offset_in_page(seg1->mr_offset);
>>  	seg1->mr_offset -= pageoff;	/* start of page */
>>  	seg1->mr_len += pageoff;
>>  	len = -pageoff;
>>  	if (nsegs > ia->ri_max_frmr_depth)
>>  		nsegs = ia->ri_max_frmr_depth;
>> +	frmr = &mw->r.frmr;
>>  	for (page_no = i = 0; i < nsegs;) {
>>  		rpcrdma_map_one(ia, seg, writing);
>>  		pa = seg->mr_dma;
>> @@ -278,6 +385,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
>>  	fastreg_wr.wr.fast_reg.access_flags = writing ?
>>  				IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
>>  				IB_ACCESS_REMOTE_READ;
>> +	mr = frmr->fr_mr;
>>  	key = (u8)(mr->rkey & 0x000000FF);
>>  	ib_update_fast_reg_key(mr, ++key);
>>  	fastreg_wr.wr.fast_reg.rkey = mr->rkey;
>> @@ -287,6 +395,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
>>  	if (rc)
>>  		goto out_senderr;
>> 
>> +	seg1->rl_mw = mw;
>>  	seg1->mr_rkey = mr->rkey;
>>  	seg1->mr_base = seg1->mr_dma + pageoff;
>>  	seg1->mr_nsegs = i;
>> @@ -301,6 +410,7 @@ out_err:
>>  	frmr->fr_state = FRMR_IS_INVALID;
>>  	while (i--)
>>  		rpcrdma_unmap_one(ia, --seg);
>> +	rpcrdma_put_mw(r_xprt, mw);
>>  	return rc;
>>  }
>> 
>> diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
>> index 7b51d9d..b9689ca 100644
>> --- a/net/sunrpc/xprtrdma/rpc_rdma.c
>> +++ b/net/sunrpc/xprtrdma/rpc_rdma.c
>> @@ -284,8 +284,7 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
>>  	return (unsigned char *)iptr - (unsigned char *)headerp;
>> 
>>  out:
>> -	if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_FRMR)
>> -		r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, req, nchunks);
>> +	r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, req, nchunks);
>>  	return n;
>>  }
>> 
>> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
>> index f2316d8..5b9c104 100644
>> --- a/net/sunrpc/xprtrdma/verbs.c
>> +++ b/net/sunrpc/xprtrdma/verbs.c
>> @@ -1186,33 +1186,6 @@ rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw)
>>  	spin_unlock_irqrestore(&buf->rb_lock, flags);
>>  }
>> 
>> -/* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
>> - * some req segments uninitialized.
>> - */
>> -static void
>> -rpcrdma_buffer_put_mr(struct rpcrdma_mw **mw, struct rpcrdma_buffer *buf)
>> -{
>> -	if (*mw) {
>> -		list_add_tail(&(*mw)->mw_list, &buf->rb_mws);
>> -		*mw = NULL;
>> -	}
>> -}
>> -
>> -/* Cycle mw's back in reverse order, and "spin" them.
>> - * This delays and scrambles reuse as much as possible.
>> - */
>> -static void
>> -rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
>> -{
>> -	struct rpcrdma_mr_seg *seg = req->rl_segments;
>> -	struct rpcrdma_mr_seg *seg1 = seg;
>> -	int i;
>> -
>> -	for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++)
>> -		rpcrdma_buffer_put_mr(&seg->rl_mw, buf);
>> -	rpcrdma_buffer_put_mr(&seg1->rl_mw, buf);
>> -}
>> -
>>  static void
>>  rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
>>  {
>> @@ -1225,88 +1198,6 @@ rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
>>  	}
>>  }
>> 
>> -/* rpcrdma_unmap_one() was already done during deregistration.
>> - * Redo only the ib_post_send().
>> - */
>> -static void
>> -rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia)
>> -{
>> -	struct rpcrdma_xprt *r_xprt =
>> -				container_of(ia, struct rpcrdma_xprt, rx_ia);
>> -	struct ib_send_wr invalidate_wr, *bad_wr;
>> -	int rc;
>> -
>> -	dprintk("RPC:       %s: FRMR %p is stale\n", __func__, r);
>> -
>> -	/* When this FRMR is re-inserted into rb_mws, it is no longer stale */
>> -	r->r.frmr.fr_state = FRMR_IS_INVALID;
>> -
>> -	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
>> -	invalidate_wr.wr_id = (unsigned long)(void *)r;
>> -	invalidate_wr.opcode = IB_WR_LOCAL_INV;
>> -	invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey;
>> -	DECR_CQCOUNT(&r_xprt->rx_ep);
>> -
>> -	dprintk("RPC:       %s: frmr %p invalidating rkey %08x\n",
>> -		__func__, r, r->r.frmr.fr_mr->rkey);
>> -
>> -	read_lock(&ia->ri_qplock);
>> -	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
>> -	read_unlock(&ia->ri_qplock);
>> -	if (rc) {
>> -		/* Force rpcrdma_buffer_get() to retry */
>> -		r->r.frmr.fr_state = FRMR_IS_STALE;
>> -		dprintk("RPC:       %s: ib_post_send failed, %i\n",
>> -			__func__, rc);
>> -	}
>> -}
>> -
>> -static void
>> -rpcrdma_retry_flushed_linv(struct list_head *stale,
>> -			   struct rpcrdma_buffer *buf)
>> -{
>> -	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
>> -	struct list_head *pos;
>> -	struct rpcrdma_mw *r;
>> -	unsigned long flags;
>> -
>> -	list_for_each(pos, stale) {
>> -		r = list_entry(pos, struct rpcrdma_mw, mw_list);
>> -		rpcrdma_retry_local_inv(r, ia);
>> -	}
>> -
>> -	spin_lock_irqsave(&buf->rb_lock, flags);
>> -	list_splice_tail(stale, &buf->rb_mws);
>> -	spin_unlock_irqrestore(&buf->rb_lock, flags);
>> -}
>> -
>> -static struct rpcrdma_req *
>> -rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
>> -			 struct list_head *stale)
>> -{
>> -	struct rpcrdma_mw *r;
>> -	int i;
>> -
>> -	i = RPCRDMA_MAX_SEGS - 1;
>> -	while (!list_empty(&buf->rb_mws)) {
>> -		r = list_entry(buf->rb_mws.next,
>> -			       struct rpcrdma_mw, mw_list);
>> -		list_del(&r->mw_list);
>> -		if (r->r.frmr.fr_state == FRMR_IS_STALE) {
>> -			list_add(&r->mw_list, stale);
>> -			continue;
>> -		}
>> -		req->rl_segments[i].rl_mw = r;
>> -		if (unlikely(i-- == 0))
>> -			return req;	/* Success */
>> -	}
>> -
>> -	/* Not enough entries on rb_mws for this req */
>> -	rpcrdma_buffer_put_sendbuf(req, buf);
>> -	rpcrdma_buffer_put_mrs(req, buf);
>> -	return NULL;
>> -}
>> -
>>  /*
>>   * Get a set of request/reply buffers.
>>   *
>> @@ -1319,12 +1210,11 @@ rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
>>  struct rpcrdma_req *
>>  rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
>>  {
>> -	struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
>> -	struct list_head stale;
>>  	struct rpcrdma_req *req;
>>  	unsigned long flags;
>> 
>>  	spin_lock_irqsave(&buffers->rb_lock, flags);
>> +
>>  	if (buffers->rb_send_index == buffers->rb_max_requests) {
>>  		spin_unlock_irqrestore(&buffers->rb_lock, flags);
>>  		dprintk("RPC:       %s: out of request buffers\n", __func__);
>> @@ -1343,17 +1233,7 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
>>  	}
>>  	buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
>> 
>> -	INIT_LIST_HEAD(&stale);
>> -	switch (ia->ri_memreg_strategy) {
>> -	case RPCRDMA_FRMR:
>> -		req = rpcrdma_buffer_get_frmrs(req, buffers, &stale);
>> -		break;
>> -	default:
>> -		break;
>> -	}
>>  	spin_unlock_irqrestore(&buffers->rb_lock, flags);
>> -	if (!list_empty(&stale))
>> -		rpcrdma_retry_flushed_linv(&stale, buffers);
>>  	return req;
>>  }
>> 
>> @@ -1365,18 +1245,10 @@ void
>>  rpcrdma_buffer_put(struct rpcrdma_req *req)
>>  {
>>  	struct rpcrdma_buffer *buffers = req->rl_buffer;
>> -	struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
>>  	unsigned long flags;
>> 
>>  	spin_lock_irqsave(&buffers->rb_lock, flags);
>>  	rpcrdma_buffer_put_sendbuf(req, buffers);
>> -	switch (ia->ri_memreg_strategy) {
>> -	case RPCRDMA_FRMR:
>> -		rpcrdma_buffer_put_mrs(req, buffers);
>> -		break;
>> -	default:
>> -		break;
>> -	}
>>  	spin_unlock_irqrestore(&buffers->rb_lock, flags);
>>  }
>> 
>> 
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
>> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> 
> 

--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com



--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH v1 05/16] xprtrdma: Add a "register_external" op for each memreg mode
       [not found]             ` <982A021D-1B85-4AAF-89A3-302A21CF2B36-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
@ 2015-03-16 18:15               ` Sagi Grimberg
       [not found]                 ` <55071DBB.4050500-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
  0 siblings, 1 reply; 52+ messages in thread
From: Sagi Grimberg @ 2015-03-16 18:15 UTC (permalink / raw)
  To: Chuck Lever; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA

On 3/16/2015 6:48 PM, Chuck Lever wrote:
>
> On Mar 16, 2015, at 3:28 AM, Sagi Grimberg <sagig-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org> wrote:
>
>> On 3/13/2015 11:27 PM, Chuck Lever wrote:
>>> There is very little common processing among the different external
>>> memory registration functions. Have rpcrdma_create_chunks() call
>>> the registration method directly. This removes a stack frame and a
>>> switch statement from the external registration path.
>>>
>>> Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
>>> ---
>>>   net/sunrpc/xprtrdma/fmr_ops.c      |   51 +++++++++++
>>>   net/sunrpc/xprtrdma/frwr_ops.c     |   88 ++++++++++++++++++
>>>   net/sunrpc/xprtrdma/physical_ops.c |   17 ++++
>>>   net/sunrpc/xprtrdma/rpc_rdma.c     |    5 +
>>>   net/sunrpc/xprtrdma/verbs.c        |  172 +-----------------------------------
>>>   net/sunrpc/xprtrdma/xprt_rdma.h    |    6 +
>>>   6 files changed, 166 insertions(+), 173 deletions(-)
>>>
>>> diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
>>> index eec2660..45fb646 100644
>>> --- a/net/sunrpc/xprtrdma/fmr_ops.c
>>> +++ b/net/sunrpc/xprtrdma/fmr_ops.c
>>> @@ -29,7 +29,58 @@ fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
>>>   		     rpcrdma_max_segments(r_xprt) * RPCRDMA_MAX_FMR_SGES);
>>>   }
>>>
>>> +/* Use the ib_map_phys_fmr() verb to register a memory region
>>> + * for remote access via RDMA READ or RDMA WRITE.
>>> + */
>>> +static int
>>> +fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
>>> +	   int nsegs, bool writing)
>>> +{
>>> +	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
>>> +	struct rpcrdma_mr_seg *seg1 = seg;
>>> +	struct rpcrdma_mw *mw = seg1->rl_mw;
>>> +	u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
>>> +	int len, pageoff, i, rc;
>>> +
>>> +	pageoff = offset_in_page(seg1->mr_offset);
>>> +	seg1->mr_offset -= pageoff;	/* start of page */
>>> +	seg1->mr_len += pageoff;
>>> +	len = -pageoff;
>>> +	if (nsegs > RPCRDMA_MAX_FMR_SGES)
>>> +		nsegs = RPCRDMA_MAX_FMR_SGES;
>>> +	for (i = 0; i < nsegs;) {
>>> +		rpcrdma_map_one(ia, seg, writing);
>>> +		physaddrs[i] = seg->mr_dma;
>>> +		len += seg->mr_len;
>>> +		++seg;
>>> +		++i;
>>> +		/* Check for holes */
>>> +		if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
>>> +		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
>>> +			break;
>>> +	}
>>> +
>>> +	rc = ib_map_phys_fmr(mw->r.fmr, physaddrs, i, seg1->mr_dma);
>>> +	if (rc)
>>> +		goto out_maperr;
>>> +
>>> +	seg1->mr_rkey = mw->r.fmr->rkey;
>>> +	seg1->mr_base = seg1->mr_dma + pageoff;
>>> +	seg1->mr_nsegs = i;
>>> +	seg1->mr_len = len;
>>> +	return i;
>>> +
>>> +out_maperr:
>>> +	dprintk("RPC:       %s: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
>>> +		__func__, len, (unsigned long long)seg1->mr_dma,
>>> +		pageoff, i, rc);
>>> +	while (i--)
>>> +		rpcrdma_unmap_one(ia, --seg);
>>> +	return rc;
>>> +}
>>> +
>>>   const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
>>> +	.ro_map				= fmr_op_map,
>>>   	.ro_maxpages			= fmr_op_maxpages,
>>>   	.ro_displayname			= "fmr",
>>>   };
>>> diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
>>> index 73a5ac8..2b5ccb0 100644
>>> --- a/net/sunrpc/xprtrdma/frwr_ops.c
>>> +++ b/net/sunrpc/xprtrdma/frwr_ops.c
>>> @@ -29,7 +29,95 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
>>>   		     rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth);
>>>   }
>>>
>>> +/* Post a FAST_REG Work Request to register a memory region
>>> + * for remote access via RDMA READ or RDMA WRITE.
>>> + */
>>> +static int
>>> +frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
>>> +	    int nsegs, bool writing)
>>> +{
>>> +	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
>>> +	struct rpcrdma_mr_seg *seg1 = seg;
>>> +	struct rpcrdma_mw *mw = seg1->rl_mw;
>>> +	struct rpcrdma_frmr *frmr = &mw->r.frmr;
>>> +	struct ib_mr *mr = frmr->fr_mr;
>>> +	struct ib_send_wr fastreg_wr, *bad_wr;
>>> +	u8 key;
>>> +	int len, pageoff;
>>> +	int i, rc;
>>> +	int seg_len;
>>> +	u64 pa;
>>> +	int page_no;
>>> +
>>> +	pageoff = offset_in_page(seg1->mr_offset);
>>> +	seg1->mr_offset -= pageoff;	/* start of page */
>>> +	seg1->mr_len += pageoff;
>>> +	len = -pageoff;
>>> +	if (nsegs > ia->ri_max_frmr_depth)
>>> +		nsegs = ia->ri_max_frmr_depth;
>>> +	for (page_no = i = 0; i < nsegs;) {
>>> +		rpcrdma_map_one(ia, seg, writing);
>>> +		pa = seg->mr_dma;
>>> +		for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
>>> +			frmr->fr_pgl->page_list[page_no++] = pa;
>>> +			pa += PAGE_SIZE;
>>> +		}
>>> +		len += seg->mr_len;
>>> +		++seg;
>>> +		++i;
>>> +		/* Check for holes */
>>> +		if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
>>> +		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
>>> +			break;
>>> +	}
>>> +	dprintk("RPC:       %s: Using frmr %p to map %d segments\n",
>>> +		__func__, mw, i);
>>> +
>>> +	frmr->fr_state = FRMR_IS_VALID;
>>> +
>>> +	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
>>> +	fastreg_wr.wr_id = (unsigned long)(void *)mw;
>>> +	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
>>> +	fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma;
>>> +	fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl;
>>> +	fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
>>> +	fastreg_wr.wr.fast_reg.page_list_len = page_no;
>>> +	fastreg_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
>>
>> Umm, is this necessarily true? will the region length be full pages?
>> Why don't you assign fast_reg.length = len?
>> I might be missing something here…
>
> Don’t know. This goes back to when FRWR was introduced in this code,
> with commit 3197d309 in 2008.

I see.

So from inspecting at the code it seems that if you had a buffer of
length PAGE_SIZE + 512, the client would register two pages. So in case
the server attempts a write that exceeds this length the client might
get a data corruption instead of a much nicer remote access error
completion...

But then again, I may be missing something here...

Sagi.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH v1 05/16] xprtrdma: Add a "register_external" op for each memreg mode
       [not found]                 ` <55071DBB.4050500-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
@ 2015-03-16 20:13                   ` Steve Wise
       [not found]                     ` <55073966.30006-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
  0 siblings, 1 reply; 52+ messages in thread
From: Steve Wise @ 2015-03-16 20:13 UTC (permalink / raw)
  To: Sagi Grimberg, Chuck Lever; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA

On 3/16/2015 1:15 PM, Sagi Grimberg wrote:
> On 3/16/2015 6:48 PM, Chuck Lever wrote:
>>
>> On Mar 16, 2015, at 3:28 AM, Sagi Grimberg <sagig-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org> 
>> wrote:
>>
>>> On 3/13/2015 11:27 PM, Chuck Lever wrote:
>>>> There is very little common processing among the different external
>>>> memory registration functions. Have rpcrdma_create_chunks() call
>>>> the registration method directly. This removes a stack frame and a
>>>> switch statement from the external registration path.
>>>>
>>>> Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
>>>> ---
>>>>   net/sunrpc/xprtrdma/fmr_ops.c      |   51 +++++++++++
>>>>   net/sunrpc/xprtrdma/frwr_ops.c     |   88 ++++++++++++++++++
>>>>   net/sunrpc/xprtrdma/physical_ops.c |   17 ++++
>>>>   net/sunrpc/xprtrdma/rpc_rdma.c     |    5 +
>>>>   net/sunrpc/xprtrdma/verbs.c        |  172 
>>>> +-----------------------------------
>>>>   net/sunrpc/xprtrdma/xprt_rdma.h    |    6 +
>>>>   6 files changed, 166 insertions(+), 173 deletions(-)
>>>>
>>>> diff --git a/net/sunrpc/xprtrdma/fmr_ops.c 
>>>> b/net/sunrpc/xprtrdma/fmr_ops.c
>>>> index eec2660..45fb646 100644
>>>> --- a/net/sunrpc/xprtrdma/fmr_ops.c
>>>> +++ b/net/sunrpc/xprtrdma/fmr_ops.c
>>>> @@ -29,7 +29,58 @@ fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
>>>>                rpcrdma_max_segments(r_xprt) * RPCRDMA_MAX_FMR_SGES);
>>>>   }
>>>>
>>>> +/* Use the ib_map_phys_fmr() verb to register a memory region
>>>> + * for remote access via RDMA READ or RDMA WRITE.
>>>> + */
>>>> +static int
>>>> +fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
>>>> +       int nsegs, bool writing)
>>>> +{
>>>> +    struct rpcrdma_ia *ia = &r_xprt->rx_ia;
>>>> +    struct rpcrdma_mr_seg *seg1 = seg;
>>>> +    struct rpcrdma_mw *mw = seg1->rl_mw;
>>>> +    u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
>>>> +    int len, pageoff, i, rc;
>>>> +
>>>> +    pageoff = offset_in_page(seg1->mr_offset);
>>>> +    seg1->mr_offset -= pageoff;    /* start of page */
>>>> +    seg1->mr_len += pageoff;
>>>> +    len = -pageoff;
>>>> +    if (nsegs > RPCRDMA_MAX_FMR_SGES)
>>>> +        nsegs = RPCRDMA_MAX_FMR_SGES;
>>>> +    for (i = 0; i < nsegs;) {
>>>> +        rpcrdma_map_one(ia, seg, writing);
>>>> +        physaddrs[i] = seg->mr_dma;
>>>> +        len += seg->mr_len;
>>>> +        ++seg;
>>>> +        ++i;
>>>> +        /* Check for holes */
>>>> +        if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
>>>> +            offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
>>>> +            break;
>>>> +    }
>>>> +
>>>> +    rc = ib_map_phys_fmr(mw->r.fmr, physaddrs, i, seg1->mr_dma);
>>>> +    if (rc)
>>>> +        goto out_maperr;
>>>> +
>>>> +    seg1->mr_rkey = mw->r.fmr->rkey;
>>>> +    seg1->mr_base = seg1->mr_dma + pageoff;
>>>> +    seg1->mr_nsegs = i;
>>>> +    seg1->mr_len = len;
>>>> +    return i;
>>>> +
>>>> +out_maperr:
>>>> +    dprintk("RPC:       %s: ib_map_phys_fmr %u@0x%llx+%i (%d) 
>>>> status %i\n",
>>>> +        __func__, len, (unsigned long long)seg1->mr_dma,
>>>> +        pageoff, i, rc);
>>>> +    while (i--)
>>>> +        rpcrdma_unmap_one(ia, --seg);
>>>> +    return rc;
>>>> +}
>>>> +
>>>>   const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
>>>> +    .ro_map                = fmr_op_map,
>>>>       .ro_maxpages            = fmr_op_maxpages,
>>>>       .ro_displayname            = "fmr",
>>>>   };
>>>> diff --git a/net/sunrpc/xprtrdma/frwr_ops.c 
>>>> b/net/sunrpc/xprtrdma/frwr_ops.c
>>>> index 73a5ac8..2b5ccb0 100644
>>>> --- a/net/sunrpc/xprtrdma/frwr_ops.c
>>>> +++ b/net/sunrpc/xprtrdma/frwr_ops.c
>>>> @@ -29,7 +29,95 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
>>>>                rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth);
>>>>   }
>>>>
>>>> +/* Post a FAST_REG Work Request to register a memory region
>>>> + * for remote access via RDMA READ or RDMA WRITE.
>>>> + */
>>>> +static int
>>>> +frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
>>>> +        int nsegs, bool writing)
>>>> +{
>>>> +    struct rpcrdma_ia *ia = &r_xprt->rx_ia;
>>>> +    struct rpcrdma_mr_seg *seg1 = seg;
>>>> +    struct rpcrdma_mw *mw = seg1->rl_mw;
>>>> +    struct rpcrdma_frmr *frmr = &mw->r.frmr;
>>>> +    struct ib_mr *mr = frmr->fr_mr;
>>>> +    struct ib_send_wr fastreg_wr, *bad_wr;
>>>> +    u8 key;
>>>> +    int len, pageoff;
>>>> +    int i, rc;
>>>> +    int seg_len;
>>>> +    u64 pa;
>>>> +    int page_no;
>>>> +
>>>> +    pageoff = offset_in_page(seg1->mr_offset);
>>>> +    seg1->mr_offset -= pageoff;    /* start of page */
>>>> +    seg1->mr_len += pageoff;
>>>> +    len = -pageoff;
>>>> +    if (nsegs > ia->ri_max_frmr_depth)
>>>> +        nsegs = ia->ri_max_frmr_depth;
>>>> +    for (page_no = i = 0; i < nsegs;) {
>>>> +        rpcrdma_map_one(ia, seg, writing);
>>>> +        pa = seg->mr_dma;
>>>> +        for (seg_len = seg->mr_len; seg_len > 0; seg_len -= 
>>>> PAGE_SIZE) {
>>>> +            frmr->fr_pgl->page_list[page_no++] = pa;
>>>> +            pa += PAGE_SIZE;
>>>> +        }
>>>> +        len += seg->mr_len;
>>>> +        ++seg;
>>>> +        ++i;
>>>> +        /* Check for holes */
>>>> +        if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
>>>> +            offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
>>>> +            break;
>>>> +    }
>>>> +    dprintk("RPC:       %s: Using frmr %p to map %d segments\n",
>>>> +        __func__, mw, i);
>>>> +
>>>> +    frmr->fr_state = FRMR_IS_VALID;
>>>> +
>>>> +    memset(&fastreg_wr, 0, sizeof(fastreg_wr));
>>>> +    fastreg_wr.wr_id = (unsigned long)(void *)mw;
>>>> +    fastreg_wr.opcode = IB_WR_FAST_REG_MR;
>>>> +    fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma;
>>>> +    fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl;
>>>> +    fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
>>>> +    fastreg_wr.wr.fast_reg.page_list_len = page_no;
>>>> +    fastreg_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
>>>
>>> Umm, is this necessarily true? will the region length be full pages?
>>> Why don't you assign fast_reg.length = len?
>>> I might be missing something here…
>>
>> Don’t know. This goes back to when FRWR was introduced in this code,
>> with commit 3197d309 in 2008.
>
> I see.
>
> So from inspecting at the code it seems that if you had a buffer of
> length PAGE_SIZE + 512, the client would register two pages. So in case
> the server attempts a write that exceeds this length the client might
> get a data corruption instead of a much nicer remote access error
> completion...
>
> But then again, I may be missing something here...
>

I think you're correct.   The length does not need to be page aligned...


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH v1 05/16] xprtrdma: Add a "register_external" op for each memreg mode
       [not found]                     ` <55073966.30006-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
@ 2015-03-16 22:11                       ` Chuck Lever
       [not found]                         ` <7595A8CB-B38B-4F01-A132-CE3BE143A897-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 52+ messages in thread
From: Chuck Lever @ 2015-03-16 22:11 UTC (permalink / raw)
  To: Steve Wise; +Cc: Sagi Grimberg, linux-rdma-u79uwXL29TY76Z2rM5mHXA


On Mar 16, 2015, at 1:13 PM, Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org> wrote:

> On 3/16/2015 1:15 PM, Sagi Grimberg wrote:
>> On 3/16/2015 6:48 PM, Chuck Lever wrote:
>>> 
>>> On Mar 16, 2015, at 3:28 AM, Sagi Grimberg <sagig-LDSdmyG8hGV8YrgS2mwiifUpdFzICT1y@public.gmane.orgl> wrote:
>>> 
>>>> On 3/13/2015 11:27 PM, Chuck Lever wrote:
>>>>> There is very little common processing among the different external
>>>>> memory registration functions. Have rpcrdma_create_chunks() call
>>>>> the registration method directly. This removes a stack frame and a
>>>>> switch statement from the external registration path.
>>>>> 
>>>>> Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
>>>>> ---
>>>>>  net/sunrpc/xprtrdma/fmr_ops.c      |   51 +++++++++++
>>>>>  net/sunrpc/xprtrdma/frwr_ops.c     |   88 ++++++++++++++++++
>>>>>  net/sunrpc/xprtrdma/physical_ops.c |   17 ++++
>>>>>  net/sunrpc/xprtrdma/rpc_rdma.c     |    5 +
>>>>>  net/sunrpc/xprtrdma/verbs.c        |  172 +-----------------------------------
>>>>>  net/sunrpc/xprtrdma/xprt_rdma.h    |    6 +
>>>>>  6 files changed, 166 insertions(+), 173 deletions(-)
>>>>> 
>>>>> diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
>>>>> index eec2660..45fb646 100644
>>>>> --- a/net/sunrpc/xprtrdma/fmr_ops.c
>>>>> +++ b/net/sunrpc/xprtrdma/fmr_ops.c
>>>>> @@ -29,7 +29,58 @@ fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
>>>>>               rpcrdma_max_segments(r_xprt) * RPCRDMA_MAX_FMR_SGES);
>>>>>  }
>>>>> 
>>>>> +/* Use the ib_map_phys_fmr() verb to register a memory region
>>>>> + * for remote access via RDMA READ or RDMA WRITE.
>>>>> + */
>>>>> +static int
>>>>> +fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
>>>>> +       int nsegs, bool writing)
>>>>> +{
>>>>> +    struct rpcrdma_ia *ia = &r_xprt->rx_ia;
>>>>> +    struct rpcrdma_mr_seg *seg1 = seg;
>>>>> +    struct rpcrdma_mw *mw = seg1->rl_mw;
>>>>> +    u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
>>>>> +    int len, pageoff, i, rc;
>>>>> +
>>>>> +    pageoff = offset_in_page(seg1->mr_offset);
>>>>> +    seg1->mr_offset -= pageoff;    /* start of page */
>>>>> +    seg1->mr_len += pageoff;
>>>>> +    len = -pageoff;
>>>>> +    if (nsegs > RPCRDMA_MAX_FMR_SGES)
>>>>> +        nsegs = RPCRDMA_MAX_FMR_SGES;
>>>>> +    for (i = 0; i < nsegs;) {
>>>>> +        rpcrdma_map_one(ia, seg, writing);
>>>>> +        physaddrs[i] = seg->mr_dma;
>>>>> +        len += seg->mr_len;
>>>>> +        ++seg;
>>>>> +        ++i;
>>>>> +        /* Check for holes */
>>>>> +        if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
>>>>> +            offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
>>>>> +            break;
>>>>> +    }
>>>>> +
>>>>> +    rc = ib_map_phys_fmr(mw->r.fmr, physaddrs, i, seg1->mr_dma);
>>>>> +    if (rc)
>>>>> +        goto out_maperr;
>>>>> +
>>>>> +    seg1->mr_rkey = mw->r.fmr->rkey;
>>>>> +    seg1->mr_base = seg1->mr_dma + pageoff;
>>>>> +    seg1->mr_nsegs = i;
>>>>> +    seg1->mr_len = len;
>>>>> +    return i;
>>>>> +
>>>>> +out_maperr:
>>>>> +    dprintk("RPC:       %s: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
>>>>> +        __func__, len, (unsigned long long)seg1->mr_dma,
>>>>> +        pageoff, i, rc);
>>>>> +    while (i--)
>>>>> +        rpcrdma_unmap_one(ia, --seg);
>>>>> +    return rc;
>>>>> +}
>>>>> +
>>>>>  const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
>>>>> +    .ro_map                = fmr_op_map,
>>>>>      .ro_maxpages            = fmr_op_maxpages,
>>>>>      .ro_displayname            = "fmr",
>>>>>  };
>>>>> diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
>>>>> index 73a5ac8..2b5ccb0 100644
>>>>> --- a/net/sunrpc/xprtrdma/frwr_ops.c
>>>>> +++ b/net/sunrpc/xprtrdma/frwr_ops.c
>>>>> @@ -29,7 +29,95 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
>>>>>               rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth);
>>>>>  }
>>>>> 
>>>>> +/* Post a FAST_REG Work Request to register a memory region
>>>>> + * for remote access via RDMA READ or RDMA WRITE.
>>>>> + */
>>>>> +static int
>>>>> +frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
>>>>> +        int nsegs, bool writing)
>>>>> +{
>>>>> +    struct rpcrdma_ia *ia = &r_xprt->rx_ia;
>>>>> +    struct rpcrdma_mr_seg *seg1 = seg;
>>>>> +    struct rpcrdma_mw *mw = seg1->rl_mw;
>>>>> +    struct rpcrdma_frmr *frmr = &mw->r.frmr;
>>>>> +    struct ib_mr *mr = frmr->fr_mr;
>>>>> +    struct ib_send_wr fastreg_wr, *bad_wr;
>>>>> +    u8 key;
>>>>> +    int len, pageoff;
>>>>> +    int i, rc;
>>>>> +    int seg_len;
>>>>> +    u64 pa;
>>>>> +    int page_no;
>>>>> +
>>>>> +    pageoff = offset_in_page(seg1->mr_offset);
>>>>> +    seg1->mr_offset -= pageoff;    /* start of page */
>>>>> +    seg1->mr_len += pageoff;
>>>>> +    len = -pageoff;
>>>>> +    if (nsegs > ia->ri_max_frmr_depth)
>>>>> +        nsegs = ia->ri_max_frmr_depth;
>>>>> +    for (page_no = i = 0; i < nsegs;) {
>>>>> +        rpcrdma_map_one(ia, seg, writing);
>>>>> +        pa = seg->mr_dma;
>>>>> +        for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
>>>>> +            frmr->fr_pgl->page_list[page_no++] = pa;
>>>>> +            pa += PAGE_SIZE;
>>>>> +        }
>>>>> +        len += seg->mr_len;
>>>>> +        ++seg;
>>>>> +        ++i;
>>>>> +        /* Check for holes */
>>>>> +        if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
>>>>> +            offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
>>>>> +            break;
>>>>> +    }
>>>>> +    dprintk("RPC:       %s: Using frmr %p to map %d segments\n",
>>>>> +        __func__, mw, i);
>>>>> +
>>>>> +    frmr->fr_state = FRMR_IS_VALID;
>>>>> +
>>>>> +    memset(&fastreg_wr, 0, sizeof(fastreg_wr));
>>>>> +    fastreg_wr.wr_id = (unsigned long)(void *)mw;
>>>>> +    fastreg_wr.opcode = IB_WR_FAST_REG_MR;
>>>>> +    fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma;
>>>>> +    fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl;
>>>>> +    fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
>>>>> +    fastreg_wr.wr.fast_reg.page_list_len = page_no;
>>>>> +    fastreg_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
>>>> 
>>>> Umm, is this necessarily true? will the region length be full pages?
>>>> Why don't you assign fast_reg.length = len?
>>>> I might be missing something here…
>>> 
>>> Don’t know. This goes back to when FRWR was introduced in this code,
>>> with commit 3197d309 in 2008.
>> 
>> I see.
>> 
>> So from inspecting the code it seems that if you had a buffer of
>> length PAGE_SIZE + 512, the client would register two pages. So in case
>> the server attempts a write that exceeds this length the client might
>> get a data corruption instead of a much nicer remote access error
>> completion...
>> 
>> But then again, I may be missing something here...
>> 
> 
> I think you're correct.   The length does not need to be page aligned…

Anyone want to send a patch on top of this series? If not, I can look
into this after I get back home next week.

We need to confirm that there are no pervasive server bugs that
rely on the protection slop. If there are, a fix here could break
interoperability until the server is fixed.

--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com



--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH v1 05/16] xprtrdma: Add a "register_external" op for each memreg mode
       [not found]                         ` <7595A8CB-B38B-4F01-A132-CE3BE143A897-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
@ 2015-03-21 11:53                           ` Sagi Grimberg
  0 siblings, 0 replies; 52+ messages in thread
From: Sagi Grimberg @ 2015-03-21 11:53 UTC (permalink / raw)
  To: Chuck Lever, Steve Wise; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA

On 3/17/2015 12:11 AM, Chuck Lever wrote:
<SNIP>
>
> Anyone want to send a patch on top of this series? If not, I can look
> into this after I get back home next week.

I'll see if I can find time to get to it (no promises though...)
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 52+ messages in thread

* RE: [PATCH v1 01/16] xprtrdma: Display IPv6 addresses and port numbers correctly
       [not found]     ` <20150313212641.22783.93522.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
  2015-03-15  2:50       ` Sagi Grimberg
@ 2015-03-24  8:27       ` Devesh Sharma
       [not found]         ` <EE7902D3F51F404C82415C4803930ACD5DC3A952-DWYeeINJQrxExQ8dmkPuX0M9+F4ksjoh@public.gmane.org>
  1 sibling, 1 reply; 52+ messages in thread
From: Devesh Sharma @ 2015-03-24  8:27 UTC (permalink / raw)
  To: Chuck Lever, linux-rdma-u79uwXL29TY76Z2rM5mHXA

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 6415 bytes --]

I see in the svcrdma code, there is a big check to abort creating listener if AF is not AF_INET,
Do we have plans to address this on the server as well?

static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
                                        struct net *net,
                                        struct sockaddr *sa, int salen,
                                        int flags)
{
        struct rdma_cm_id *listen_id;
        struct svcxprt_rdma *cma_xprt;
        int ret;

        dprintk("svcrdma: Creating RDMA socket\n");
        if (sa->sa_family != AF_INET) {
                dprintk("svcrdma: Address family %d is not supported.\n", sa->sa_family);
                return ERR_PTR(-EAFNOSUPPORT);

-Regards
Devesh

> -----Original Message-----
> From: linux-rdma-owner@vger.kernel.org [mailto:linux-rdma-
> owner@vger.kernel.org] On Behalf Of Chuck Lever
> Sent: Saturday, March 14, 2015 2:57 AM
> To: linux-rdma@vger.kernel.org
> Subject: [PATCH v1 01/16] xprtrdma: Display IPv6 addresses and port numbers
> correctly
> 
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
>  net/sunrpc/xprtrdma/transport.c |   47
> ++++++++++++++++++++++++++++++++-------
>  net/sunrpc/xprtrdma/verbs.c     |   21 +++++++----------
>  2 files changed, 47 insertions(+), 21 deletions(-)
> 
> diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
> index 2e192ba..26a62e7 100644
> --- a/net/sunrpc/xprtrdma/transport.c
> +++ b/net/sunrpc/xprtrdma/transport.c
> @@ -157,12 +157,47 @@ static struct ctl_table sunrpc_table[] = {
>  static struct rpc_xprt_ops xprt_rdma_procs;	/* forward reference */
> 
>  static void
> +xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr
> +*sap) {
> +	struct sockaddr_in *sin = (struct sockaddr_in *)sap;
> +	char buf[20];
> +
> +	snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr));
> +	xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf,
> +GFP_KERNEL);
> +
> +	xprt->address_strings[RPC_DISPLAY_NETID] = "rdma"; }
> +
> +static void
> +xprt_rdma_format_addresses6(struct rpc_xprt *xprt, struct sockaddr
> +*sap) {
> +	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
> +	char buf[40];
> +
> +	snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr);
> +	xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf,
> +GFP_KERNEL);
> +
> +	xprt->address_strings[RPC_DISPLAY_NETID] = "rdma6"; }
> +
> +static void
>  xprt_rdma_format_addresses(struct rpc_xprt *xprt)  {
>  	struct sockaddr *sap = (struct sockaddr *)
>  					&rpcx_to_rdmad(xprt).addr;
> -	struct sockaddr_in *sin = (struct sockaddr_in *)sap;
> -	char buf[64];
> +	char buf[128];
> +
> +	switch (sap->sa_family) {
> +	case AF_INET:
> +		xprt_rdma_format_addresses4(xprt, sap);
> +		break;
> +	case AF_INET6:
> +		xprt_rdma_format_addresses6(xprt, sap);
> +		break;
> +	default:
> +		pr_err("rpcrdma: Unrecognized address family\n");
> +		return;
> +	}
> 
>  	(void)rpc_ntop(sap, buf, sizeof(buf));
>  	xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf,
> GFP_KERNEL); @@ -170,16 +205,10 @@ xprt_rdma_format_addresses(struct
> rpc_xprt *xprt)
>  	snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap));
>  	xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf,
> GFP_KERNEL);
> 
> -	xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
> -
> -	snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr));
> -	xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf,
> GFP_KERNEL);
> -
>  	snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap));
>  	xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf,
> GFP_KERNEL);
> 
> -	/* netid */
> -	xprt->address_strings[RPC_DISPLAY_NETID] = "rdma";
> +	xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
>  }
> 
>  static void
> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index
> 124676c..1aa55b7 100644
> --- a/net/sunrpc/xprtrdma/verbs.c
> +++ b/net/sunrpc/xprtrdma/verbs.c
> @@ -50,6 +50,7 @@
>  #include <linux/interrupt.h>
>  #include <linux/slab.h>
>  #include <linux/prefetch.h>
> +#include <linux/sunrpc/addr.h>
>  #include <asm/bitops.h>
> 
>  #include "xprt_rdma.h"
> @@ -424,7 +425,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct
> rdma_cm_event *event)
>  	struct rpcrdma_ia *ia = &xprt->rx_ia;
>  	struct rpcrdma_ep *ep = &xprt->rx_ep;
>  #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
> -	struct sockaddr_in *addr = (struct sockaddr_in *) &ep-
> >rep_remote_addr;
> +	struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr;
>  #endif
>  	struct ib_qp_attr *attr = &ia->ri_qp_attr;
>  	struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr; @@ -480,9 +481,8
> @@ connected:
>  		wake_up_all(&ep->rep_connect_wait);
>  		/*FALLTHROUGH*/
>  	default:
> -		dprintk("RPC:       %s: %pI4:%u (ep 0x%p): %s\n",
> -			__func__, &addr->sin_addr.s_addr,
> -			ntohs(addr->sin_port), ep,
> +		dprintk("RPC:       %s: %pIS:%u (ep 0x%p): %s\n",
> +			__func__, sap, rpc_get_port(sap), ep,
>  			CONNECTION_MSG(event->event));
>  		break;
>  	}
> @@ -491,19 +491,16 @@ connected:
>  	if (connstate == 1) {
>  		int ird = attr->max_dest_rd_atomic;
>  		int tird = ep->rep_remote_cma.responder_resources;
> -		printk(KERN_INFO "rpcrdma: connection to %pI4:%u "
> -			"on %s, memreg %d slots %d ird %d%s\n",
> -			&addr->sin_addr.s_addr,
> -			ntohs(addr->sin_port),
> +
> +		pr_info("rpcrdma: connection to %pIS:%u on %s, memreg %d
> slots %d ird %d%s\n",
> +			sap, rpc_get_port(sap),
>  			ia->ri_id->device->name,
>  			ia->ri_memreg_strategy,
>  			xprt->rx_buf.rb_max_requests,
>  			ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
>  	} else if (connstate < 0) {
> -		printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed
> (%d)\n",
> -			&addr->sin_addr.s_addr,
> -			ntohs(addr->sin_port),
> -			connstate);
> +		pr_info("rpcrdma: connection to %pIS:%u closed (%d)\n",
> +			sap, rpc_get_port(sap), connstate);
>  	}
>  #endif
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body
> of a message to majordomo@vger.kernel.org More majordomo info at
> http://vger.kernel.org/majordomo-info.html
N‹§²æìr¸›yúèšØb²X¬¶Ç§vØ^–)Þº{.nÇ+‰·¥Š{±­ÙšŠ{ayº\x1dʇڙë,j\a­¢f£¢·hš‹»öì\x17/oSc¾™Ú³9˜uÀ¦æå‰È&jw¨®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿïêäz¹Þ–Šàþf£¢·hšˆ§~ˆmš

^ permalink raw reply	[flat|nested] 52+ messages in thread

* RE: [PATCH v1 06/16] xprtrdma: Add a "deregister_external" op for each memreg mode
       [not found]     ` <20150313212728.22783.11821.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
  2015-03-16 10:34       ` Sagi Grimberg
@ 2015-03-24 11:12       ` Devesh Sharma
       [not found]         ` <EE7902D3F51F404C82415C4803930ACD5DC3A9C4-DWYeeINJQrxExQ8dmkPuX0M9+F4ksjoh@public.gmane.org>
  1 sibling, 1 reply; 52+ messages in thread
From: Devesh Sharma @ 2015-03-24 11:12 UTC (permalink / raw)
  To: Chuck Lever, linux-rdma-u79uwXL29TY76Z2rM5mHXA

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 11644 bytes --]

> -----Original Message-----
> From: linux-rdma-owner@vger.kernel.org [mailto:linux-rdma-
> owner@vger.kernel.org] On Behalf Of Chuck Lever
> Sent: Saturday, March 14, 2015 2:57 AM
> To: linux-rdma@vger.kernel.org
> Subject: [PATCH v1 06/16] xprtrdma: Add a "deregister_external" op for each
> memreg mode
> 
> There is very little common processing among the different external memory
> deregistration functions.
> 
> In addition, instead of calling the deregistration function for each segment,
> have one call release all segments for a request. This makes the API a little
> asymmetrical, but a hair faster.
> 
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
>  net/sunrpc/xprtrdma/fmr_ops.c      |   37 ++++++++++++++++
>  net/sunrpc/xprtrdma/frwr_ops.c     |   46 ++++++++++++++++++++
>  net/sunrpc/xprtrdma/physical_ops.c |   13 ++++++
>  net/sunrpc/xprtrdma/rpc_rdma.c     |    7 +--
>  net/sunrpc/xprtrdma/transport.c    |    8 +---
>  net/sunrpc/xprtrdma/verbs.c        |   81 ------------------------------------
>  net/sunrpc/xprtrdma/xprt_rdma.h    |    5 +-
>  7 files changed, 103 insertions(+), 94 deletions(-)
> 
> diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
> index 45fb646..9b983b4 100644
> --- a/net/sunrpc/xprtrdma/fmr_ops.c
> +++ b/net/sunrpc/xprtrdma/fmr_ops.c
> @@ -20,6 +20,32 @@
>  /* Maximum scatter/gather per FMR */
>  #define RPCRDMA_MAX_FMR_SGES	(64)
> 
> +/* Use the ib_unmap_fmr() verb to prevent further remote
> + * access via RDMA READ or RDMA WRITE.
> + */
> +static int
> +__fmr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) {
> +	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
> +	struct rpcrdma_mr_seg *seg1 = seg;
> +	int rc, nsegs = seg->mr_nsegs;
> +	LIST_HEAD(l);
> +
> +	list_add(&seg1->rl_mw->r.fmr->list, &l);
> +	rc = ib_unmap_fmr(&l);
> +	read_lock(&ia->ri_qplock);
> +	while (seg1->mr_nsegs--)
> +		rpcrdma_unmap_one(ia, seg++);
> +	read_unlock(&ia->ri_qplock);
> +	if (rc)
> +		goto out_err;
> +	return nsegs;
> +
> +out_err:
> +	dprintk("RPC:       %s: ib_unmap_fmr status %i\n", __func__, rc);
> +	return nsegs;
> +}
> +
>  /* FMR mode conveys up to 64 pages of payload per chunk segment.
>   */
>  static size_t
> @@ -79,8 +105,19 @@ out_maperr:
>  	return rc;
>  }
> 
> +static void
> +fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
> +	     unsigned int count)
> +{
> +	unsigned int i;
> +
> +	for (i = 0; count--;)
> +		i += __fmr_unmap(r_xprt, &req->rl_segments[i]); }
> +
>  const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
>  	.ro_map				= fmr_op_map,
> +	.ro_unmap			= fmr_op_unmap,
>  	.ro_maxpages			= fmr_op_maxpages,
>  	.ro_displayname			= "fmr",
>  };
> diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
> index 2b5ccb0..05b5761 100644
> --- a/net/sunrpc/xprtrdma/frwr_ops.c
> +++ b/net/sunrpc/xprtrdma/frwr_ops.c
> @@ -17,6 +17,41 @@
>  # define RPCDBG_FACILITY	RPCDBG_TRANS
>  #endif
> 
> +/* Post a LOCAL_INV Work Request to prevent further remote access
> + * via RDMA READ or RDMA WRITE.
> + */
> +static int
> +__frwr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) {
> +	struct rpcrdma_mr_seg *seg1 = seg;
> +	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
> +	struct ib_send_wr invalidate_wr, *bad_wr;
> +	int rc, nsegs = seg->mr_nsegs;
> +
> +	seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;
> +
> +	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
> +	invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw;
> +	invalidate_wr.opcode = IB_WR_LOCAL_INV;
> +	invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey;
> +	DECR_CQCOUNT(&r_xprt->rx_ep);
> +
> +	read_lock(&ia->ri_qplock);
> +	while (seg1->mr_nsegs--)
> +		rpcrdma_unmap_one(ia, seg++);
> +	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
> +	read_unlock(&ia->ri_qplock);
> +	if (rc)
> +		goto out_err;
> +	return nsegs;
> +
> +out_err:
> +	/* Force rpcrdma_buffer_get() to retry */
> +	seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
> +	dprintk("RPC:       %s: ib_post_send status %i\n", __func__, rc);
> +	return nsegs;
> +}
> +
>  /* FRWR mode conveys a list of pages per chunk segment. The
>   * maximum length of that list is the FRWR page list depth.
>   */
> @@ -116,8 +151,19 @@ out_err:
>  	return rc;
>  }
> 
> +static void
> +frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
> +	      unsigned int count)
> +{
> +	unsigned int i;
> +
> +	for (i = 0; count--;)
> +		i += __frwr_unmap(r_xprt, &req->rl_segments[i]); }
> +
>  const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
>  	.ro_map				= frwr_op_map,
> +	.ro_unmap			= frwr_op_unmap,
>  	.ro_maxpages			= frwr_op_maxpages,
>  	.ro_displayname			= "frwr",
>  };
> diff --git a/net/sunrpc/xprtrdma/physical_ops.c
> b/net/sunrpc/xprtrdma/physical_ops.c
> index 5a284ee..f2c15be 100644
> --- a/net/sunrpc/xprtrdma/physical_ops.c
> +++ b/net/sunrpc/xprtrdma/physical_ops.c
> @@ -44,8 +44,21 @@ physical_op_map(struct rpcrdma_xprt *r_xprt, struct
> rpcrdma_mr_seg *seg,
>  	return 1;
>  }
> 
> +/* Unmap a memory region, but leave it registered.
> + */
> +static void
> +physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
> +		  unsigned int count)
> +{
> +	unsigned int i;
> +
> +	for (i = 0; i < count; i++)
> +		rpcrdma_unmap_one(&r_xprt->rx_ia, &req->rl_segments[i]); }
> +
>  const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
>  	.ro_map				= physical_op_map,
> +	.ro_unmap			= physical_op_unmap,
>  	.ro_maxpages			= physical_op_maxpages,
>  	.ro_displayname			= "physical",
>  };
> diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c
> b/net/sunrpc/xprtrdma/rpc_rdma.c index 6ab1d03..7b51d9d 100644
> --- a/net/sunrpc/xprtrdma/rpc_rdma.c
> +++ b/net/sunrpc/xprtrdma/rpc_rdma.c
> @@ -284,11 +284,8 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct
> xdr_buf *target,
>  	return (unsigned char *)iptr - (unsigned char *)headerp;
> 
>  out:
> -	if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_FRMR) {
> -		for (pos = 0; nchunks--;)
> -			pos += rpcrdma_deregister_external(
> -					&req->rl_segments[pos], r_xprt);
> -	}
> +	if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_FRMR)
> +		r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, req, nchunks);

Do we still need to check for memreg_strategy in this exit path? Am I missing something..?

>  	return n;
>  }
> 
> diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
> index 9a9da40..c484671 100644
> --- a/net/sunrpc/xprtrdma/transport.c
> +++ b/net/sunrpc/xprtrdma/transport.c
> @@ -571,7 +571,6 @@ xprt_rdma_free(void *buffer)
>  	struct rpcrdma_req *req;
>  	struct rpcrdma_xprt *r_xprt;
>  	struct rpcrdma_regbuf *rb;
> -	int i;
> 
>  	if (buffer == NULL)
>  		return;
> @@ -582,11 +581,8 @@ xprt_rdma_free(void *buffer)
> 
>  	dprintk("RPC:       %s: called on 0x%p\n", __func__, req->rl_reply);
> 
> -	for (i = 0; req->rl_nchunks;) {
> -		--req->rl_nchunks;
> -		i += rpcrdma_deregister_external(
> -			&req->rl_segments[i], r_xprt);
> -	}
> +	r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, req, req->rl_nchunks);
> +	req->rl_nchunks = 0;
> 
>  	rpcrdma_buffer_put(req);
>  }
> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index
> 851ed97..a1621fd 100644
> --- a/net/sunrpc/xprtrdma/verbs.c
> +++ b/net/sunrpc/xprtrdma/verbs.c
> @@ -1509,7 +1509,7 @@ rpcrdma_buffer_put_sendbuf(struct rpcrdma_req
> *req, struct rpcrdma_buffer *buf)
>  	}
>  }
> 
> -/* rpcrdma_unmap_one() was already done by
> rpcrdma_deregister_frmr_external().
> +/* rpcrdma_unmap_one() was already done during deregistration.
>   * Redo only the ib_post_send().
>   */
>  static void
> @@ -1889,85 +1889,6 @@ rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct
> rpcrdma_mr_seg *seg)
>  				seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
> }
> 
> -static int
> -rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
> -			struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
> -{
> -	struct rpcrdma_mr_seg *seg1 = seg;
> -	struct ib_send_wr invalidate_wr, *bad_wr;
> -	int rc;
> -
> -	seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;
> -
> -	memset(&invalidate_wr, 0, sizeof invalidate_wr);
> -	invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw;
> -	invalidate_wr.opcode = IB_WR_LOCAL_INV;
> -	invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey;
> -	DECR_CQCOUNT(&r_xprt->rx_ep);
> -
> -	read_lock(&ia->ri_qplock);
> -	while (seg1->mr_nsegs--)
> -		rpcrdma_unmap_one(ia, seg++);
> -	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
> -	read_unlock(&ia->ri_qplock);
> -	if (rc) {
> -		/* Force rpcrdma_buffer_get() to retry */
> -		seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
> -		dprintk("RPC:       %s: failed ib_post_send for invalidate,"
> -			" status %i\n", __func__, rc);
> -	}
> -	return rc;
> -}
> -
> -static int
> -rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
> -			struct rpcrdma_ia *ia)
> -{
> -	struct rpcrdma_mr_seg *seg1 = seg;
> -	LIST_HEAD(l);
> -	int rc;
> -
> -	list_add(&seg1->rl_mw->r.fmr->list, &l);
> -	rc = ib_unmap_fmr(&l);
> -	read_lock(&ia->ri_qplock);
> -	while (seg1->mr_nsegs--)
> -		rpcrdma_unmap_one(ia, seg++);
> -	read_unlock(&ia->ri_qplock);
> -	if (rc)
> -		dprintk("RPC:       %s: failed ib_unmap_fmr,"
> -			" status %i\n", __func__, rc);
> -	return rc;
> -}
> -
> -int
> -rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
> -		struct rpcrdma_xprt *r_xprt)
> -{
> -	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
> -	int nsegs = seg->mr_nsegs, rc;
> -
> -	switch (ia->ri_memreg_strategy) {
> -
> -	case RPCRDMA_ALLPHYSICAL:
> -		read_lock(&ia->ri_qplock);
> -		rpcrdma_unmap_one(ia, seg);
> -		read_unlock(&ia->ri_qplock);
> -		break;
> -
> -	case RPCRDMA_FRMR:
> -		rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
> -		break;
> -
> -	case RPCRDMA_MTHCAFMR:
> -		rc = rpcrdma_deregister_fmr_external(seg, ia);
> -		break;
> -
> -	default:
> -		break;
> -	}
> -	return nsegs;
> -}
> -
>  /*
>   * Prepost any receive buffer, then post send.
>   *
> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h
> b/net/sunrpc/xprtrdma/xprt_rdma.h index 7bf077b..3aabbb2 100644
> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
> @@ -338,6 +338,8 @@ struct rpcrdma_xprt;  struct rpcrdma_memreg_ops {
>  	int		(*ro_map)(struct rpcrdma_xprt *,
>  				  struct rpcrdma_mr_seg *, int, bool);
> +	void		(*ro_unmap)(struct rpcrdma_xprt *,
> +				    struct rpcrdma_req *, unsigned int);
>  	size_t		(*ro_maxpages)(struct rpcrdma_xprt *);
>  	const char	*ro_displayname;
>  };
> @@ -405,9 +407,6 @@ void rpcrdma_buffer_put(struct rpcrdma_req *);  void
> rpcrdma_recv_buffer_get(struct rpcrdma_req *);  void
> rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
> 
> -int rpcrdma_deregister_external(struct rpcrdma_mr_seg *,
> -				struct rpcrdma_xprt *);
> -
>  struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *,
>  					    size_t, gfp_t);
>  void rpcrdma_free_regbuf(struct rpcrdma_ia *,
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body
> of a message to majordomo@vger.kernel.org More majordomo info at
> http://vger.kernel.org/majordomo-info.html
N‹§²æìr¸›yúèšØb²X¬¶Ç§vØ^–)Þº{.nÇ+‰·¥Š{±­ÙšŠ{ayº\x1dʇڙë,j\a­¢f£¢·hš‹»öì\x17/oSc¾™Ú³9˜uÀ¦æå‰È&jw¨®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿïêäz¹Þ–Šàþf£¢·hšˆ§~ˆmš

^ permalink raw reply	[flat|nested] 52+ messages in thread

* RE: [PATCH v1 08/16] xprtrdma: Add "reset MRs" memreg op
       [not found]     ` <20150313212747.22783.98300.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
  2015-03-16 10:38       ` Sagi Grimberg
@ 2015-03-24 11:27       ` Devesh Sharma
       [not found]         ` <EE7902D3F51F404C82415C4803930ACD5DC3A9E3-DWYeeINJQrxExQ8dmkPuX0M9+F4ksjoh@public.gmane.org>
  1 sibling, 1 reply; 52+ messages in thread
From: Devesh Sharma @ 2015-03-24 11:27 UTC (permalink / raw)
  To: Chuck Lever, linux-rdma-u79uwXL29TY76Z2rM5mHXA

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 9802 bytes --]

> -----Original Message-----
> From: linux-rdma-owner@vger.kernel.org [mailto:linux-rdma-
> owner@vger.kernel.org] On Behalf Of Chuck Lever
> Sent: Saturday, March 14, 2015 2:58 AM
> To: linux-rdma@vger.kernel.org
> Subject: [PATCH v1 08/16] xprtrdma: Add "reset MRs" memreg op
> 
> This method is invoked when a transport instance is about to be reconnected.
> Each Memory Region object is reset to its initial state.
> 
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
>  net/sunrpc/xprtrdma/fmr_ops.c      |   23 ++++++++
>  net/sunrpc/xprtrdma/frwr_ops.c     |   46 ++++++++++++++++
>  net/sunrpc/xprtrdma/physical_ops.c |    6 ++
>  net/sunrpc/xprtrdma/verbs.c        |  103 +-----------------------------------
>  net/sunrpc/xprtrdma/xprt_rdma.h    |    1
>  5 files changed, 78 insertions(+), 101 deletions(-)
> 
> diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
> index 1501db0..1ccb3de 100644
> --- a/net/sunrpc/xprtrdma/fmr_ops.c
> +++ b/net/sunrpc/xprtrdma/fmr_ops.c
> @@ -156,10 +156,33 @@ fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct
> rpcrdma_req *req,
>  		i += __fmr_unmap(r_xprt, &req->rl_segments[i]);  }
> 
> +/* After a disconnect, unmap all FMRs.
> + *
> + * This is invoked only in the transport connect worker in order
> + * to serialize with rpcrdma_register_fmr_external().
> + */
> +static void
> +fmr_op_reset(struct rpcrdma_xprt *r_xprt) {
> +	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
> +	struct rpcrdma_mw *r;
> +	LIST_HEAD(list);
> +	int rc;
> +
> +	list_for_each_entry(r, &buf->rb_all, mw_all)
> +		list_add(&r->r.fmr->list, &list);
> +
> +	rc = ib_unmap_fmr(&list);
> +	if (rc)
> +		dprintk("RPC:       %s: ib_unmap_fmr failed %i\n",
> +			__func__, rc);
> +}
> +
>  const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
>  	.ro_map				= fmr_op_map,
>  	.ro_unmap			= fmr_op_unmap,
>  	.ro_maxpages			= fmr_op_maxpages,
>  	.ro_init			= fmr_op_init,
> +	.ro_reset			= fmr_op_reset,
>  	.ro_displayname			= "fmr",
>  };
> diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
> index 975372c..b4ce0e5 100644
> --- a/net/sunrpc/xprtrdma/frwr_ops.c
> +++ b/net/sunrpc/xprtrdma/frwr_ops.c
> @@ -81,6 +81,18 @@ out_err:
>  	return nsegs;
>  }
> 
> +static void
> +__frwr_release(struct rpcrdma_mw *r)
> +{
> +	int rc;
> +
> +	rc = ib_dereg_mr(r->r.frmr.fr_mr);
> +	if (rc)
> +		dprintk("RPC:       %s: ib_dereg_mr status %i\n",
> +			__func__, rc);
> +	ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
> +}
> +
>  /* FRWR mode conveys a list of pages per chunk segment. The
>   * maximum length of that list is the FRWR page list depth.
>   */
> @@ -226,10 +238,44 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct
> rpcrdma_req *req,
>  		i += __frwr_unmap(r_xprt, &req->rl_segments[i]);  }
> 
> +/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
> + * an unusable state. Find FRMRs in this state and dereg / reg
> + * each.  FRMRs that are VALID and attached to an rpcrdma_req are
> + * also torn down.
> + *
> + * This gives all in-use FRMRs a fresh rkey and leaves them INVALID.
> + *
> + * This is invoked only in the transport connect worker in order
> + * to serialize with rpcrdma_register_frmr_external().
> + */
> +static void
> +frwr_op_reset(struct rpcrdma_xprt *r_xprt) {
> +	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
> +	struct ib_device *device = r_xprt->rx_ia.ri_id->device;
> +	unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth;
> +	struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
> +	struct rpcrdma_mw *r;
> +	int rc;
> +
> +	list_for_each_entry(r, &buf->rb_all, mw_all) {
> +		if (r->r.frmr.fr_state == FRMR_IS_INVALID)
> +			continue;
> +
> +		__frwr_release(r);
> +		rc = __frwr_init(r, pd, device, depth);
> +		if (rc)
> +			continue;

Should we print something here, e.g. "failed to allocate FRMR; the mount will work with fewer FRMRs, so a performance hit is expected"?

> +
> +		r->r.frmr.fr_state = FRMR_IS_INVALID;
> +	}
> +}
> +
>  const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
>  	.ro_map				= frwr_op_map,
>  	.ro_unmap			= frwr_op_unmap,
>  	.ro_maxpages			= frwr_op_maxpages,
>  	.ro_init			= frwr_op_init,
> +	.ro_reset			= frwr_op_reset,
>  	.ro_displayname			= "frwr",
>  };
> diff --git a/net/sunrpc/xprtrdma/physical_ops.c
> b/net/sunrpc/xprtrdma/physical_ops.c
> index ae2b0bc..0afc691 100644
> --- a/net/sunrpc/xprtrdma/physical_ops.c
> +++ b/net/sunrpc/xprtrdma/physical_ops.c
> @@ -62,10 +62,16 @@ physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct
> rpcrdma_req *req,
>  		rpcrdma_unmap_one(&r_xprt->rx_ia, &req->rl_segments[i]);  }
> 
> +static void
> +physical_op_reset(struct rpcrdma_xprt *r_xprt) { }
> +
>  const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
>  	.ro_map				= physical_op_map,
>  	.ro_unmap			= physical_op_unmap,
>  	.ro_maxpages			= physical_op_maxpages,
>  	.ro_init			= physical_op_init,
> +	.ro_reset			= physical_op_reset,
>  	.ro_displayname			= "physical",
>  };
> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index
> d7810d6..e17d91a 100644
> --- a/net/sunrpc/xprtrdma/verbs.c
> +++ b/net/sunrpc/xprtrdma/verbs.c
> @@ -63,9 +63,6 @@
>  # define RPCDBG_FACILITY	RPCDBG_TRANS
>  #endif
> 
> -static void rpcrdma_reset_frmrs(struct rpcrdma_ia *); -static void
> rpcrdma_reset_fmrs(struct rpcrdma_ia *);
> -
>  /*
>   * internal functions
>   */
> @@ -944,21 +941,9 @@ retry:
>  		rpcrdma_ep_disconnect(ep, ia);
>  		rpcrdma_flush_cqs(ep);
> 
> -		switch (ia->ri_memreg_strategy) {
> -		case RPCRDMA_FRMR:
> -			rpcrdma_reset_frmrs(ia);
> -			break;
> -		case RPCRDMA_MTHCAFMR:
> -			rpcrdma_reset_fmrs(ia);
> -			break;
> -		case RPCRDMA_ALLPHYSICAL:
> -			break;
> -		default:
> -			rc = -EIO;
> -			goto out;
> -		}
> -
>  		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
> +		ia->ri_ops->ro_reset(xprt);
> +
>  		id = rpcrdma_create_id(xprt, ia,
>  				(struct sockaddr *)&xprt->rx_data.addr);
>  		if (IS_ERR(id)) {
> @@ -1288,90 +1273,6 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer
> *buf)
>  	kfree(buf->rb_pool);
>  }
> 
> -/* After a disconnect, unmap all FMRs.
> - *
> - * This is invoked only in the transport connect worker in order
> - * to serialize with rpcrdma_register_fmr_external().
> - */
> -static void
> -rpcrdma_reset_fmrs(struct rpcrdma_ia *ia) -{
> -	struct rpcrdma_xprt *r_xprt =
> -				container_of(ia, struct rpcrdma_xprt, rx_ia);
> -	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
> -	struct list_head *pos;
> -	struct rpcrdma_mw *r;
> -	LIST_HEAD(l);
> -	int rc;
> -
> -	list_for_each(pos, &buf->rb_all) {
> -		r = list_entry(pos, struct rpcrdma_mw, mw_all);
> -
> -		INIT_LIST_HEAD(&l);
> -		list_add(&r->r.fmr->list, &l);
> -		rc = ib_unmap_fmr(&l);
> -		if (rc)
> -			dprintk("RPC:       %s: ib_unmap_fmr failed %i\n",
> -				__func__, rc);
> -	}
> -}
> -
> -/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
> - * an unusable state. Find FRMRs in this state and dereg / reg
> - * each.  FRMRs that are VALID and attached to an rpcrdma_req are
> - * also torn down.
> - *
> - * This gives all in-use FRMRs a fresh rkey and leaves them INVALID.
> - *
> - * This is invoked only in the transport connect worker in order
> - * to serialize with rpcrdma_register_frmr_external().
> - */
> -static void
> -rpcrdma_reset_frmrs(struct rpcrdma_ia *ia) -{
> -	struct rpcrdma_xprt *r_xprt =
> -				container_of(ia, struct rpcrdma_xprt, rx_ia);
> -	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
> -	struct list_head *pos;
> -	struct rpcrdma_mw *r;
> -	int rc;
> -
> -	list_for_each(pos, &buf->rb_all) {
> -		r = list_entry(pos, struct rpcrdma_mw, mw_all);
> -
> -		if (r->r.frmr.fr_state == FRMR_IS_INVALID)
> -			continue;
> -
> -		rc = ib_dereg_mr(r->r.frmr.fr_mr);
> -		if (rc)
> -			dprintk("RPC:       %s: ib_dereg_mr failed %i\n",
> -				__func__, rc);
> -		ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
> -
> -		r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
> -					ia->ri_max_frmr_depth);
> -		if (IS_ERR(r->r.frmr.fr_mr)) {
> -			rc = PTR_ERR(r->r.frmr.fr_mr);
> -			dprintk("RPC:       %s: ib_alloc_fast_reg_mr"
> -				" failed %i\n", __func__, rc);
> -			continue;
> -		}
> -		r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
> -					ia->ri_id->device,
> -					ia->ri_max_frmr_depth);
> -		if (IS_ERR(r->r.frmr.fr_pgl)) {
> -			rc = PTR_ERR(r->r.frmr.fr_pgl);
> -			dprintk("RPC:       %s: "
> -				"ib_alloc_fast_reg_page_list "
> -				"failed %i\n", __func__, rc);
> -
> -			ib_dereg_mr(r->r.frmr.fr_mr);
> -			continue;
> -		}
> -		r->r.frmr.fr_state = FRMR_IS_INVALID;
> -	}
> -}
> -
>  /* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
>   * some req segments uninitialized.
>   */
> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h
> b/net/sunrpc/xprtrdma/xprt_rdma.h index 4fe3c38..cdf6763 100644
> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
> @@ -342,6 +342,7 @@ struct rpcrdma_memreg_ops {
>  				    struct rpcrdma_req *, unsigned int);
>  	size_t		(*ro_maxpages)(struct rpcrdma_xprt *);
>  	int		(*ro_init)(struct rpcrdma_xprt *);
> +	void		(*ro_reset)(struct rpcrdma_xprt *);
>  	const char	*ro_displayname;
>  };
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body
> of a message to majordomo@vger.kernel.org More majordomo info at
> http://vger.kernel.org/majordomo-info.html
[-- mangled transport-encoding residue of the mailing-list footer --]

^ permalink raw reply	[flat|nested] 52+ messages in thread

* RE: [PATCH v1 10/16] xprtrdma: Add "open" memreg op
       [not found]     ` <20150313212807.22783.61881.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
@ 2015-03-24 11:34       ` Devesh Sharma
       [not found]         ` <EE7902D3F51F404C82415C4803930ACD5DC3A9FF-DWYeeINJQrxExQ8dmkPuX0M9+F4ksjoh@public.gmane.org>
  0 siblings, 1 reply; 52+ messages in thread
From: Devesh Sharma @ 2015-03-24 11:34 UTC (permalink / raw)
  To: Chuck Lever, linux-rdma-u79uwXL29TY76Z2rM5mHXA

> -----Original Message-----
> From: linux-rdma-owner@vger.kernel.org [mailto:linux-rdma-
> owner@vger.kernel.org] On Behalf Of Chuck Lever
> Sent: Saturday, March 14, 2015 2:58 AM
> To: linux-rdma@vger.kernel.org
> Subject: [PATCH v1 10/16] xprtrdma: Add "open" memreg op
> 
> The open op determines the size of various transport data structures based on
> device capabilities and memory registration mode.
> 
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
>  net/sunrpc/xprtrdma/fmr_ops.c      |   22 +++++++++++++
>  net/sunrpc/xprtrdma/frwr_ops.c     |   60
> ++++++++++++++++++++++++++++++++++++
>  net/sunrpc/xprtrdma/physical_ops.c |   22 +++++++++++++
>  net/sunrpc/xprtrdma/verbs.c        |   54 ++------------------------------
>  net/sunrpc/xprtrdma/xprt_rdma.h    |    3 ++
>  5 files changed, 110 insertions(+), 51 deletions(-)
> 
> diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
> index 3115e4b..96e6cd3 100644
> --- a/net/sunrpc/xprtrdma/fmr_ops.c
> +++ b/net/sunrpc/xprtrdma/fmr_ops.c
> @@ -46,6 +46,27 @@ out_err:
>  	return nsegs;
>  }
> 
> +static int
> +fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
> +	    struct rpcrdma_create_data_internal *cdata) {
> +	struct ib_device_attr *devattr = &ia->ri_devattr;
> +	unsigned int wrs, max_wrs;
> +
> +	max_wrs = devattr->max_qp_wr;
> +	if (cdata->max_requests > max_wrs)
> +		cdata->max_requests = max_wrs;
> +
> +	wrs = cdata->max_requests;
> +	ep->rep_attr.cap.max_send_wr = wrs;
> +	ep->rep_attr.cap.max_recv_wr = wrs;
> +
> +	dprintk("RPC:       %s: pre-allocating %u send WRs, %u recv WRs\n",
> +		__func__, ep->rep_attr.cap.max_send_wr,
> +		ep->rep_attr.cap.max_recv_wr);
> +	return 0;
> +}
> +
>  /* FMR mode conveys up to 64 pages of payload per chunk segment.
>   */
>  static size_t
> @@ -201,6 +222,7 @@ fmr_op_destroy(struct rpcrdma_buffer *buf)  const
> struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
>  	.ro_map				= fmr_op_map,
>  	.ro_unmap			= fmr_op_unmap,
> +	.ro_open			= fmr_op_open,
>  	.ro_maxpages			= fmr_op_maxpages,
>  	.ro_init			= fmr_op_init,
>  	.ro_reset			= fmr_op_reset,
> diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
> index fc3a228..9bb4b2d 100644
> --- a/net/sunrpc/xprtrdma/frwr_ops.c
> +++ b/net/sunrpc/xprtrdma/frwr_ops.c
> @@ -93,6 +93,65 @@ __frwr_release(struct rpcrdma_mw *r)
>  	ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
>  }
> 
> +static int
> +frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
> +	     struct rpcrdma_create_data_internal *cdata) {
> +	struct ib_device_attr *devattr = &ia->ri_devattr;
> +	unsigned int wrs, max_wrs;
> +	int depth = 7;
> +
> +	max_wrs = devattr->max_qp_wr;
> +	if (cdata->max_requests > max_wrs)
> +		cdata->max_requests = max_wrs;
> +
> +	wrs = cdata->max_requests;
> +	ep->rep_attr.cap.max_send_wr = wrs;
> +	ep->rep_attr.cap.max_recv_wr = wrs;
> +
> +	ia->ri_max_frmr_depth =
> +			min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
> +			      devattr->max_fast_reg_page_list_len);
> +	dprintk("RPC:       %s: device's max FR page list len = %u\n",
> +		__func__, ia->ri_max_frmr_depth);
> +
> +	/* Add room for frmr register and invalidate WRs.
> +	 * 1. FRMR reg WR for head
> +	 * 2. FRMR invalidate WR for head
> +	 * 3. N FRMR reg WRs for pagelist
> +	 * 4. N FRMR invalidate WRs for pagelist
> +	 * 5. FRMR reg WR for tail
> +	 * 6. FRMR invalidate WR for tail
> +	 * 7. The RDMA_SEND WR
> +	 */
> +
> +	/* Calculate N if the device max FRMR depth is smaller than
> +	 * RPCRDMA_MAX_DATA_SEGS.
> +	 */
> +	if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
> +		int delta = RPCRDMA_MAX_DATA_SEGS - ia-
> >ri_max_frmr_depth;
> +
> +		do {
> +			depth += 2; /* FRMR reg + invalidate */
> +			delta -= ia->ri_max_frmr_depth;
> +		} while (delta > 0);

Please add a check that ia->ri_max_frmr_depth is non-zero. A bug in the provider (reporting max_frmr_depth = 0 in query_device) would cause an infinite loop here, and the mount would hang.

> +	}
> +
> +	ep->rep_attr.cap.max_send_wr *= depth;
> +	if (ep->rep_attr.cap.max_send_wr > max_wrs) {
> +		cdata->max_requests = max_wrs / depth;
> +		if (!cdata->max_requests)
> +			return -EINVAL;
> +		ep->rep_attr.cap.max_send_wr = cdata->max_requests *
> +					       depth;
> +	}
> +
> +	dprintk("RPC:       %s: pre-allocating %u send WRs, %u recv WRs\n",
> +		__func__, ep->rep_attr.cap.max_send_wr,
> +		ep->rep_attr.cap.max_recv_wr);
> +	return 0;
> +}
> +
>  /* FRWR mode conveys a list of pages per chunk segment. The
>   * maximum length of that list is the FRWR page list depth.
>   */
> @@ -290,6 +349,7 @@ frwr_op_destroy(struct rpcrdma_buffer *buf)  const
> struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
>  	.ro_map				= frwr_op_map,
>  	.ro_unmap			= frwr_op_unmap,
> +	.ro_open			= frwr_op_open,
>  	.ro_maxpages			= frwr_op_maxpages,
>  	.ro_init			= frwr_op_init,
>  	.ro_reset			= frwr_op_reset,
> diff --git a/net/sunrpc/xprtrdma/physical_ops.c
> b/net/sunrpc/xprtrdma/physical_ops.c
> index f8da8c4..0998f4f 100644
> --- a/net/sunrpc/xprtrdma/physical_ops.c
> +++ b/net/sunrpc/xprtrdma/physical_ops.c
> @@ -19,6 +19,27 @@
>  # define RPCDBG_FACILITY	RPCDBG_TRANS
>  #endif
> 
> +static int
> +physical_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
> +		 struct rpcrdma_create_data_internal *cdata) {
> +	struct ib_device_attr *devattr = &ia->ri_devattr;
> +	unsigned int wrs, max_wrs;
> +
> +	max_wrs = devattr->max_qp_wr;
> +	if (cdata->max_requests > max_wrs)
> +		cdata->max_requests = max_wrs;
> +
> +	wrs = cdata->max_requests;
> +	ep->rep_attr.cap.max_send_wr = wrs;
> +	ep->rep_attr.cap.max_recv_wr = wrs;
> +
> +	dprintk("RPC:       %s: pre-allocating %u send WRs, %u recv WRs\n",
> +		__func__, ep->rep_attr.cap.max_send_wr,
> +		ep->rep_attr.cap.max_recv_wr);
> +	return 0;
> +}
> +
>  /* PHYSICAL memory registration conveys one page per chunk segment.
>   */
>  static size_t
> @@ -75,6 +96,7 @@ physical_op_destroy(struct rpcrdma_buffer *buf)  const
> struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
>  	.ro_map				= physical_op_map,
>  	.ro_unmap			= physical_op_unmap,
> +	.ro_open			= physical_op_open,
>  	.ro_maxpages			= physical_op_maxpages,
>  	.ro_init			= physical_op_init,
>  	.ro_reset			= physical_op_reset,
> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index
> dcbc736..17b2a29 100644
> --- a/net/sunrpc/xprtrdma/verbs.c
> +++ b/net/sunrpc/xprtrdma/verbs.c
> @@ -621,11 +621,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct
> sockaddr *addr, int memreg)
>  			dprintk("RPC:       %s: FRMR registration "
>  				"not supported by HCA\n", __func__);
>  			memreg = RPCRDMA_MTHCAFMR;
> -		} else {
> -			/* Mind the ia limit on FRMR page list depth */
> -			ia->ri_max_frmr_depth = min_t(unsigned int,
> -				RPCRDMA_MAX_DATA_SEGS,
> -				devattr->max_fast_reg_page_list_len);
>  		}
>  	}
>  	if (memreg == RPCRDMA_MTHCAFMR) {
> @@ -734,56 +729,13 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct
> rpcrdma_ia *ia,
>  	struct ib_cq *sendcq, *recvcq;
>  	int rc, err;
> 
> -	/* check provider's send/recv wr limits */
> -	if (cdata->max_requests > devattr->max_qp_wr)
> -		cdata->max_requests = devattr->max_qp_wr;
> +	rc = ia->ri_ops->ro_open(ia, ep, cdata);
> +	if (rc)
> +		return rc;
> 
>  	ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
>  	ep->rep_attr.qp_context = ep;
> -	/* send_cq and recv_cq initialized below */
>  	ep->rep_attr.srq = NULL;
> -	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
> -	switch (ia->ri_memreg_strategy) {
> -	case RPCRDMA_FRMR: {
> -		int depth = 7;
> -
> -		/* Add room for frmr register and invalidate WRs.
> -		 * 1. FRMR reg WR for head
> -		 * 2. FRMR invalidate WR for head
> -		 * 3. N FRMR reg WRs for pagelist
> -		 * 4. N FRMR invalidate WRs for pagelist
> -		 * 5. FRMR reg WR for tail
> -		 * 6. FRMR invalidate WR for tail
> -		 * 7. The RDMA_SEND WR
> -		 */
> -
> -		/* Calculate N if the device max FRMR depth is smaller than
> -		 * RPCRDMA_MAX_DATA_SEGS.
> -		 */
> -		if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
> -			int delta = RPCRDMA_MAX_DATA_SEGS -
> -				    ia->ri_max_frmr_depth;
> -
> -			do {
> -				depth += 2; /* FRMR reg + invalidate */
> -				delta -= ia->ri_max_frmr_depth;
> -			} while (delta > 0);
> -
> -		}
> -		ep->rep_attr.cap.max_send_wr *= depth;
> -		if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) {
> -			cdata->max_requests = devattr->max_qp_wr / depth;
> -			if (!cdata->max_requests)
> -				return -EINVAL;
> -			ep->rep_attr.cap.max_send_wr = cdata-
> >max_requests *
> -						       depth;
> -		}
> -		break;
> -	}
> -	default:
> -		break;
> -	}
> -	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
>  	ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
>  	ep->rep_attr.cap.max_recv_sge = 1;
>  	ep->rep_attr.cap.max_inline_data = 0;
> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h
> b/net/sunrpc/xprtrdma/xprt_rdma.h index a0e3c3e..a53a564 100644
> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
> @@ -340,6 +340,9 @@ struct rpcrdma_memreg_ops {
>  				  struct rpcrdma_mr_seg *, int, bool);
>  	void		(*ro_unmap)(struct rpcrdma_xprt *,
>  				    struct rpcrdma_req *, unsigned int);
> +	int		(*ro_open)(struct rpcrdma_ia *,
> +				   struct rpcrdma_ep *,
> +				   struct rpcrdma_create_data_internal *);
>  	size_t		(*ro_maxpages)(struct rpcrdma_xprt *);
>  	int		(*ro_init)(struct rpcrdma_xprt *);
>  	void		(*ro_reset)(struct rpcrdma_xprt *);
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body
> of a message to majordomo@vger.kernel.org More majordomo info at
> http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH v1 01/16] xprtrdma: Display IPv6 addresses and port numbers correctly
       [not found]         ` <EE7902D3F51F404C82415C4803930ACD5DC3A952-DWYeeINJQrxExQ8dmkPuX0M9+F4ksjoh@public.gmane.org>
@ 2015-03-24 14:43           ` Chuck Lever
  0 siblings, 0 replies; 52+ messages in thread
From: Chuck Lever @ 2015-03-24 14:43 UTC (permalink / raw)
  To: Devesh Sharma; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA


On Mar 24, 2015, at 4:27 AM, Devesh Sharma <Devesh.Sharma-iH1Dq9VlAzfQT0dZR+AlfA@public.gmane.org> wrote:

> I see that in the svcrdma code there is a check that aborts creating the listener if the AF is not AF_INET.
> Do we have plans to address this on the server as well?

No specific plans yet, but yes, the server needs IPv6 updates as well.


> static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
>                                        struct net *net,
>                                        struct sockaddr *sa, int salen,
>                                        int flags)
> {
>        struct rdma_cm_id *listen_id;
>        struct svcxprt_rdma *cma_xprt;
>        int ret;
> 
>        dprintk("svcrdma: Creating RDMA socket\n");
>        if (sa->sa_family != AF_INET) {
>                dprintk("svcrdma: Address family %d is not supported.\n", sa->sa_family);
>                return ERR_PTR(-EAFNOSUPPORT);
> 
> -Regards
> Devesh
> 
>> -----Original Message-----
>> From: linux-rdma-owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org [mailto:linux-rdma-
>> owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org] On Behalf Of Chuck Lever
>> Sent: Saturday, March 14, 2015 2:57 AM
>> To: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
>> Subject: [PATCH v1 01/16] xprtrdma: Display IPv6 addresses and port numbers
>> correctly
>> 
>> Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
>> ---
>> net/sunrpc/xprtrdma/transport.c |   47
>> ++++++++++++++++++++++++++++++++-------
>> net/sunrpc/xprtrdma/verbs.c     |   21 +++++++----------
>> 2 files changed, 47 insertions(+), 21 deletions(-)
>> 
>> diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
>> index 2e192ba..26a62e7 100644
>> --- a/net/sunrpc/xprtrdma/transport.c
>> +++ b/net/sunrpc/xprtrdma/transport.c
>> @@ -157,12 +157,47 @@ static struct ctl_table sunrpc_table[] = {
>> static struct rpc_xprt_ops xprt_rdma_procs;	/* forward reference */
>> 
>> static void
>> +xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr
>> +*sap) {
>> +	struct sockaddr_in *sin = (struct sockaddr_in *)sap;
>> +	char buf[20];
>> +
>> +	snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr));
>> +	xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf,
>> +GFP_KERNEL);
>> +
>> +	xprt->address_strings[RPC_DISPLAY_NETID] = "rdma"; }
>> +
>> +static void
>> +xprt_rdma_format_addresses6(struct rpc_xprt *xprt, struct sockaddr
>> +*sap) {
>> +	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
>> +	char buf[40];
>> +
>> +	snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr);
>> +	xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf,
>> +GFP_KERNEL);
>> +
>> +	xprt->address_strings[RPC_DISPLAY_NETID] = "rdma6"; }
>> +
>> +static void
>> xprt_rdma_format_addresses(struct rpc_xprt *xprt)  {
>> 	struct sockaddr *sap = (struct sockaddr *)
>> 					&rpcx_to_rdmad(xprt).addr;
>> -	struct sockaddr_in *sin = (struct sockaddr_in *)sap;
>> -	char buf[64];
>> +	char buf[128];
>> +
>> +	switch (sap->sa_family) {
>> +	case AF_INET:
>> +		xprt_rdma_format_addresses4(xprt, sap);
>> +		break;
>> +	case AF_INET6:
>> +		xprt_rdma_format_addresses6(xprt, sap);
>> +		break;
>> +	default:
>> +		pr_err("rpcrdma: Unrecognized address family\n");
>> +		return;
>> +	}
>> 
>> 	(void)rpc_ntop(sap, buf, sizeof(buf));
>> 	xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf,
>> GFP_KERNEL); @@ -170,16 +205,10 @@ xprt_rdma_format_addresses(struct
>> rpc_xprt *xprt)
>> 	snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap));
>> 	xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf,
>> GFP_KERNEL);
>> 
>> -	xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
>> -
>> -	snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr));
>> -	xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf,
>> GFP_KERNEL);
>> -
>> 	snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap));
>> 	xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf,
>> GFP_KERNEL);
>> 
>> -	/* netid */
>> -	xprt->address_strings[RPC_DISPLAY_NETID] = "rdma";
>> +	xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
>> }
>> 
>> static void
>> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index
>> 124676c..1aa55b7 100644
>> --- a/net/sunrpc/xprtrdma/verbs.c
>> +++ b/net/sunrpc/xprtrdma/verbs.c
>> @@ -50,6 +50,7 @@
>> #include <linux/interrupt.h>
>> #include <linux/slab.h>
>> #include <linux/prefetch.h>
>> +#include <linux/sunrpc/addr.h>
>> #include <asm/bitops.h>
>> 
>> #include "xprt_rdma.h"
>> @@ -424,7 +425,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct
>> rdma_cm_event *event)
>> 	struct rpcrdma_ia *ia = &xprt->rx_ia;
>> 	struct rpcrdma_ep *ep = &xprt->rx_ep;
>> #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
>> -	struct sockaddr_in *addr = (struct sockaddr_in *) &ep-
>>> rep_remote_addr;
>> +	struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr;
>> #endif
>> 	struct ib_qp_attr *attr = &ia->ri_qp_attr;
>> 	struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr; @@ -480,9 +481,8
>> @@ connected:
>> 		wake_up_all(&ep->rep_connect_wait);
>> 		/*FALLTHROUGH*/
>> 	default:
>> -		dprintk("RPC:       %s: %pI4:%u (ep 0x%p): %s\n",
>> -			__func__, &addr->sin_addr.s_addr,
>> -			ntohs(addr->sin_port), ep,
>> +		dprintk("RPC:       %s: %pIS:%u (ep 0x%p): %s\n",
>> +			__func__, sap, rpc_get_port(sap), ep,
>> 			CONNECTION_MSG(event->event));
>> 		break;
>> 	}
>> @@ -491,19 +491,16 @@ connected:
>> 	if (connstate == 1) {
>> 		int ird = attr->max_dest_rd_atomic;
>> 		int tird = ep->rep_remote_cma.responder_resources;
>> -		printk(KERN_INFO "rpcrdma: connection to %pI4:%u "
>> -			"on %s, memreg %d slots %d ird %d%s\n",
>> -			&addr->sin_addr.s_addr,
>> -			ntohs(addr->sin_port),
>> +
>> +		pr_info("rpcrdma: connection to %pIS:%u on %s, memreg %d
>> slots %d ird %d%s\n",
>> +			sap, rpc_get_port(sap),
>> 			ia->ri_id->device->name,
>> 			ia->ri_memreg_strategy,
>> 			xprt->rx_buf.rb_max_requests,
>> 			ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
>> 	} else if (connstate < 0) {
>> -		printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed
>> (%d)\n",
>> -			&addr->sin_addr.s_addr,
>> -			ntohs(addr->sin_port),
>> -			connstate);
>> +		pr_info("rpcrdma: connection to %pIS:%u closed (%d)\n",
>> +			sap, rpc_get_port(sap), connstate);
>> 	}
>> #endif
>> 
>> 
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body
>> of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org More majordomo info at
>> http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com



--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH v1 06/16] xprtrdma: Add a "deregister_external" op for each memreg mode
       [not found]         ` <EE7902D3F51F404C82415C4803930ACD5DC3A9C4-DWYeeINJQrxExQ8dmkPuX0M9+F4ksjoh@public.gmane.org>
@ 2015-03-24 14:54           ` Chuck Lever
  0 siblings, 0 replies; 52+ messages in thread
From: Chuck Lever @ 2015-03-24 14:54 UTC (permalink / raw)
  To: Devesh Sharma; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA


On Mar 24, 2015, at 7:12 AM, Devesh Sharma <Devesh.Sharma-iH1Dq9VlAzfQT0dZR+AlfA@public.gmane.org> wrote:

>> -----Original Message-----
>> From: linux-rdma-owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org [mailto:linux-rdma-
>> owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org] On Behalf Of Chuck Lever
>> Sent: Saturday, March 14, 2015 2:57 AM
>> To: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
>> Subject: [PATCH v1 06/16] xprtrdma: Add a "deregister_external" op for each
>> memreg mode
>> 
>> There is very little common processing among the different external memory
>> deregistration functions.
>> 
>> In addition, instead of calling the deregistration function for each segment,
>> have one call release all segments for a request. This makes the API a little
>> asymmetrical, but a hair faster.
>> 
>> Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
>> ---
>> net/sunrpc/xprtrdma/fmr_ops.c      |   37 ++++++++++++++++
>> net/sunrpc/xprtrdma/frwr_ops.c     |   46 ++++++++++++++++++++
>> net/sunrpc/xprtrdma/physical_ops.c |   13 ++++++
>> net/sunrpc/xprtrdma/rpc_rdma.c     |    7 +--
>> net/sunrpc/xprtrdma/transport.c    |    8 +---
>> net/sunrpc/xprtrdma/verbs.c        |   81 ------------------------------------
>> net/sunrpc/xprtrdma/xprt_rdma.h    |    5 +-
>> 7 files changed, 103 insertions(+), 94 deletions(-)
>> 
>> diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
>> index 45fb646..9b983b4 100644
>> --- a/net/sunrpc/xprtrdma/fmr_ops.c
>> +++ b/net/sunrpc/xprtrdma/fmr_ops.c
>> @@ -20,6 +20,32 @@
>> /* Maximum scatter/gather per FMR */
>> #define RPCRDMA_MAX_FMR_SGES	(64)
>> 
>> +/* Use the ib_unmap_fmr() verb to prevent further remote
>> + * access via RDMA READ or RDMA WRITE.
>> + */
>> +static int
>> +__fmr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) {
>> +	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
>> +	struct rpcrdma_mr_seg *seg1 = seg;
>> +	int rc, nsegs = seg->mr_nsegs;
>> +	LIST_HEAD(l);
>> +
>> +	list_add(&seg1->rl_mw->r.fmr->list, &l);
>> +	rc = ib_unmap_fmr(&l);
>> +	read_lock(&ia->ri_qplock);
>> +	while (seg1->mr_nsegs--)
>> +		rpcrdma_unmap_one(ia, seg++);
>> +	read_unlock(&ia->ri_qplock);
>> +	if (rc)
>> +		goto out_err;
>> +	return nsegs;
>> +
>> +out_err:
>> +	dprintk("RPC:       %s: ib_unmap_fmr status %i\n", __func__, rc);
>> +	return nsegs;
>> +}
>> +
>> /* FMR mode conveys up to 64 pages of payload per chunk segment.
>>  */
>> static size_t
>> @@ -79,8 +105,19 @@ out_maperr:
>> 	return rc;
>> }
>> 
>> +static void
>> +fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
>> +	     unsigned int count)
>> +{
>> +	unsigned int i;
>> +
>> +	for (i = 0; count--;)
>> +		i += __fmr_unmap(r_xprt, &req->rl_segments[i]); }
>> +
>> const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
>> 	.ro_map				= fmr_op_map,
>> +	.ro_unmap			= fmr_op_unmap,
>> 	.ro_maxpages			= fmr_op_maxpages,
>> 	.ro_displayname			= "fmr",
>> };
>> diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
>> index 2b5ccb0..05b5761 100644
>> --- a/net/sunrpc/xprtrdma/frwr_ops.c
>> +++ b/net/sunrpc/xprtrdma/frwr_ops.c
>> @@ -17,6 +17,41 @@
>> # define RPCDBG_FACILITY	RPCDBG_TRANS
>> #endif
>> 
>> +/* Post a LOCAL_INV Work Request to prevent further remote access
>> + * via RDMA READ or RDMA WRITE.
>> + */
>> +static int
>> +__frwr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) {
>> +	struct rpcrdma_mr_seg *seg1 = seg;
>> +	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
>> +	struct ib_send_wr invalidate_wr, *bad_wr;
>> +	int rc, nsegs = seg->mr_nsegs;
>> +
>> +	seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;
>> +
>> +	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
>> +	invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw;
>> +	invalidate_wr.opcode = IB_WR_LOCAL_INV;
>> +	invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey;
>> +	DECR_CQCOUNT(&r_xprt->rx_ep);
>> +
>> +	read_lock(&ia->ri_qplock);
>> +	while (seg1->mr_nsegs--)
>> +		rpcrdma_unmap_one(ia, seg++);
>> +	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
>> +	read_unlock(&ia->ri_qplock);
>> +	if (rc)
>> +		goto out_err;
>> +	return nsegs;
>> +
>> +out_err:
>> +	/* Force rpcrdma_buffer_get() to retry */
>> +	seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
>> +	dprintk("RPC:       %s: ib_post_send status %i\n", __func__, rc);
>> +	return nsegs;
>> +}
>> +
>> /* FRWR mode conveys a list of pages per chunk segment. The
>>  * maximum length of that list is the FRWR page list depth.
>>  */
>> @@ -116,8 +151,19 @@ out_err:
>> 	return rc;
>> }
>> 
>> +static void
>> +frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
>> +	      unsigned int count)
>> +{
>> +	unsigned int i;
>> +
>> +	for (i = 0; count--;)
>> +		i += __frwr_unmap(r_xprt, &req->rl_segments[i]); }
>> +
>> const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
>> 	.ro_map				= frwr_op_map,
>> +	.ro_unmap			= frwr_op_unmap,
>> 	.ro_maxpages			= frwr_op_maxpages,
>> 	.ro_displayname			= "frwr",
>> };
>> diff --git a/net/sunrpc/xprtrdma/physical_ops.c
>> b/net/sunrpc/xprtrdma/physical_ops.c
>> index 5a284ee..f2c15be 100644
>> --- a/net/sunrpc/xprtrdma/physical_ops.c
>> +++ b/net/sunrpc/xprtrdma/physical_ops.c
>> @@ -44,8 +44,21 @@ physical_op_map(struct rpcrdma_xprt *r_xprt, struct
>> rpcrdma_mr_seg *seg,
>> 	return 1;
>> }
>> 
>> +/* Unmap a memory region, but leave it registered.
>> + */
>> +static void
>> +physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
>> +		  unsigned int count)
>> +{
>> +	unsigned int i;
>> +
>> +	for (i = 0; i < count; i++)
>> +		rpcrdma_unmap_one(&r_xprt->rx_ia, &req->rl_segments[i]); }
>> +
>> const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
>> 	.ro_map				= physical_op_map,
>> +	.ro_unmap			= physical_op_unmap,
>> 	.ro_maxpages			= physical_op_maxpages,
>> 	.ro_displayname			= "physical",
>> };
>> diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c
>> b/net/sunrpc/xprtrdma/rpc_rdma.c index 6ab1d03..7b51d9d 100644
>> --- a/net/sunrpc/xprtrdma/rpc_rdma.c
>> +++ b/net/sunrpc/xprtrdma/rpc_rdma.c
>> @@ -284,11 +284,8 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct
>> xdr_buf *target,
>> 	return (unsigned char *)iptr - (unsigned char *)headerp;
>> 
>> out:
>> -	if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_FRMR) {
>> -		for (pos = 0; nchunks--;)
>> -			pos += rpcrdma_deregister_external(
>> -					&req->rl_segments[pos], r_xprt);
>> -	}
>> +	if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_FRMR)
>> +		r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, req, nchunks);
> 
> Do we still need to check for memreg_strategy in this exit path? Am I missing something..?

The check is removed in a later patch.

> 
>> 	return n;
>> }
>> 
>> diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
>> index 9a9da40..c484671 100644
>> --- a/net/sunrpc/xprtrdma/transport.c
>> +++ b/net/sunrpc/xprtrdma/transport.c
>> @@ -571,7 +571,6 @@ xprt_rdma_free(void *buffer)
>> 	struct rpcrdma_req *req;
>> 	struct rpcrdma_xprt *r_xprt;
>> 	struct rpcrdma_regbuf *rb;
>> -	int i;
>> 
>> 	if (buffer == NULL)
>> 		return;
>> @@ -582,11 +581,8 @@ xprt_rdma_free(void *buffer)
>> 
>> 	dprintk("RPC:       %s: called on 0x%p\n", __func__, req->rl_reply);
>> 
>> -	for (i = 0; req->rl_nchunks;) {
>> -		--req->rl_nchunks;
>> -		i += rpcrdma_deregister_external(
>> -			&req->rl_segments[i], r_xprt);
>> -	}
>> +	r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, req, req->rl_nchunks);
>> +	req->rl_nchunks = 0;
>> 
>> 	rpcrdma_buffer_put(req);
>> }
>> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index
>> 851ed97..a1621fd 100644
>> --- a/net/sunrpc/xprtrdma/verbs.c
>> +++ b/net/sunrpc/xprtrdma/verbs.c
>> @@ -1509,7 +1509,7 @@ rpcrdma_buffer_put_sendbuf(struct rpcrdma_req
>> *req, struct rpcrdma_buffer *buf)
>> 	}
>> }
>> 
>> -/* rpcrdma_unmap_one() was already done by
>> rpcrdma_deregister_frmr_external().
>> +/* rpcrdma_unmap_one() was already done during deregistration.
>>  * Redo only the ib_post_send().
>>  */
>> static void
>> @@ -1889,85 +1889,6 @@ rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct
>> rpcrdma_mr_seg *seg)
>> 				seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
>> }
>> 
>> -static int
>> -rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
>> -			struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
>> -{
>> -	struct rpcrdma_mr_seg *seg1 = seg;
>> -	struct ib_send_wr invalidate_wr, *bad_wr;
>> -	int rc;
>> -
>> -	seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;
>> -
>> -	memset(&invalidate_wr, 0, sizeof invalidate_wr);
>> -	invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw;
>> -	invalidate_wr.opcode = IB_WR_LOCAL_INV;
>> -	invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey;
>> -	DECR_CQCOUNT(&r_xprt->rx_ep);
>> -
>> -	read_lock(&ia->ri_qplock);
>> -	while (seg1->mr_nsegs--)
>> -		rpcrdma_unmap_one(ia, seg++);
>> -	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
>> -	read_unlock(&ia->ri_qplock);
>> -	if (rc) {
>> -		/* Force rpcrdma_buffer_get() to retry */
>> -		seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
>> -		dprintk("RPC:       %s: failed ib_post_send for invalidate,"
>> -			" status %i\n", __func__, rc);
>> -	}
>> -	return rc;
>> -}
>> -
>> -static int
>> -rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
>> -			struct rpcrdma_ia *ia)
>> -{
>> -	struct rpcrdma_mr_seg *seg1 = seg;
>> -	LIST_HEAD(l);
>> -	int rc;
>> -
>> -	list_add(&seg1->rl_mw->r.fmr->list, &l);
>> -	rc = ib_unmap_fmr(&l);
>> -	read_lock(&ia->ri_qplock);
>> -	while (seg1->mr_nsegs--)
>> -		rpcrdma_unmap_one(ia, seg++);
>> -	read_unlock(&ia->ri_qplock);
>> -	if (rc)
>> -		dprintk("RPC:       %s: failed ib_unmap_fmr,"
>> -			" status %i\n", __func__, rc);
>> -	return rc;
>> -}
>> -
>> -int
>> -rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
>> -		struct rpcrdma_xprt *r_xprt)
>> -{
>> -	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
>> -	int nsegs = seg->mr_nsegs, rc;
>> -
>> -	switch (ia->ri_memreg_strategy) {
>> -
>> -	case RPCRDMA_ALLPHYSICAL:
>> -		read_lock(&ia->ri_qplock);
>> -		rpcrdma_unmap_one(ia, seg);
>> -		read_unlock(&ia->ri_qplock);
>> -		break;
>> -
>> -	case RPCRDMA_FRMR:
>> -		rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
>> -		break;
>> -
>> -	case RPCRDMA_MTHCAFMR:
>> -		rc = rpcrdma_deregister_fmr_external(seg, ia);
>> -		break;
>> -
>> -	default:
>> -		break;
>> -	}
>> -	return nsegs;
>> -}
>> -
>> /*
>>  * Prepost any receive buffer, then post send.
>>  *
>> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h
>> b/net/sunrpc/xprtrdma/xprt_rdma.h index 7bf077b..3aabbb2 100644
>> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
>> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
>> @@ -338,6 +338,8 @@ struct rpcrdma_xprt;  struct rpcrdma_memreg_ops {
>> 	int		(*ro_map)(struct rpcrdma_xprt *,
>> 				  struct rpcrdma_mr_seg *, int, bool);
>> +	void		(*ro_unmap)(struct rpcrdma_xprt *,
>> +				    struct rpcrdma_req *, unsigned int);
>> 	size_t		(*ro_maxpages)(struct rpcrdma_xprt *);
>> 	const char	*ro_displayname;
>> };
>> @@ -405,9 +407,6 @@ void rpcrdma_buffer_put(struct rpcrdma_req *);  void
>> rpcrdma_recv_buffer_get(struct rpcrdma_req *);  void
>> rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
>> 
>> -int rpcrdma_deregister_external(struct rpcrdma_mr_seg *,
>> -				struct rpcrdma_xprt *);
>> -
>> struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *,
>> 					    size_t, gfp_t);
>> void rpcrdma_free_regbuf(struct rpcrdma_ia *,
>> 
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body
>> of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org More majordomo info at
>> http://vger.kernel.org/majordomo-info.html

--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com



--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH v1 08/16] xprtrdma: Add "reset MRs" memreg op
       [not found]         ` <EE7902D3F51F404C82415C4803930ACD5DC3A9E3-DWYeeINJQrxExQ8dmkPuX0M9+F4ksjoh@public.gmane.org>
@ 2015-03-24 15:10           ` Chuck Lever
  0 siblings, 0 replies; 52+ messages in thread
From: Chuck Lever @ 2015-03-24 15:10 UTC (permalink / raw)
  To: Devesh Sharma; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA


On Mar 24, 2015, at 7:27 AM, Devesh Sharma <Devesh.Sharma-iH1Dq9VlAzfQT0dZR+AlfA@public.gmane.org> wrote:

>> -----Original Message-----
>> From: linux-rdma-owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org [mailto:linux-rdma-
>> owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org] On Behalf Of Chuck Lever
>> Sent: Saturday, March 14, 2015 2:58 AM
>> To: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
>> Subject: [PATCH v1 08/16] xprtrdma: Add "reset MRs" memreg op
>> 
>> This method is invoked when a transport instance is about to be reconnected.
>> Each Memory Region object is reset to its initial state.
>> 
>> Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
>> ---
>> net/sunrpc/xprtrdma/fmr_ops.c      |   23 ++++++++
>> net/sunrpc/xprtrdma/frwr_ops.c     |   46 ++++++++++++++++
>> net/sunrpc/xprtrdma/physical_ops.c |    6 ++
>> net/sunrpc/xprtrdma/verbs.c        |  103 +-----------------------------------
>> net/sunrpc/xprtrdma/xprt_rdma.h    |    1
>> 5 files changed, 78 insertions(+), 101 deletions(-)
>> 
>> diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
>> index 1501db0..1ccb3de 100644
>> --- a/net/sunrpc/xprtrdma/fmr_ops.c
>> +++ b/net/sunrpc/xprtrdma/fmr_ops.c
>> @@ -156,10 +156,33 @@ fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct
>> rpcrdma_req *req,
>> 		i += __fmr_unmap(r_xprt, &req->rl_segments[i]);  }
>> 
>> +/* After a disconnect, unmap all FMRs.
>> + *
>> + * This is invoked only in the transport connect worker in order
>> + * to serialize with rpcrdma_register_fmr_external().
>> + */
>> +static void
>> +fmr_op_reset(struct rpcrdma_xprt *r_xprt) {
>> +	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
>> +	struct rpcrdma_mw *r;
>> +	LIST_HEAD(list);
>> +	int rc;
>> +
>> +	list_for_each_entry(r, &buf->rb_all, mw_all)
>> +		list_add(&r->r.fmr->list, &list);
>> +
>> +	rc = ib_unmap_fmr(&list);
>> +	if (rc)
>> +		dprintk("RPC:       %s: ib_unmap_fmr failed %i\n",
>> +			__func__, rc);
>> +}
>> +
>> const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
>> 	.ro_map				= fmr_op_map,
>> 	.ro_unmap			= fmr_op_unmap,
>> 	.ro_maxpages			= fmr_op_maxpages,
>> 	.ro_init			= fmr_op_init,
>> +	.ro_reset			= fmr_op_reset,
>> 	.ro_displayname			= "fmr",
>> };
>> diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
>> index 975372c..b4ce0e5 100644
>> --- a/net/sunrpc/xprtrdma/frwr_ops.c
>> +++ b/net/sunrpc/xprtrdma/frwr_ops.c
>> @@ -81,6 +81,18 @@ out_err:
>> 	return nsegs;
>> }
>> 
>> +static void
>> +__frwr_release(struct rpcrdma_mw *r)
>> +{
>> +	int rc;
>> +
>> +	rc = ib_dereg_mr(r->r.frmr.fr_mr);
>> +	if (rc)
>> +		dprintk("RPC:       %s: ib_dereg_mr status %i\n",
>> +			__func__, rc);
>> +	ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
>> +}
>> +
>> /* FRWR mode conveys a list of pages per chunk segment. The
>>  * maximum length of that list is the FRWR page list depth.
>>  */
>> @@ -226,10 +238,44 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct
>> rpcrdma_req *req,
>> 		i += __frwr_unmap(r_xprt, &req->rl_segments[i]);  }
>> 
>> +/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
>> + * an unusable state. Find FRMRs in this state and dereg / reg
>> + * each.  FRMRs that are VALID and attached to an rpcrdma_req are
>> + * also torn down.
>> + *
>> + * This gives all in-use FRMRs a fresh rkey and leaves them INVALID.
>> + *
>> + * This is invoked only in the transport connect worker in order
>> + * to serialize with rpcrdma_register_frmr_external().
>> + */
>> +static void
>> +frwr_op_reset(struct rpcrdma_xprt *r_xprt) {
>> +	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
>> +	struct ib_device *device = r_xprt->rx_ia.ri_id->device;
>> +	unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth;
>> +	struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
>> +	struct rpcrdma_mw *r;
>> +	int rc;
>> +
>> +	list_for_each_entry(r, &buf->rb_all, mw_all) {
>> +		if (r->r.frmr.fr_state == FRMR_IS_INVALID)
>> +			continue;
>> +
>> +		__frwr_release(r);
>> +		rc = __frwr_init(r, pd, device, depth);
>> +		if (rc)
>> +			continue;
> 
> Should we print something here e.g. "failed to allocate frmr, mount will work with less number of frmr, performance hit is expected"?

I can't remember why we skip the FRWR in this case. I think the
transport will eventually recover it (if needed, by a second
reconnect).

>> +
>> +		r->r.frmr.fr_state = FRMR_IS_INVALID;
>> +	}
>> +}
>> +
>> const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
>> 	.ro_map				= frwr_op_map,
>> 	.ro_unmap			= frwr_op_unmap,
>> 	.ro_maxpages			= frwr_op_maxpages,
>> 	.ro_init			= frwr_op_init,
>> +	.ro_reset			= frwr_op_reset,
>> 	.ro_displayname			= "frwr",
>> };
>> diff --git a/net/sunrpc/xprtrdma/physical_ops.c
>> b/net/sunrpc/xprtrdma/physical_ops.c
>> index ae2b0bc..0afc691 100644
>> --- a/net/sunrpc/xprtrdma/physical_ops.c
>> +++ b/net/sunrpc/xprtrdma/physical_ops.c
>> @@ -62,10 +62,16 @@ physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct
>> rpcrdma_req *req,
>> 		rpcrdma_unmap_one(&r_xprt->rx_ia, &req->rl_segments[i]);  }
>> 
>> +static void
>> +physical_op_reset(struct rpcrdma_xprt *r_xprt) { }
>> +
>> const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
>> 	.ro_map				= physical_op_map,
>> 	.ro_unmap			= physical_op_unmap,
>> 	.ro_maxpages			= physical_op_maxpages,
>> 	.ro_init			= physical_op_init,
>> +	.ro_reset			= physical_op_reset,
>> 	.ro_displayname			= "physical",
>> };
>> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index
>> d7810d6..e17d91a 100644
>> --- a/net/sunrpc/xprtrdma/verbs.c
>> +++ b/net/sunrpc/xprtrdma/verbs.c
>> @@ -63,9 +63,6 @@
>> # define RPCDBG_FACILITY	RPCDBG_TRANS
>> #endif
>> 
>> -static void rpcrdma_reset_frmrs(struct rpcrdma_ia *); -static void
>> rpcrdma_reset_fmrs(struct rpcrdma_ia *);
>> -
>> /*
>>  * internal functions
>>  */
>> @@ -944,21 +941,9 @@ retry:
>> 		rpcrdma_ep_disconnect(ep, ia);
>> 		rpcrdma_flush_cqs(ep);
>> 
>> -		switch (ia->ri_memreg_strategy) {
>> -		case RPCRDMA_FRMR:
>> -			rpcrdma_reset_frmrs(ia);
>> -			break;
>> -		case RPCRDMA_MTHCAFMR:
>> -			rpcrdma_reset_fmrs(ia);
>> -			break;
>> -		case RPCRDMA_ALLPHYSICAL:
>> -			break;
>> -		default:
>> -			rc = -EIO;
>> -			goto out;
>> -		}
>> -
>> 		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
>> +		ia->ri_ops->ro_reset(xprt);
>> +
>> 		id = rpcrdma_create_id(xprt, ia,
>> 				(struct sockaddr *)&xprt->rx_data.addr);
>> 		if (IS_ERR(id)) {
>> @@ -1288,90 +1273,6 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer
>> *buf)
>> 	kfree(buf->rb_pool);
>> }
>> 
>> -/* After a disconnect, unmap all FMRs.
>> - *
>> - * This is invoked only in the transport connect worker in order
>> - * to serialize with rpcrdma_register_fmr_external().
>> - */
>> -static void
>> -rpcrdma_reset_fmrs(struct rpcrdma_ia *ia) -{
>> -	struct rpcrdma_xprt *r_xprt =
>> -				container_of(ia, struct rpcrdma_xprt, rx_ia);
>> -	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
>> -	struct list_head *pos;
>> -	struct rpcrdma_mw *r;
>> -	LIST_HEAD(l);
>> -	int rc;
>> -
>> -	list_for_each(pos, &buf->rb_all) {
>> -		r = list_entry(pos, struct rpcrdma_mw, mw_all);
>> -
>> -		INIT_LIST_HEAD(&l);
>> -		list_add(&r->r.fmr->list, &l);
>> -		rc = ib_unmap_fmr(&l);
>> -		if (rc)
>> -			dprintk("RPC:       %s: ib_unmap_fmr failed %i\n",
>> -				__func__, rc);
>> -	}
>> -}
>> -
>> -/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
>> - * an unusable state. Find FRMRs in this state and dereg / reg
>> - * each.  FRMRs that are VALID and attached to an rpcrdma_req are
>> - * also torn down.
>> - *
>> - * This gives all in-use FRMRs a fresh rkey and leaves them INVALID.
>> - *
>> - * This is invoked only in the transport connect worker in order
>> - * to serialize with rpcrdma_register_frmr_external().
>> - */
>> -static void
>> -rpcrdma_reset_frmrs(struct rpcrdma_ia *ia) -{
>> -	struct rpcrdma_xprt *r_xprt =
>> -				container_of(ia, struct rpcrdma_xprt, rx_ia);
>> -	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
>> -	struct list_head *pos;
>> -	struct rpcrdma_mw *r;
>> -	int rc;
>> -
>> -	list_for_each(pos, &buf->rb_all) {
>> -		r = list_entry(pos, struct rpcrdma_mw, mw_all);
>> -
>> -		if (r->r.frmr.fr_state == FRMR_IS_INVALID)
>> -			continue;
>> -
>> -		rc = ib_dereg_mr(r->r.frmr.fr_mr);
>> -		if (rc)
>> -			dprintk("RPC:       %s: ib_dereg_mr failed %i\n",
>> -				__func__, rc);
>> -		ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
>> -
>> -		r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
>> -					ia->ri_max_frmr_depth);
>> -		if (IS_ERR(r->r.frmr.fr_mr)) {
>> -			rc = PTR_ERR(r->r.frmr.fr_mr);
>> -			dprintk("RPC:       %s: ib_alloc_fast_reg_mr"
>> -				" failed %i\n", __func__, rc);
>> -			continue;
>> -		}
>> -		r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
>> -					ia->ri_id->device,
>> -					ia->ri_max_frmr_depth);
>> -		if (IS_ERR(r->r.frmr.fr_pgl)) {
>> -			rc = PTR_ERR(r->r.frmr.fr_pgl);
>> -			dprintk("RPC:       %s: "
>> -				"ib_alloc_fast_reg_page_list "
>> -				"failed %i\n", __func__, rc);
>> -
>> -			ib_dereg_mr(r->r.frmr.fr_mr);
>> -			continue;
>> -		}
>> -		r->r.frmr.fr_state = FRMR_IS_INVALID;
>> -	}
>> -}
>> -
>> /* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
>>  * some req segments uninitialized.
>>  */
>> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h
>> b/net/sunrpc/xprtrdma/xprt_rdma.h index 4fe3c38..cdf6763 100644
>> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
>> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
>> @@ -342,6 +342,7 @@ struct rpcrdma_memreg_ops {
>> 				    struct rpcrdma_req *, unsigned int);
>> 	size_t		(*ro_maxpages)(struct rpcrdma_xprt *);
>> 	int		(*ro_init)(struct rpcrdma_xprt *);
>> +	void		(*ro_reset)(struct rpcrdma_xprt *);
>> 	const char	*ro_displayname;
>> };
>> 
>> 
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body
>> of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org More majordomo info at
>> http://vger.kernel.org/majordomo-info.html

--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com



--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH v1 10/16] xprtrdma: Add "open" memreg op
       [not found]         ` <EE7902D3F51F404C82415C4803930ACD5DC3A9FF-DWYeeINJQrxExQ8dmkPuX0M9+F4ksjoh@public.gmane.org>
@ 2015-03-24 15:29           ` Chuck Lever
  0 siblings, 0 replies; 52+ messages in thread
From: Chuck Lever @ 2015-03-24 15:29 UTC (permalink / raw)
  To: Devesh Sharma; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA


On Mar 24, 2015, at 7:34 AM, Devesh Sharma <Devesh.Sharma-iH1Dq9VlAzfQT0dZR+AlfA@public.gmane.org> wrote:

>> -----Original Message-----
>> From: linux-rdma-owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org [mailto:linux-rdma-
>> owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org] On Behalf Of Chuck Lever
>> Sent: Saturday, March 14, 2015 2:58 AM
>> To: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
>> Subject: [PATCH v1 10/16] xprtrdma: Add "open" memreg op
>> 
>> The open op determines the size of various transport data structures based on
>> device capabilities and memory registration mode.
>> 
>> Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
>> ---
>> net/sunrpc/xprtrdma/fmr_ops.c      |   22 +++++++++++++
>> net/sunrpc/xprtrdma/frwr_ops.c     |   60
>> ++++++++++++++++++++++++++++++++++++
>> net/sunrpc/xprtrdma/physical_ops.c |   22 +++++++++++++
>> net/sunrpc/xprtrdma/verbs.c        |   54 ++------------------------------
>> net/sunrpc/xprtrdma/xprt_rdma.h    |    3 ++
>> 5 files changed, 110 insertions(+), 51 deletions(-)
>> 
>> diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
>> index 3115e4b..96e6cd3 100644
>> --- a/net/sunrpc/xprtrdma/fmr_ops.c
>> +++ b/net/sunrpc/xprtrdma/fmr_ops.c
>> @@ -46,6 +46,27 @@ out_err:
>> 	return nsegs;
>> }
>> 
>> +static int
>> +fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
>> +	    struct rpcrdma_create_data_internal *cdata) {
>> +	struct ib_device_attr *devattr = &ia->ri_devattr;
>> +	unsigned int wrs, max_wrs;
>> +
>> +	max_wrs = devattr->max_qp_wr;
>> +	if (cdata->max_requests > max_wrs)
>> +		cdata->max_requests = max_wrs;
>> +
>> +	wrs = cdata->max_requests;
>> +	ep->rep_attr.cap.max_send_wr = wrs;
>> +	ep->rep_attr.cap.max_recv_wr = wrs;
>> +
>> +	dprintk("RPC:       %s: pre-allocating %u send WRs, %u recv WRs\n",
>> +		__func__, ep->rep_attr.cap.max_send_wr,
>> +		ep->rep_attr.cap.max_recv_wr);
>> +	return 0;
>> +}
>> +
>> /* FMR mode conveys up to 64 pages of payload per chunk segment.
>>  */
>> static size_t
>> @@ -201,6 +222,7 @@ fmr_op_destroy(struct rpcrdma_buffer *buf)  const
>> struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
>> 	.ro_map				= fmr_op_map,
>> 	.ro_unmap			= fmr_op_unmap,
>> +	.ro_open			= fmr_op_open,
>> 	.ro_maxpages			= fmr_op_maxpages,
>> 	.ro_init			= fmr_op_init,
>> 	.ro_reset			= fmr_op_reset,
>> diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
>> index fc3a228..9bb4b2d 100644
>> --- a/net/sunrpc/xprtrdma/frwr_ops.c
>> +++ b/net/sunrpc/xprtrdma/frwr_ops.c
>> @@ -93,6 +93,65 @@ __frwr_release(struct rpcrdma_mw *r)
>> 	ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
>> }
>> 
>> +static int
>> +frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
>> +	     struct rpcrdma_create_data_internal *cdata) {
>> +	struct ib_device_attr *devattr = &ia->ri_devattr;
>> +	unsigned int wrs, max_wrs;
>> +	int depth = 7;
>> +
>> +	max_wrs = devattr->max_qp_wr;
>> +	if (cdata->max_requests > max_wrs)
>> +		cdata->max_requests = max_wrs;
>> +
>> +	wrs = cdata->max_requests;
>> +	ep->rep_attr.cap.max_send_wr = wrs;
>> +	ep->rep_attr.cap.max_recv_wr = wrs;
>> +
>> +	ia->ri_max_frmr_depth =
>> +			min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
>> +			      devattr->max_fast_reg_page_list_len);
>> +	dprintk("RPC:       %s: device's max FR page list len = %u\n",
>> +		__func__, ia->ri_max_frmr_depth);
>> +
>> +	/* Add room for frmr register and invalidate WRs.
>> +	 * 1. FRMR reg WR for head
>> +	 * 2. FRMR invalidate WR for head
>> +	 * 3. N FRMR reg WRs for pagelist
>> +	 * 4. N FRMR invalidate WRs for pagelist
>> +	 * 5. FRMR reg WR for tail
>> +	 * 6. FRMR invalidate WR for tail
>> +	 * 7. The RDMA_SEND WR
>> +	 */
>> +
>> +	/* Calculate N if the device max FRMR depth is smaller than
>> +	 * RPCRDMA_MAX_DATA_SEGS.
>> +	 */
>> +	if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
>> +		int delta = RPCRDMA_MAX_DATA_SEGS - ia-
>>> ri_max_frmr_depth;
>> +
>> +		do {
>> +			depth += 2; /* FRMR reg + invalidate */
>> +			delta -= ia->ri_max_frmr_depth;
>> +		} while (delta > 0);
> 
> Please add a check on ia->ri_max_frmr_depth to check it non-zero. A bug in provider (if it reports max_frmr_depth = 0 in query_device), would form an infinite loop here and mount will be stuck.

I'll include a patch to address this in the next version of this series.

> 
>> +	}
>> +
>> +	ep->rep_attr.cap.max_send_wr *= depth;
>> +	if (ep->rep_attr.cap.max_send_wr > max_wrs) {
>> +		cdata->max_requests = max_wrs / depth;
>> +		if (!cdata->max_requests)
>> +			return -EINVAL;
>> +		ep->rep_attr.cap.max_send_wr = cdata->max_requests *
>> +					       depth;
>> +	}
>> +
>> +	dprintk("RPC:       %s: pre-allocating %u send WRs, %u recv WRs\n",
>> +		__func__, ep->rep_attr.cap.max_send_wr,
>> +		ep->rep_attr.cap.max_recv_wr);
>> +	return 0;
>> +}
>> +
>> /* FRWR mode conveys a list of pages per chunk segment. The
>>  * maximum length of that list is the FRWR page list depth.
>>  */
>> @@ -290,6 +349,7 @@ frwr_op_destroy(struct rpcrdma_buffer *buf)  const
>> struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
>> 	.ro_map				= frwr_op_map,
>> 	.ro_unmap			= frwr_op_unmap,
>> +	.ro_open			= frwr_op_open,
>> 	.ro_maxpages			= frwr_op_maxpages,
>> 	.ro_init			= frwr_op_init,
>> 	.ro_reset			= frwr_op_reset,
>> diff --git a/net/sunrpc/xprtrdma/physical_ops.c
>> b/net/sunrpc/xprtrdma/physical_ops.c
>> index f8da8c4..0998f4f 100644
>> --- a/net/sunrpc/xprtrdma/physical_ops.c
>> +++ b/net/sunrpc/xprtrdma/physical_ops.c
>> @@ -19,6 +19,27 @@
>> # define RPCDBG_FACILITY	RPCDBG_TRANS
>> #endif
>> 
>> +static int
>> +physical_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
>> +		 struct rpcrdma_create_data_internal *cdata) {
>> +	struct ib_device_attr *devattr = &ia->ri_devattr;
>> +	unsigned int wrs, max_wrs;
>> +
>> +	max_wrs = devattr->max_qp_wr;
>> +	if (cdata->max_requests > max_wrs)
>> +		cdata->max_requests = max_wrs;
>> +
>> +	wrs = cdata->max_requests;
>> +	ep->rep_attr.cap.max_send_wr = wrs;
>> +	ep->rep_attr.cap.max_recv_wr = wrs;
>> +
>> +	dprintk("RPC:       %s: pre-allocating %u send WRs, %u recv WRs\n",
>> +		__func__, ep->rep_attr.cap.max_send_wr,
>> +		ep->rep_attr.cap.max_recv_wr);
>> +	return 0;
>> +}
>> +
>> /* PHYSICAL memory registration conveys one page per chunk segment.
>>  */
>> static size_t
>> @@ -75,6 +96,7 @@ physical_op_destroy(struct rpcrdma_buffer *buf)  const
>> struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
>> 	.ro_map				= physical_op_map,
>> 	.ro_unmap			= physical_op_unmap,
>> +	.ro_open			= physical_op_open,
>> 	.ro_maxpages			= physical_op_maxpages,
>> 	.ro_init			= physical_op_init,
>> 	.ro_reset			= physical_op_reset,
>> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index
>> dcbc736..17b2a29 100644
>> --- a/net/sunrpc/xprtrdma/verbs.c
>> +++ b/net/sunrpc/xprtrdma/verbs.c
>> @@ -621,11 +621,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct
>> sockaddr *addr, int memreg)
>> 			dprintk("RPC:       %s: FRMR registration "
>> 				"not supported by HCA\n", __func__);
>> 			memreg = RPCRDMA_MTHCAFMR;
>> -		} else {
>> -			/* Mind the ia limit on FRMR page list depth */
>> -			ia->ri_max_frmr_depth = min_t(unsigned int,
>> -				RPCRDMA_MAX_DATA_SEGS,
>> -				devattr->max_fast_reg_page_list_len);
>> 		}
>> 	}
>> 	if (memreg == RPCRDMA_MTHCAFMR) {
>> @@ -734,56 +729,13 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct
>> rpcrdma_ia *ia,
>> 	struct ib_cq *sendcq, *recvcq;
>> 	int rc, err;
>> 
>> -	/* check provider's send/recv wr limits */
>> -	if (cdata->max_requests > devattr->max_qp_wr)
>> -		cdata->max_requests = devattr->max_qp_wr;
>> +	rc = ia->ri_ops->ro_open(ia, ep, cdata);
>> +	if (rc)
>> +		return rc;
>> 
>> 	ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
>> 	ep->rep_attr.qp_context = ep;
>> -	/* send_cq and recv_cq initialized below */
>> 	ep->rep_attr.srq = NULL;
>> -	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
>> -	switch (ia->ri_memreg_strategy) {
>> -	case RPCRDMA_FRMR: {
>> -		int depth = 7;
>> -
>> -		/* Add room for frmr register and invalidate WRs.
>> -		 * 1. FRMR reg WR for head
>> -		 * 2. FRMR invalidate WR for head
>> -		 * 3. N FRMR reg WRs for pagelist
>> -		 * 4. N FRMR invalidate WRs for pagelist
>> -		 * 5. FRMR reg WR for tail
>> -		 * 6. FRMR invalidate WR for tail
>> -		 * 7. The RDMA_SEND WR
>> -		 */
>> -
>> -		/* Calculate N if the device max FRMR depth is smaller than
>> -		 * RPCRDMA_MAX_DATA_SEGS.
>> -		 */
>> -		if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
>> -			int delta = RPCRDMA_MAX_DATA_SEGS -
>> -				    ia->ri_max_frmr_depth;
>> -
>> -			do {
>> -				depth += 2; /* FRMR reg + invalidate */
>> -				delta -= ia->ri_max_frmr_depth;
>> -			} while (delta > 0);
>> -
>> -		}
>> -		ep->rep_attr.cap.max_send_wr *= depth;
>> -		if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) {
>> -			cdata->max_requests = devattr->max_qp_wr / depth;
>> -			if (!cdata->max_requests)
>> -				return -EINVAL;
>> -			ep->rep_attr.cap.max_send_wr = cdata-
>>> max_requests *
>> -						       depth;
>> -		}
>> -		break;
>> -	}
>> -	default:
>> -		break;
>> -	}
>> -	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
>> 	ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
>> 	ep->rep_attr.cap.max_recv_sge = 1;
>> 	ep->rep_attr.cap.max_inline_data = 0;
>> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h
>> b/net/sunrpc/xprtrdma/xprt_rdma.h index a0e3c3e..a53a564 100644
>> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
>> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
>> @@ -340,6 +340,9 @@ struct rpcrdma_memreg_ops {
>> 				  struct rpcrdma_mr_seg *, int, bool);
>> 	void		(*ro_unmap)(struct rpcrdma_xprt *,
>> 				    struct rpcrdma_req *, unsigned int);
>> +	int		(*ro_open)(struct rpcrdma_ia *,
>> +				   struct rpcrdma_ep *,
>> +				   struct rpcrdma_create_data_internal *);
>> 	size_t		(*ro_maxpages)(struct rpcrdma_xprt *);
>> 	int		(*ro_init)(struct rpcrdma_xprt *);
>> 	void		(*ro_reset)(struct rpcrdma_xprt *);
>> 
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body
>> of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org More majordomo info at
>> http://vger.kernel.org/majordomo-info.html

--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com



--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH v1 06/16] xprtrdma: Add a "deregister_external" op for each memreg mode
  2015-03-17 14:37   ` Anna Schumaker
@ 2015-03-17 15:04     ` Chuck Lever
  0 siblings, 0 replies; 52+ messages in thread
From: Chuck Lever @ 2015-03-17 15:04 UTC (permalink / raw)
  To: Anna Schumaker; +Cc: Linux NFS Mailing List


On Mar 17, 2015, at 7:37 AM, Anna Schumaker <Anna.Schumaker@netapp.com> wrote:

> On 03/13/2015 05:22 PM, Chuck Lever wrote:
>> There is very little common processing among the different external
>> memory deregistration functions.
>> 
>> In addition, instead of calling the deregistration function for each
>> segment, have one call release all segments for a request. This makes
>> the API a little asymmetrical, but a hair faster.
> 
> The common processing would be the for-each loop that you moved into the ro_unmap functions.  I'm not completely sold on this... how often do unmaps happen?

Once for every RPC.

> Anna
> 
>> 
>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
>> ---
>> net/sunrpc/xprtrdma/fmr_ops.c      |   37 ++++++++++++++++
>> net/sunrpc/xprtrdma/frwr_ops.c     |   46 ++++++++++++++++++++
>> net/sunrpc/xprtrdma/physical_ops.c |   13 ++++++
>> net/sunrpc/xprtrdma/rpc_rdma.c     |    7 +--
>> net/sunrpc/xprtrdma/transport.c    |    8 +---
>> net/sunrpc/xprtrdma/verbs.c        |   81 ------------------------------------
>> net/sunrpc/xprtrdma/xprt_rdma.h    |    5 +-
>> 7 files changed, 103 insertions(+), 94 deletions(-)
>> 
>> diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
>> index 45fb646..9b983b4 100644
>> --- a/net/sunrpc/xprtrdma/fmr_ops.c
>> +++ b/net/sunrpc/xprtrdma/fmr_ops.c
>> @@ -20,6 +20,32 @@
>> /* Maximum scatter/gather per FMR */
>> #define RPCRDMA_MAX_FMR_SGES	(64)
>> 
>> +/* Use the ib_unmap_fmr() verb to prevent further remote
>> + * access via RDMA READ or RDMA WRITE.
>> + */
>> +static int
>> +__fmr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
>> +{
>> +	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
>> +	struct rpcrdma_mr_seg *seg1 = seg;
>> +	int rc, nsegs = seg->mr_nsegs;
>> +	LIST_HEAD(l);
>> +
>> +	list_add(&seg1->rl_mw->r.fmr->list, &l);
>> +	rc = ib_unmap_fmr(&l);
>> +	read_lock(&ia->ri_qplock);
>> +	while (seg1->mr_nsegs--)
>> +		rpcrdma_unmap_one(ia, seg++);
>> +	read_unlock(&ia->ri_qplock);
>> +	if (rc)
>> +		goto out_err;
>> +	return nsegs;
>> +
>> +out_err:
>> +	dprintk("RPC:       %s: ib_unmap_fmr status %i\n", __func__, rc);
>> +	return nsegs;
>> +}
>> +
>> /* FMR mode conveys up to 64 pages of payload per chunk segment.
>>  */
>> static size_t
>> @@ -79,8 +105,19 @@ out_maperr:
>> 	return rc;
>> }
>> 
>> +static void
>> +fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
>> +	     unsigned int count)
>> +{
>> +	unsigned int i;
>> +
>> +	for (i = 0; count--;)
>> +		i += __fmr_unmap(r_xprt, &req->rl_segments[i]);
>> +}
>> +
>> const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
>> 	.ro_map				= fmr_op_map,
>> +	.ro_unmap			= fmr_op_unmap,
>> 	.ro_maxpages			= fmr_op_maxpages,
>> 	.ro_displayname			= "fmr",
>> };
>> diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
>> index 2b5ccb0..05b5761 100644
>> --- a/net/sunrpc/xprtrdma/frwr_ops.c
>> +++ b/net/sunrpc/xprtrdma/frwr_ops.c
>> @@ -17,6 +17,41 @@
>> # define RPCDBG_FACILITY	RPCDBG_TRANS
>> #endif
>> 
>> +/* Post a LOCAL_INV Work Request to prevent further remote access
>> + * via RDMA READ or RDMA WRITE.
>> + */
>> +static int
>> +__frwr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
>> +{
>> +	struct rpcrdma_mr_seg *seg1 = seg;
>> +	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
>> +	struct ib_send_wr invalidate_wr, *bad_wr;
>> +	int rc, nsegs = seg->mr_nsegs;
>> +
>> +	seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;
>> +
>> +	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
>> +	invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw;
>> +	invalidate_wr.opcode = IB_WR_LOCAL_INV;
>> +	invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey;
>> +	DECR_CQCOUNT(&r_xprt->rx_ep);
>> +
>> +	read_lock(&ia->ri_qplock);
>> +	while (seg1->mr_nsegs--)
>> +		rpcrdma_unmap_one(ia, seg++);
>> +	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
>> +	read_unlock(&ia->ri_qplock);
>> +	if (rc)
>> +		goto out_err;
>> +	return nsegs;
>> +
>> +out_err:
>> +	/* Force rpcrdma_buffer_get() to retry */
>> +	seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
>> +	dprintk("RPC:       %s: ib_post_send status %i\n", __func__, rc);
>> +	return nsegs;
>> +}
>> +
>> /* FRWR mode conveys a list of pages per chunk segment. The
>>  * maximum length of that list is the FRWR page list depth.
>>  */
>> @@ -116,8 +151,19 @@ out_err:
>> 	return rc;
>> }
>> 
>> +static void
>> +frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
>> +	      unsigned int count)
>> +{
>> +	unsigned int i;
>> +
>> +	for (i = 0; count--;)
>> +		i += __frwr_unmap(r_xprt, &req->rl_segments[i]);
>> +}
>> +
>> const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
>> 	.ro_map				= frwr_op_map,
>> +	.ro_unmap			= frwr_op_unmap,
>> 	.ro_maxpages			= frwr_op_maxpages,
>> 	.ro_displayname			= "frwr",
>> };
>> diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
>> index 5a284ee..f2c15be 100644
>> --- a/net/sunrpc/xprtrdma/physical_ops.c
>> +++ b/net/sunrpc/xprtrdma/physical_ops.c
>> @@ -44,8 +44,21 @@ physical_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
>> 	return 1;
>> }
>> 
>> +/* Unmap a memory region, but leave it registered.
>> + */
>> +static void
>> +physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
>> +		  unsigned int count)
>> +{
>> +	unsigned int i;
>> +
>> +	for (i = 0; i < count; i++)
>> +		rpcrdma_unmap_one(&r_xprt->rx_ia, &req->rl_segments[i]);
>> +}
>> +
>> const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
>> 	.ro_map				= physical_op_map,
>> +	.ro_unmap			= physical_op_unmap,
>> 	.ro_maxpages			= physical_op_maxpages,
>> 	.ro_displayname			= "physical",
>> };
>> diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
>> index 6ab1d03..7b51d9d 100644
>> --- a/net/sunrpc/xprtrdma/rpc_rdma.c
>> +++ b/net/sunrpc/xprtrdma/rpc_rdma.c
>> @@ -284,11 +284,8 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
>> 	return (unsigned char *)iptr - (unsigned char *)headerp;
>> 
>> out:
>> -	if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_FRMR) {
>> -		for (pos = 0; nchunks--;)
>> -			pos += rpcrdma_deregister_external(
>> -					&req->rl_segments[pos], r_xprt);
>> -	}
>> +	if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_FRMR)
>> +		r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, req, nchunks);
>> 	return n;
>> }
>> 
>> diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
>> index 9a9da40..c484671 100644
>> --- a/net/sunrpc/xprtrdma/transport.c
>> +++ b/net/sunrpc/xprtrdma/transport.c
>> @@ -571,7 +571,6 @@ xprt_rdma_free(void *buffer)
>> 	struct rpcrdma_req *req;
>> 	struct rpcrdma_xprt *r_xprt;
>> 	struct rpcrdma_regbuf *rb;
>> -	int i;
>> 
>> 	if (buffer == NULL)
>> 		return;
>> @@ -582,11 +581,8 @@ xprt_rdma_free(void *buffer)
>> 
>> 	dprintk("RPC:       %s: called on 0x%p\n", __func__, req->rl_reply);
>> 
>> -	for (i = 0; req->rl_nchunks;) {
>> -		--req->rl_nchunks;
>> -		i += rpcrdma_deregister_external(
>> -			&req->rl_segments[i], r_xprt);
>> -	}
>> +	r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, req, req->rl_nchunks);
>> +	req->rl_nchunks = 0;
>> 
>> 	rpcrdma_buffer_put(req);
>> }
>> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
>> index 851ed97..a1621fd 100644
>> --- a/net/sunrpc/xprtrdma/verbs.c
>> +++ b/net/sunrpc/xprtrdma/verbs.c
>> @@ -1509,7 +1509,7 @@ rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
>> 	}
>> }
>> 
>> -/* rpcrdma_unmap_one() was already done by rpcrdma_deregister_frmr_external().
>> +/* rpcrdma_unmap_one() was already done during deregistration.
>>  * Redo only the ib_post_send().
>>  */
>> static void
>> @@ -1889,85 +1889,6 @@ rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
>> 				seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
>> }
>> 
>> -static int
>> -rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
>> -			struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
>> -{
>> -	struct rpcrdma_mr_seg *seg1 = seg;
>> -	struct ib_send_wr invalidate_wr, *bad_wr;
>> -	int rc;
>> -
>> -	seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;
>> -
>> -	memset(&invalidate_wr, 0, sizeof invalidate_wr);
>> -	invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw;
>> -	invalidate_wr.opcode = IB_WR_LOCAL_INV;
>> -	invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey;
>> -	DECR_CQCOUNT(&r_xprt->rx_ep);
>> -
>> -	read_lock(&ia->ri_qplock);
>> -	while (seg1->mr_nsegs--)
>> -		rpcrdma_unmap_one(ia, seg++);
>> -	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
>> -	read_unlock(&ia->ri_qplock);
>> -	if (rc) {
>> -		/* Force rpcrdma_buffer_get() to retry */
>> -		seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
>> -		dprintk("RPC:       %s: failed ib_post_send for invalidate,"
>> -			" status %i\n", __func__, rc);
>> -	}
>> -	return rc;
>> -}
>> -
>> -static int
>> -rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
>> -			struct rpcrdma_ia *ia)
>> -{
>> -	struct rpcrdma_mr_seg *seg1 = seg;
>> -	LIST_HEAD(l);
>> -	int rc;
>> -
>> -	list_add(&seg1->rl_mw->r.fmr->list, &l);
>> -	rc = ib_unmap_fmr(&l);
>> -	read_lock(&ia->ri_qplock);
>> -	while (seg1->mr_nsegs--)
>> -		rpcrdma_unmap_one(ia, seg++);
>> -	read_unlock(&ia->ri_qplock);
>> -	if (rc)
>> -		dprintk("RPC:       %s: failed ib_unmap_fmr,"
>> -			" status %i\n", __func__, rc);
>> -	return rc;
>> -}
>> -
>> -int
>> -rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
>> -		struct rpcrdma_xprt *r_xprt)
>> -{
>> -	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
>> -	int nsegs = seg->mr_nsegs, rc;
>> -
>> -	switch (ia->ri_memreg_strategy) {
>> -
>> -	case RPCRDMA_ALLPHYSICAL:
>> -		read_lock(&ia->ri_qplock);
>> -		rpcrdma_unmap_one(ia, seg);
>> -		read_unlock(&ia->ri_qplock);
>> -		break;
>> -
>> -	case RPCRDMA_FRMR:
>> -		rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
>> -		break;
>> -
>> -	case RPCRDMA_MTHCAFMR:
>> -		rc = rpcrdma_deregister_fmr_external(seg, ia);
>> -		break;
>> -
>> -	default:
>> -		break;
>> -	}
>> -	return nsegs;
>> -}
>> -
>> /*
>>  * Prepost any receive buffer, then post send.
>>  *
>> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
>> index 7bf077b..3aabbb2 100644
>> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
>> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
>> @@ -338,6 +338,8 @@ struct rpcrdma_xprt;
>> struct rpcrdma_memreg_ops {
>> 	int		(*ro_map)(struct rpcrdma_xprt *,
>> 				  struct rpcrdma_mr_seg *, int, bool);
>> +	void		(*ro_unmap)(struct rpcrdma_xprt *,
>> +				    struct rpcrdma_req *, unsigned int);
>> 	size_t		(*ro_maxpages)(struct rpcrdma_xprt *);
>> 	const char	*ro_displayname;
>> };
>> @@ -405,9 +407,6 @@ void rpcrdma_buffer_put(struct rpcrdma_req *);
>> void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
>> void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
>> 
>> -int rpcrdma_deregister_external(struct rpcrdma_mr_seg *,
>> -				struct rpcrdma_xprt *);
>> -
>> struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *,
>> 					    size_t, gfp_t);
>> void rpcrdma_free_regbuf(struct rpcrdma_ia *,
>> 
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com




^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH v1 06/16] xprtrdma: Add a "deregister_external" op for each memreg mode
  2015-03-13 21:22 ` [PATCH v1 06/16] xprtrdma: Add a "deregister_external" op for each memreg mode Chuck Lever
@ 2015-03-17 14:37   ` Anna Schumaker
  2015-03-17 15:04     ` Chuck Lever
  0 siblings, 1 reply; 52+ messages in thread
From: Anna Schumaker @ 2015-03-17 14:37 UTC (permalink / raw)
  To: Chuck Lever, linux-nfs

On 03/13/2015 05:22 PM, Chuck Lever wrote:
> There is very little common processing among the different external
> memory deregistration functions.
> 
> In addition, instead of calling the deregistration function for each
> segment, have one call release all segments for a request. This makes
> the API a little asymmetrical, but a hair faster.

The common processing would be the for-each loop that you moved into the ro_unmap functions.  I'm not completely sold on this... how often do unmaps happen?

Anna

> 
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
>  net/sunrpc/xprtrdma/fmr_ops.c      |   37 ++++++++++++++++
>  net/sunrpc/xprtrdma/frwr_ops.c     |   46 ++++++++++++++++++++
>  net/sunrpc/xprtrdma/physical_ops.c |   13 ++++++
>  net/sunrpc/xprtrdma/rpc_rdma.c     |    7 +--
>  net/sunrpc/xprtrdma/transport.c    |    8 +---
>  net/sunrpc/xprtrdma/verbs.c        |   81 ------------------------------------
>  net/sunrpc/xprtrdma/xprt_rdma.h    |    5 +-
>  7 files changed, 103 insertions(+), 94 deletions(-)
> 
> diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
> index 45fb646..9b983b4 100644
> --- a/net/sunrpc/xprtrdma/fmr_ops.c
> +++ b/net/sunrpc/xprtrdma/fmr_ops.c
> @@ -20,6 +20,32 @@
>  /* Maximum scatter/gather per FMR */
>  #define RPCRDMA_MAX_FMR_SGES	(64)
>  
> +/* Use the ib_unmap_fmr() verb to prevent further remote
> + * access via RDMA READ or RDMA WRITE.
> + */
> +static int
> +__fmr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
> +{
> +	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
> +	struct rpcrdma_mr_seg *seg1 = seg;
> +	int rc, nsegs = seg->mr_nsegs;
> +	LIST_HEAD(l);
> +
> +	list_add(&seg1->rl_mw->r.fmr->list, &l);
> +	rc = ib_unmap_fmr(&l);
> +	read_lock(&ia->ri_qplock);
> +	while (seg1->mr_nsegs--)
> +		rpcrdma_unmap_one(ia, seg++);
> +	read_unlock(&ia->ri_qplock);
> +	if (rc)
> +		goto out_err;
> +	return nsegs;
> +
> +out_err:
> +	dprintk("RPC:       %s: ib_unmap_fmr status %i\n", __func__, rc);
> +	return nsegs;
> +}
> +
>  /* FMR mode conveys up to 64 pages of payload per chunk segment.
>   */
>  static size_t
> @@ -79,8 +105,19 @@ out_maperr:
>  	return rc;
>  }
>  
> +static void
> +fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
> +	     unsigned int count)
> +{
> +	unsigned int i;
> +
> +	for (i = 0; count--;)
> +		i += __fmr_unmap(r_xprt, &req->rl_segments[i]);
> +}
> +
>  const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
>  	.ro_map				= fmr_op_map,
> +	.ro_unmap			= fmr_op_unmap,
>  	.ro_maxpages			= fmr_op_maxpages,
>  	.ro_displayname			= "fmr",
>  };
> diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
> index 2b5ccb0..05b5761 100644
> --- a/net/sunrpc/xprtrdma/frwr_ops.c
> +++ b/net/sunrpc/xprtrdma/frwr_ops.c
> @@ -17,6 +17,41 @@
>  # define RPCDBG_FACILITY	RPCDBG_TRANS
>  #endif
>  
> +/* Post a LOCAL_INV Work Request to prevent further remote access
> + * via RDMA READ or RDMA WRITE.
> + */
> +static int
> +__frwr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
> +{
> +	struct rpcrdma_mr_seg *seg1 = seg;
> +	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
> +	struct ib_send_wr invalidate_wr, *bad_wr;
> +	int rc, nsegs = seg->mr_nsegs;
> +
> +	seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;
> +
> +	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
> +	invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw;
> +	invalidate_wr.opcode = IB_WR_LOCAL_INV;
> +	invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey;
> +	DECR_CQCOUNT(&r_xprt->rx_ep);
> +
> +	read_lock(&ia->ri_qplock);
> +	while (seg1->mr_nsegs--)
> +		rpcrdma_unmap_one(ia, seg++);
> +	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
> +	read_unlock(&ia->ri_qplock);
> +	if (rc)
> +		goto out_err;
> +	return nsegs;
> +
> +out_err:
> +	/* Force rpcrdma_buffer_get() to retry */
> +	seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
> +	dprintk("RPC:       %s: ib_post_send status %i\n", __func__, rc);
> +	return nsegs;
> +}
> +
>  /* FRWR mode conveys a list of pages per chunk segment. The
>   * maximum length of that list is the FRWR page list depth.
>   */
> @@ -116,8 +151,19 @@ out_err:
>  	return rc;
>  }
>  
> +static void
> +frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
> +	      unsigned int count)
> +{
> +	unsigned int i;
> +
> +	for (i = 0; count--;)
> +		i += __frwr_unmap(r_xprt, &req->rl_segments[i]);
> +}
> +
>  const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
>  	.ro_map				= frwr_op_map,
> +	.ro_unmap			= frwr_op_unmap,
>  	.ro_maxpages			= frwr_op_maxpages,
>  	.ro_displayname			= "frwr",
>  };
> diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
> index 5a284ee..f2c15be 100644
> --- a/net/sunrpc/xprtrdma/physical_ops.c
> +++ b/net/sunrpc/xprtrdma/physical_ops.c
> @@ -44,8 +44,21 @@ physical_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
>  	return 1;
>  }
>  
> +/* Unmap a memory region, but leave it registered.
> + */
> +static void
> +physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
> +		  unsigned int count)
> +{
> +	unsigned int i;
> +
> +	for (i = 0; i < count; i++)
> +		rpcrdma_unmap_one(&r_xprt->rx_ia, &req->rl_segments[i]);
> +}
> +
>  const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
>  	.ro_map				= physical_op_map,
> +	.ro_unmap			= physical_op_unmap,
>  	.ro_maxpages			= physical_op_maxpages,
>  	.ro_displayname			= "physical",
>  };
> diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
> index 6ab1d03..7b51d9d 100644
> --- a/net/sunrpc/xprtrdma/rpc_rdma.c
> +++ b/net/sunrpc/xprtrdma/rpc_rdma.c
> @@ -284,11 +284,8 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
>  	return (unsigned char *)iptr - (unsigned char *)headerp;
>  
>  out:
> -	if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_FRMR) {
> -		for (pos = 0; nchunks--;)
> -			pos += rpcrdma_deregister_external(
> -					&req->rl_segments[pos], r_xprt);
> -	}
> +	if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_FRMR)
> +		r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, req, nchunks);
>  	return n;
>  }
>  
> diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
> index 9a9da40..c484671 100644
> --- a/net/sunrpc/xprtrdma/transport.c
> +++ b/net/sunrpc/xprtrdma/transport.c
> @@ -571,7 +571,6 @@ xprt_rdma_free(void *buffer)
>  	struct rpcrdma_req *req;
>  	struct rpcrdma_xprt *r_xprt;
>  	struct rpcrdma_regbuf *rb;
> -	int i;
>  
>  	if (buffer == NULL)
>  		return;
> @@ -582,11 +581,8 @@ xprt_rdma_free(void *buffer)
>  
>  	dprintk("RPC:       %s: called on 0x%p\n", __func__, req->rl_reply);
>  
> -	for (i = 0; req->rl_nchunks;) {
> -		--req->rl_nchunks;
> -		i += rpcrdma_deregister_external(
> -			&req->rl_segments[i], r_xprt);
> -	}
> +	r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, req, req->rl_nchunks);
> +	req->rl_nchunks = 0;
>  
>  	rpcrdma_buffer_put(req);
>  }
> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
> index 851ed97..a1621fd 100644
> --- a/net/sunrpc/xprtrdma/verbs.c
> +++ b/net/sunrpc/xprtrdma/verbs.c
> @@ -1509,7 +1509,7 @@ rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
>  	}
>  }
>  
> -/* rpcrdma_unmap_one() was already done by rpcrdma_deregister_frmr_external().
> +/* rpcrdma_unmap_one() was already done during deregistration.
>   * Redo only the ib_post_send().
>   */
>  static void
> @@ -1889,85 +1889,6 @@ rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
>  				seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
>  }
>  
> -static int
> -rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
> -			struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
> -{
> -	struct rpcrdma_mr_seg *seg1 = seg;
> -	struct ib_send_wr invalidate_wr, *bad_wr;
> -	int rc;
> -
> -	seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;
> -
> -	memset(&invalidate_wr, 0, sizeof invalidate_wr);
> -	invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw;
> -	invalidate_wr.opcode = IB_WR_LOCAL_INV;
> -	invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey;
> -	DECR_CQCOUNT(&r_xprt->rx_ep);
> -
> -	read_lock(&ia->ri_qplock);
> -	while (seg1->mr_nsegs--)
> -		rpcrdma_unmap_one(ia, seg++);
> -	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
> -	read_unlock(&ia->ri_qplock);
> -	if (rc) {
> -		/* Force rpcrdma_buffer_get() to retry */
> -		seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
> -		dprintk("RPC:       %s: failed ib_post_send for invalidate,"
> -			" status %i\n", __func__, rc);
> -	}
> -	return rc;
> -}
> -
> -static int
> -rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
> -			struct rpcrdma_ia *ia)
> -{
> -	struct rpcrdma_mr_seg *seg1 = seg;
> -	LIST_HEAD(l);
> -	int rc;
> -
> -	list_add(&seg1->rl_mw->r.fmr->list, &l);
> -	rc = ib_unmap_fmr(&l);
> -	read_lock(&ia->ri_qplock);
> -	while (seg1->mr_nsegs--)
> -		rpcrdma_unmap_one(ia, seg++);
> -	read_unlock(&ia->ri_qplock);
> -	if (rc)
> -		dprintk("RPC:       %s: failed ib_unmap_fmr,"
> -			" status %i\n", __func__, rc);
> -	return rc;
> -}
> -
> -int
> -rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
> -		struct rpcrdma_xprt *r_xprt)
> -{
> -	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
> -	int nsegs = seg->mr_nsegs, rc;
> -
> -	switch (ia->ri_memreg_strategy) {
> -
> -	case RPCRDMA_ALLPHYSICAL:
> -		read_lock(&ia->ri_qplock);
> -		rpcrdma_unmap_one(ia, seg);
> -		read_unlock(&ia->ri_qplock);
> -		break;
> -
> -	case RPCRDMA_FRMR:
> -		rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
> -		break;
> -
> -	case RPCRDMA_MTHCAFMR:
> -		rc = rpcrdma_deregister_fmr_external(seg, ia);
> -		break;
> -
> -	default:
> -		break;
> -	}
> -	return nsegs;
> -}
> -
>  /*
>   * Prepost any receive buffer, then post send.
>   *
> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
> index 7bf077b..3aabbb2 100644
> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
> @@ -338,6 +338,8 @@ struct rpcrdma_xprt;
>  struct rpcrdma_memreg_ops {
>  	int		(*ro_map)(struct rpcrdma_xprt *,
>  				  struct rpcrdma_mr_seg *, int, bool);
> +	void		(*ro_unmap)(struct rpcrdma_xprt *,
> +				    struct rpcrdma_req *, unsigned int);
>  	size_t		(*ro_maxpages)(struct rpcrdma_xprt *);
>  	const char	*ro_displayname;
>  };
> @@ -405,9 +407,6 @@ void rpcrdma_buffer_put(struct rpcrdma_req *);
>  void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
>  void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
>  
> -int rpcrdma_deregister_external(struct rpcrdma_mr_seg *,
> -				struct rpcrdma_xprt *);
> -
>  struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *,
>  					    size_t, gfp_t);
>  void rpcrdma_free_regbuf(struct rpcrdma_ia *,
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 


^ permalink raw reply	[flat|nested] 52+ messages in thread

* [PATCH v1 06/16] xprtrdma: Add a "deregister_external" op for each memreg mode
  2015-03-13 21:21 [PATCH v1 00/16] NFS/RDMA patches proposed for 4.1 Chuck Lever
@ 2015-03-13 21:22 ` Chuck Lever
  2015-03-17 14:37   ` Anna Schumaker
  0 siblings, 1 reply; 52+ messages in thread
From: Chuck Lever @ 2015-03-13 21:22 UTC (permalink / raw)
  To: linux-nfs

There is very little common processing among the different external
memory deregistration functions.

In addition, instead of calling the deregistration function for each
segment, have one call release all segments for a request. This makes
the API a little asymmetrical, but a hair faster.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 net/sunrpc/xprtrdma/fmr_ops.c      |   37 ++++++++++++++++
 net/sunrpc/xprtrdma/frwr_ops.c     |   46 ++++++++++++++++++++
 net/sunrpc/xprtrdma/physical_ops.c |   13 ++++++
 net/sunrpc/xprtrdma/rpc_rdma.c     |    7 +--
 net/sunrpc/xprtrdma/transport.c    |    8 +---
 net/sunrpc/xprtrdma/verbs.c        |   81 ------------------------------------
 net/sunrpc/xprtrdma/xprt_rdma.h    |    5 +-
 7 files changed, 103 insertions(+), 94 deletions(-)

diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 45fb646..9b983b4 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -20,6 +20,32 @@
 /* Maximum scatter/gather per FMR */
 #define RPCRDMA_MAX_FMR_SGES	(64)
 
+/* Use the ib_unmap_fmr() verb to prevent further remote
+ * access via RDMA READ or RDMA WRITE.
+ */
+static int
+__fmr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
+{
+	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+	struct rpcrdma_mr_seg *seg1 = seg;
+	int rc, nsegs = seg->mr_nsegs;
+	LIST_HEAD(l);
+
+	list_add(&seg1->rl_mw->r.fmr->list, &l);
+	rc = ib_unmap_fmr(&l);
+	read_lock(&ia->ri_qplock);
+	while (seg1->mr_nsegs--)
+		rpcrdma_unmap_one(ia, seg++);
+	read_unlock(&ia->ri_qplock);
+	if (rc)
+		goto out_err;
+	return nsegs;
+
+out_err:
+	dprintk("RPC:       %s: ib_unmap_fmr status %i\n", __func__, rc);
+	return nsegs;
+}
+
 /* FMR mode conveys up to 64 pages of payload per chunk segment.
  */
 static size_t
@@ -79,8 +105,19 @@ out_maperr:
 	return rc;
 }
 
+static void
+fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
+	     unsigned int count)
+{
+	unsigned int i;
+
+	for (i = 0; count--;)
+		i += __fmr_unmap(r_xprt, &req->rl_segments[i]);
+}
+
 const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
 	.ro_map				= fmr_op_map,
+	.ro_unmap			= fmr_op_unmap,
 	.ro_maxpages			= fmr_op_maxpages,
 	.ro_displayname			= "fmr",
 };
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 2b5ccb0..05b5761 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -17,6 +17,41 @@
 # define RPCDBG_FACILITY	RPCDBG_TRANS
 #endif
 
+/* Post a LOCAL_INV Work Request to prevent further remote access
+ * via RDMA READ or RDMA WRITE.
+ */
+static int
+__frwr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
+{
+	struct rpcrdma_mr_seg *seg1 = seg;
+	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+	struct ib_send_wr invalidate_wr, *bad_wr;
+	int rc, nsegs = seg->mr_nsegs;
+
+	seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;
+
+	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
+	invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw;
+	invalidate_wr.opcode = IB_WR_LOCAL_INV;
+	invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey;
+	DECR_CQCOUNT(&r_xprt->rx_ep);
+
+	read_lock(&ia->ri_qplock);
+	while (seg1->mr_nsegs--)
+		rpcrdma_unmap_one(ia, seg++);
+	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
+	read_unlock(&ia->ri_qplock);
+	if (rc)
+		goto out_err;
+	return nsegs;
+
+out_err:
+	/* Force rpcrdma_buffer_get() to retry */
+	seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
+	dprintk("RPC:       %s: ib_post_send status %i\n", __func__, rc);
+	return nsegs;
+}
+
 /* FRWR mode conveys a list of pages per chunk segment. The
  * maximum length of that list is the FRWR page list depth.
  */
@@ -116,8 +151,19 @@ out_err:
 	return rc;
 }
 
+static void
+frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
+	      unsigned int count)
+{
+	unsigned int i;
+
+	for (i = 0; count--;)
+		i += __frwr_unmap(r_xprt, &req->rl_segments[i]);
+}
+
 const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
 	.ro_map				= frwr_op_map,
+	.ro_unmap			= frwr_op_unmap,
 	.ro_maxpages			= frwr_op_maxpages,
 	.ro_displayname			= "frwr",
 };
diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
index 5a284ee..f2c15be 100644
--- a/net/sunrpc/xprtrdma/physical_ops.c
+++ b/net/sunrpc/xprtrdma/physical_ops.c
@@ -44,8 +44,21 @@ physical_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 	return 1;
 }
 
+/* Unmap a memory region, but leave it registered.
+ */
+static void
+physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
+		  unsigned int count)
+{
+	unsigned int i;
+
+	for (i = 0; i < count; i++)
+		rpcrdma_unmap_one(&r_xprt->rx_ia, &req->rl_segments[i]);
+}
+
 const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
 	.ro_map				= physical_op_map,
+	.ro_unmap			= physical_op_unmap,
 	.ro_maxpages			= physical_op_maxpages,
 	.ro_displayname			= "physical",
 };
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 6ab1d03..7b51d9d 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -284,11 +284,8 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
 	return (unsigned char *)iptr - (unsigned char *)headerp;
 
 out:
-	if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_FRMR) {
-		for (pos = 0; nchunks--;)
-			pos += rpcrdma_deregister_external(
-					&req->rl_segments[pos], r_xprt);
-	}
+	if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_FRMR)
+		r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, req, nchunks);
 	return n;
 }
 
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 9a9da40..c484671 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -571,7 +571,6 @@ xprt_rdma_free(void *buffer)
 	struct rpcrdma_req *req;
 	struct rpcrdma_xprt *r_xprt;
 	struct rpcrdma_regbuf *rb;
-	int i;
 
 	if (buffer == NULL)
 		return;
@@ -582,11 +581,8 @@ xprt_rdma_free(void *buffer)
 
 	dprintk("RPC:       %s: called on 0x%p\n", __func__, req->rl_reply);
 
-	for (i = 0; req->rl_nchunks;) {
-		--req->rl_nchunks;
-		i += rpcrdma_deregister_external(
-			&req->rl_segments[i], r_xprt);
-	}
+	r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, req, req->rl_nchunks);
+	req->rl_nchunks = 0;
 
 	rpcrdma_buffer_put(req);
 }
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 851ed97..a1621fd 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1509,7 +1509,7 @@ rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
 	}
 }
 
-/* rpcrdma_unmap_one() was already done by rpcrdma_deregister_frmr_external().
+/* rpcrdma_unmap_one() was already done during deregistration.
  * Redo only the ib_post_send().
  */
 static void
@@ -1889,85 +1889,6 @@ rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
 				seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
 }
 
-static int
-rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
-			struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
-{
-	struct rpcrdma_mr_seg *seg1 = seg;
-	struct ib_send_wr invalidate_wr, *bad_wr;
-	int rc;
-
-	seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;
-
-	memset(&invalidate_wr, 0, sizeof invalidate_wr);
-	invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw;
-	invalidate_wr.opcode = IB_WR_LOCAL_INV;
-	invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey;
-	DECR_CQCOUNT(&r_xprt->rx_ep);
-
-	read_lock(&ia->ri_qplock);
-	while (seg1->mr_nsegs--)
-		rpcrdma_unmap_one(ia, seg++);
-	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
-	read_unlock(&ia->ri_qplock);
-	if (rc) {
-		/* Force rpcrdma_buffer_get() to retry */
-		seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
-		dprintk("RPC:       %s: failed ib_post_send for invalidate,"
-			" status %i\n", __func__, rc);
-	}
-	return rc;
-}
-
-static int
-rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
-			struct rpcrdma_ia *ia)
-{
-	struct rpcrdma_mr_seg *seg1 = seg;
-	LIST_HEAD(l);
-	int rc;
-
-	list_add(&seg1->rl_mw->r.fmr->list, &l);
-	rc = ib_unmap_fmr(&l);
-	read_lock(&ia->ri_qplock);
-	while (seg1->mr_nsegs--)
-		rpcrdma_unmap_one(ia, seg++);
-	read_unlock(&ia->ri_qplock);
-	if (rc)
-		dprintk("RPC:       %s: failed ib_unmap_fmr,"
-			" status %i\n", __func__, rc);
-	return rc;
-}
-
-int
-rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
-		struct rpcrdma_xprt *r_xprt)
-{
-	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
-	int nsegs = seg->mr_nsegs, rc;
-
-	switch (ia->ri_memreg_strategy) {
-
-	case RPCRDMA_ALLPHYSICAL:
-		read_lock(&ia->ri_qplock);
-		rpcrdma_unmap_one(ia, seg);
-		read_unlock(&ia->ri_qplock);
-		break;
-
-	case RPCRDMA_FRMR:
-		rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
-		break;
-
-	case RPCRDMA_MTHCAFMR:
-		rc = rpcrdma_deregister_fmr_external(seg, ia);
-		break;
-
-	default:
-		break;
-	}
-	return nsegs;
-}
-
 /*
  * Prepost any receive buffer, then post send.
  *
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 7bf077b..3aabbb2 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -338,6 +338,8 @@ struct rpcrdma_xprt;
 struct rpcrdma_memreg_ops {
 	int		(*ro_map)(struct rpcrdma_xprt *,
 				  struct rpcrdma_mr_seg *, int, bool);
+	void		(*ro_unmap)(struct rpcrdma_xprt *,
+				    struct rpcrdma_req *, unsigned int);
 	size_t		(*ro_maxpages)(struct rpcrdma_xprt *);
 	const char	*ro_displayname;
 };
@@ -405,9 +407,6 @@ void rpcrdma_buffer_put(struct rpcrdma_req *);
 void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
 void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
 
-int rpcrdma_deregister_external(struct rpcrdma_mr_seg *,
-				struct rpcrdma_xprt *);
-
 struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *,
 					    size_t, gfp_t);
 void rpcrdma_free_regbuf(struct rpcrdma_ia *,


^ permalink raw reply related	[flat|nested] 52+ messages in thread

end of thread, other threads:[~2015-03-24 15:29 UTC | newest]

Thread overview: 52+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-03-13 21:26 [PATCH v1 00/16] NFS/RDMA patches proposed for 4.1 Chuck Lever
     [not found] ` <20150313212517.22783.18364.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2015-03-13 21:26   ` [PATCH v1 01/16] xprtrdma: Display IPv6 addresses and port numbers correctly Chuck Lever
     [not found]     ` <20150313212641.22783.93522.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2015-03-15  2:50       ` Sagi Grimberg
     [not found]         ` <5504F37A.20803-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2015-03-16 16:34           ` Chuck Lever
2015-03-24  8:27       ` Devesh Sharma
     [not found]         ` <EE7902D3F51F404C82415C4803930ACD5DC3A952-DWYeeINJQrxExQ8dmkPuX0M9+F4ksjoh@public.gmane.org>
2015-03-24 14:43           ` Chuck Lever
2015-03-13 21:26   ` [PATCH v1 02/16] xprtrdma: Perform a full marshal on retransmit Chuck Lever
     [not found]     ` <20150313212650.22783.28071.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2015-03-15  3:01       ` Sagi Grimberg
2015-03-13 21:26   ` [PATCH v1 03/16] xprtrdma: Add vector of ops for each memory registration strategy Chuck Lever
     [not found]     ` <20150313212659.22783.28341.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2015-03-15  3:04       ` Sagi Grimberg
2015-03-13 21:27   ` [PATCH v1 04/16] xprtrdma: Add a "max_payload" op for each memreg mode Chuck Lever
     [not found]     ` <20150313212708.22783.70403.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2015-03-16 10:18       ` Sagi Grimberg
2015-03-13 21:27   ` [PATCH v1 05/16] xprtrdma: Add a "register_external" " Chuck Lever
     [not found]     ` <20150313212718.22783.10096.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2015-03-16 10:28       ` Sagi Grimberg
     [not found]         ` <5506B036.1040905-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2015-03-16 16:48           ` Chuck Lever
     [not found]             ` <982A021D-1B85-4AAF-89A3-302A21CF2B36-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2015-03-16 18:15               ` Sagi Grimberg
     [not found]                 ` <55071DBB.4050500-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2015-03-16 20:13                   ` Steve Wise
     [not found]                     ` <55073966.30006-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
2015-03-16 22:11                       ` Chuck Lever
     [not found]                         ` <7595A8CB-B38B-4F01-A132-CE3BE143A897-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2015-03-21 11:53                           ` Sagi Grimberg
2015-03-13 21:27   ` [PATCH v1 06/16] xprtrdma: Add a "deregister_external" " Chuck Lever
     [not found]     ` <20150313212728.22783.11821.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2015-03-16 10:34       ` Sagi Grimberg
     [not found]         ` <5506B19F.80105-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2015-03-16 16:57           ` Chuck Lever
2015-03-24 11:12       ` Devesh Sharma
     [not found]         ` <EE7902D3F51F404C82415C4803930ACD5DC3A9C4-DWYeeINJQrxExQ8dmkPuX0M9+F4ksjoh@public.gmane.org>
2015-03-24 14:54           ` Chuck Lever
2015-03-13 21:27   ` [PATCH v1 07/16] xprtrdma: Add "init MRs" memreg op Chuck Lever
     [not found]     ` <20150313212738.22783.34521.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2015-03-16 10:36       ` Sagi Grimberg
2015-03-13 21:27   ` [PATCH v1 08/16] xprtrdma: Add "reset " Chuck Lever
     [not found]     ` <20150313212747.22783.98300.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2015-03-16 10:38       ` Sagi Grimberg
2015-03-24 11:27       ` Devesh Sharma
     [not found]         ` <EE7902D3F51F404C82415C4803930ACD5DC3A9E3-DWYeeINJQrxExQ8dmkPuX0M9+F4ksjoh@public.gmane.org>
2015-03-24 15:10           ` Chuck Lever
2015-03-13 21:27   ` [PATCH v1 09/16] xprtrdma: Add "destroy " Chuck Lever
     [not found]     ` <20150313212758.22783.67493.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2015-03-16 10:46       ` Sagi Grimberg
     [not found]         ` <5506B48F.6050902-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2015-03-16 16:59           ` Chuck Lever
2015-03-13 21:28   ` [PATCH v1 10/16] xprtrdma: Add "open" " Chuck Lever
     [not found]     ` <20150313212807.22783.61881.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2015-03-24 11:34       ` Devesh Sharma
     [not found]         ` <EE7902D3F51F404C82415C4803930ACD5DC3A9FF-DWYeeINJQrxExQ8dmkPuX0M9+F4ksjoh@public.gmane.org>
2015-03-24 15:29           ` Chuck Lever
2015-03-13 21:28   ` [PATCH v1 11/16] xprtrdma: Handle non-SEND completions via a callout Chuck Lever
     [not found]     ` <20150313212816.22783.49677.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2015-03-16 10:53       ` Sagi Grimberg
2015-03-13 21:28   ` [PATCH v1 12/16] xprtrdma: Acquire FMRs in rpcrdma_fmr_register_external() Chuck Lever
     [not found]     ` <20150313212825.22783.51384.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2015-03-16 12:28       ` Sagi Grimberg
     [not found]         ` <5506CC6C.8090106-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2015-03-16 17:03           ` Chuck Lever
2015-03-13 21:28   ` [PATCH v1 13/16] xprtrdma: Acquire MRs in rpcrdma_register_external() Chuck Lever
     [not found]     ` <20150313212835.22783.12326.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2015-03-16 12:44       ` Sagi Grimberg
     [not found]         ` <5506D02B.5050602-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2015-03-16 18:14           ` Chuck Lever
2015-03-13 21:28   ` [PATCH v1 14/16] xprtrdma: Remove rpcrdma_ia::ri_memreg_strategy Chuck Lever
     [not found]     ` <20150313212844.22783.1438.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2015-03-16 12:45       ` Sagi Grimberg
2015-03-13 21:28   ` [PATCH v1 15/16] xprtrdma: Make rpcrdma_{un}map_one() into inline functions Chuck Lever
     [not found]     ` <20150313212853.22783.62285.stgit-FYjufvaPoItvLzlybtyyYzGyq/o6K9yX@public.gmane.org>
2015-03-16 12:45       ` Sagi Grimberg
2015-03-13 21:29   ` [PATCH v1 16/16] xprtrdma: Split rb_lock Chuck Lever
  -- strict thread matches above, loose matches on Subject: below --
2015-03-13 21:21 [PATCH v1 00/16] NFS/RDMA patches proposed for 4.1 Chuck Lever
2015-03-13 21:22 ` [PATCH v1 06/16] xprtrdma: Add a "deregister_external" op for each memreg mode Chuck Lever
2015-03-17 14:37   ` Anna Schumaker
2015-03-17 15:04     ` Chuck Lever

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.