From: Chuck Lever <chuck.lever@oracle.com>
To: anna.schumaker@netapp.com
Cc: linux-nfs@vger.kernel.org, linux-rdma@vger.kernel.org
Subject: [PATCH v2 15/21] xprtrdma: Cache free MRs in each rpcrdma_req
Date: Mon, 19 Aug 2019 18:47:10 -0400
Message-ID: <156625481040.8161.13621119408884656142.stgit@seurat29.1015granger.net>
In-Reply-To: <156625401091.8161.14744201497689200191.stgit@seurat29.1015granger.net>

Instead of a globally-contended MR free list, cache MRs in each
rpcrdma_req as they are released. This means acquiring and releasing
an MR will be lock-free in the common case, even outside the
transport send lock.

The original idea of per-rpcrdma_req MR free lists was suggested by
Shirley Ma <shirley.ma@oracle.com> several years ago. I just now
figured out how to make that idea work with on-demand MR allocation.
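
A condensed sketch of the resulting acquire/release pattern, for
reference while reading the diff below (the helper names mr_acquire
and mr_release are illustrative only; the real hook points are
rpcrdma_mr_prepare() and rpcrdma_mr_put()):

    /* Acquire: try the req-local cache first; no lock taken */
    static struct rpcrdma_mr *mr_acquire(struct rpcrdma_xprt *r_xprt,
                                         struct rpcrdma_req *req)
    {
            struct rpcrdma_mr *mr;

            mr = rpcrdma_mr_pop(&req->rl_free_mrs);
            if (!mr) {
                    /* Cache empty: fall back to the locked global pool */
                    mr = rpcrdma_mr_get(r_xprt);
                    if (!mr)
                            return NULL;
                    mr->mr_req = req;       /* bind the MR to its owning req */
            }
            return mr;
    }

    /* Release: push back onto the owning req's cache, lock-free */
    static void mr_release(struct rpcrdma_mr *mr)
    {
            rpcrdma_mr_push(mr, &mr->mr_req->rl_free_mrs);
    }

Only when a req is destroyed are its cached MRs returned to the
global pool (see rpcrdma_req_destroy and rpcrdma_mr_free below).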

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/trace/events/rpcrdma.h  |   38 ++++++++++++++++++++++++++++++++++++--
 net/sunrpc/xprtrdma/frwr_ops.c  |    9 ++++++---
 net/sunrpc/xprtrdma/rpc_rdma.c  |   11 ++++++++---
 net/sunrpc/xprtrdma/verbs.c     |   18 +++++++++++++++---
 net/sunrpc/xprtrdma/xprt_rdma.h |    7 ++++---
 5 files changed, 69 insertions(+), 14 deletions(-)

diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index 83c4dfd7feea..a13830616107 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -451,16 +451,50 @@ TRACE_EVENT(xprtrdma_createmrs,
 
 	TP_STRUCT__entry(
 		__field(const void *, r_xprt)
+		__string(addr, rpcrdma_addrstr(r_xprt))
+		__string(port, rpcrdma_portstr(r_xprt))
 		__field(unsigned int, count)
 	),
 
 	TP_fast_assign(
 		__entry->r_xprt = r_xprt;
 		__entry->count = count;
+		__assign_str(addr, rpcrdma_addrstr(r_xprt));
+		__assign_str(port, rpcrdma_portstr(r_xprt));
 	),
 
-	TP_printk("r_xprt=%p: created %u MRs",
-		__entry->r_xprt, __entry->count
+	TP_printk("peer=[%s]:%s r_xprt=%p: created %u MRs",
+		__get_str(addr), __get_str(port), __entry->r_xprt,
+		__entry->count
+	)
+);
+
+TRACE_EVENT(xprtrdma_mr_get,
+	TP_PROTO(
+		const struct rpcrdma_req *req
+	),
+
+	TP_ARGS(req),
+
+	TP_STRUCT__entry(
+		__field(const void *, req)
+		__field(unsigned int, task_id)
+		__field(unsigned int, client_id)
+		__field(u32, xid)
+	),
+
+	TP_fast_assign(
+		const struct rpc_rqst *rqst = &req->rl_slot;
+
+		__entry->req = req;
+		__entry->task_id = rqst->rq_task->tk_pid;
+		__entry->client_id = rqst->rq_task->tk_client->cl_clid;
+		__entry->xid = be32_to_cpu(rqst->rq_xid);
+	),
+
+	TP_printk("task:%u@%u xid=0x%08x req=%p",
+		__entry->task_id, __entry->client_id, __entry->xid,
+		__entry->req
 	)
 );
 
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 1f2e3dda7401..0e740bae2d80 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -488,8 +488,8 @@ static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
 
 	/* WARNING: Only wr_cqe and status are reliable at this point */
 	trace_xprtrdma_wc_li_wake(wc, frwr);
-	complete(&frwr->fr_linv_done);
 	__frwr_release_mr(wc, mr);
+	complete(&frwr->fr_linv_done);
 }
 
 /**
@@ -587,11 +587,15 @@ static void frwr_wc_localinv_done(struct ib_cq *cq, struct ib_wc *wc)
 	struct rpcrdma_frwr *frwr =
 		container_of(cqe, struct rpcrdma_frwr, fr_cqe);
 	struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
+	struct rpcrdma_rep *rep = mr->mr_req->rl_reply;
 
 	/* WARNING: Only wr_cqe and status are reliable at this point */
 	trace_xprtrdma_wc_li_done(wc, frwr);
-	rpcrdma_complete_rqst(frwr->fr_req->rl_reply);
 	__frwr_release_mr(wc, mr);
+
+	/* Ensure @rep is generated before __frwr_release_mr */
+	smp_rmb();
+	rpcrdma_complete_rqst(rep);
 }
 
 /**
@@ -624,7 +628,6 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 
 		frwr = &mr->frwr;
 		frwr->fr_cqe.done = frwr_wc_localinv;
-		frwr->fr_req = req;
 		last = &frwr->fr_invwr;
 		last->next = NULL;
 		last->wr_cqe = &frwr->fr_cqe;
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 34772cb19286..ffeb4dfebd46 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -348,9 +348,14 @@ static struct rpcrdma_mr_seg *rpcrdma_mr_prepare(struct rpcrdma_xprt *r_xprt,
 						 int nsegs, bool writing,
 						 struct rpcrdma_mr **mr)
 {
-	*mr = rpcrdma_mr_get(r_xprt);
-	if (!*mr)
-		goto out_getmr_err;
+	*mr = rpcrdma_mr_pop(&req->rl_free_mrs);
+	if (!*mr) {
+		*mr = rpcrdma_mr_get(r_xprt);
+		if (!*mr)
+			goto out_getmr_err;
+		trace_xprtrdma_mr_get(req);
+		(*mr)->mr_req = req;
+	}
 
 	rpcrdma_mr_push(*mr, &req->rl_registered);
 	return frwr_map(r_xprt, seg, nsegs, writing, req->rl_slot.rq_xid, *mr);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index cb6df58488bb..69753ec73c36 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -77,6 +77,7 @@
 static void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
 static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
 static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf);
+static void rpcrdma_mr_free(struct rpcrdma_mr *mr);
 static struct rpcrdma_regbuf *
 rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction,
 		     gfp_t flags);
@@ -1022,6 +1023,7 @@ struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size,
 	if (!req->rl_recvbuf)
 		goto out4;
 
+	INIT_LIST_HEAD(&req->rl_free_mrs);
 	INIT_LIST_HEAD(&req->rl_registered);
 	spin_lock(&buffer->rb_lock);
 	list_add(&req->rl_all, &buffer->rb_allreqs);
@@ -1130,11 +1132,13 @@ static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
  * This function assumes that the caller prevents concurrent device
  * unload and transport tear-down.
  */
-void
-rpcrdma_req_destroy(struct rpcrdma_req *req)
+void rpcrdma_req_destroy(struct rpcrdma_req *req)
 {
 	list_del(&req->rl_all);
 
+	while (!list_empty(&req->rl_free_mrs))
+		rpcrdma_mr_free(rpcrdma_mr_pop(&req->rl_free_mrs));
+
 	rpcrdma_regbuf_free(req->rl_recvbuf);
 	rpcrdma_regbuf_free(req->rl_sendbuf);
 	rpcrdma_regbuf_free(req->rl_rdmabuf);
@@ -1228,7 +1232,6 @@ rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt)
 void rpcrdma_mr_put(struct rpcrdma_mr *mr)
 {
 	struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
-	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
 
 	if (mr->mr_dir != DMA_NONE) {
 		trace_xprtrdma_mr_unmap(mr);
@@ -1237,6 +1240,15 @@ void rpcrdma_mr_put(struct rpcrdma_mr *mr)
 		mr->mr_dir = DMA_NONE;
 	}
 
+	rpcrdma_mr_push(mr, &mr->mr_req->rl_free_mrs);
+}
+
+static void rpcrdma_mr_free(struct rpcrdma_mr *mr)
+{
+	struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
+	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+
+	mr->mr_req = NULL;
 	spin_lock(&buf->rb_mrlock);
 	rpcrdma_mr_push(mr, &buf->rb_mrs);
 	spin_unlock(&buf->rb_mrlock);
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 9573587ca602..c375b0e434ac 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -234,20 +234,20 @@ struct rpcrdma_sendctx {
  * An external memory region is any buffer or page that is registered
  * on the fly (ie, not pre-registered).
  */
-struct rpcrdma_req;
 struct rpcrdma_frwr {
 	struct ib_mr			*fr_mr;
 	struct ib_cqe			fr_cqe;
 	struct completion		fr_linv_done;
-	struct rpcrdma_req		*fr_req;
 	union {
 		struct ib_reg_wr	fr_regwr;
 		struct ib_send_wr	fr_invwr;
 	};
 };
 
+struct rpcrdma_req;
 struct rpcrdma_mr {
 	struct list_head	mr_list;
+	struct rpcrdma_req	*mr_req;
 	struct scatterlist	*mr_sg;
 	int			mr_nents;
 	enum dma_data_direction	mr_dir;
@@ -325,7 +325,8 @@ struct rpcrdma_req {
 	struct list_head	rl_all;
 	struct kref		rl_kref;
 
-	struct list_head	rl_registered;	/* registered segments */
+	struct list_head	rl_free_mrs;
+	struct list_head	rl_registered;
 	struct rpcrdma_mr_seg	rl_segments[RPCRDMA_MAX_SEGS];
 };
 


Thread overview: 23+ messages
2019-08-19 22:35 [PATCH v2 00/21] NFS/RDMA client-side for-5.4 Chuck Lever
2019-08-19 22:36 ` [PATCH v2 01/21] SUNRPC: Remove rpc_wake_up_queued_task_on_wq() Chuck Lever
2019-08-19 22:37 ` [PATCH v2 02/21] SUNRPC: Inline xdr_commit_encode Chuck Lever
2019-08-19 22:37 ` [PATCH v2 03/21] xprtrdma: Refresh the documenting comment in frwr_ops.c Chuck Lever
2019-08-19 22:38 ` [PATCH v2 04/21] xprtrdma: Update obsolete comment Chuck Lever
2019-08-19 22:39 ` [PATCH v2 05/21] xprtrdma: Fix calculation of ri_max_segs again Chuck Lever
2019-08-19 22:40 ` [PATCH v2 06/21] xprtrdma: Boost maximum transport header size Chuck Lever
2019-08-19 22:40 ` [PATCH v2 07/21] xprtrdma: Boost client's max slot table size to match Linux server Chuck Lever
2019-08-19 22:41 ` [PATCH v2 08/21] xprtrdma: Rename CQE field in Receive trace points Chuck Lever
2019-08-19 22:42 ` [PATCH v2 09/21] xprtrdma: Rename rpcrdma_buffer::rb_all Chuck Lever
2019-08-19 22:43 ` [PATCH v2 10/21] xprtrdma: Toggle XPRT_CONGESTED in xprtrdma's slot methods Chuck Lever
2019-08-19 22:44 ` [PATCH v2 11/21] xprtrdma: Simplify rpcrdma_mr_pop Chuck Lever
2019-08-19 22:44 ` [PATCH v2 12/21] xprtrdma: Combine rpcrdma_mr_put and rpcrdma_mr_unmap_and_put Chuck Lever
2019-08-19 22:45 ` [PATCH v2 13/21] xprtrdma: Move rpcrdma_mr_get out of frwr_map Chuck Lever
2019-08-19 22:46 ` [PATCH v2 14/21] xprtrdma: Ensure creating an MR does not trigger FS writeback Chuck Lever
2019-08-19 22:47 ` Chuck Lever [this message]
2019-08-19 22:47 ` [PATCH v2 16/21] xprtrdma: Remove rpcrdma_buffer::rb_mrlock Chuck Lever
2019-08-19 22:48 ` [PATCH v2 17/21] xprtrdma: Use an llist to manage free rpcrdma_reps Chuck Lever
2019-08-19 22:49 ` [PATCH v2 18/21] xprtrdma: Clean up xprt_rdma_set_connect_timeout() Chuck Lever
2019-08-19 22:50 ` [PATCH v2 19/21] xprtrdma: Fix bc_max_slots return value Chuck Lever
2019-08-19 22:51 ` [PATCH v2 20/21] xprtrdma: Inline XDR chunk encoder functions Chuck Lever
2019-08-19 22:51 ` [PATCH v2 21/21] xprtrdma: Optimize rpcrdma_post_recvs() Chuck Lever
2019-08-22 20:18 ` [PATCH v2 00/21] NFS/RDMA client-side for-5.4 Schumaker, Anna
