* [PATCH v1 0/2] Two small NFSD/RDMA scalability enhancements
From: Chuck Lever @ 2021-01-21 20:52 UTC
  To: linux-nfs, linux-rdma

The following series reduces the work done by the NFS/RDMA server-
side Receive completion handler in two ways. Details are in the
patch descriptions.

The Receive completion handler is single-threaded per transport. The
less work the handler has to do per completion, the better the
server's receive code scales.
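
For a concrete picture of the pattern, here is a minimal sketch
(illustrative names throughout, not the actual svcrdma code): the
handler records what the work completion reports, queues the receive
context, and wakes an svc thread to do the real work.

#include <rdma/ib_verbs.h>

struct my_recv_ctxt {
	struct ib_cqe		rc_cqe;		/* completion cookie */
	struct list_head	rc_list;
	u32			rc_byte_len;
};

struct my_xprt {
	spinlock_t		dto_lock;
	struct list_head	dto_q;		/* data-to-process queue */
};

/* Runs single-threaded per transport: record state and hand off.
 * wake_svc_thread() and free_recv_ctxt() are hypothetical helpers. */
static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct my_xprt *xprt = cq->cq_context;
	struct my_recv_ctxt *ctxt =
		container_of(wc->wr_cqe, struct my_recv_ctxt, rc_cqe);

	if (wc->status != IB_WC_SUCCESS)
		goto flushed;

	ctxt->rc_byte_len = wc->byte_len;	/* record, don't parse */

	spin_lock(&xprt->dto_lock);
	list_add_tail(&ctxt->rc_list, &xprt->dto_q);
	spin_unlock(&xprt->dto_lock);
	wake_svc_thread(xprt);			/* defer the heavy work */
	return;

flushed:
	free_recv_ctxt(xprt, ctxt);
}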

---

Chuck Lever (2):
      svcrdma: Reduce Receive doorbell rate
      svcrdma: DMA-sync the receive buffer in svc_rdma_recvfrom()


 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

--
Chuck Lever



* [PATCH v1 1/2] svcrdma: Reduce Receive doorbell rate
From: Chuck Lever @ 2021-01-21 20:53 UTC
  To: linux-nfs, linux-rdma

This is similar to commit e340c2d6ef2a ("xprtrdma: Reduce the
doorbell rate (Receive)"), which added Receive batching to the
client.
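
For illustration only (this is not the patch itself): ib_post_recv()
accepts a chain of Receive WRs linked through their next pointers, so
an entire batch can be posted with a single verbs call, which normally
means a single hardware doorbell. struct my_recv_ctxt and
alloc_recv_ctxt() below are hypothetical stand-ins.

#include <rdma/ib_verbs.h>

struct my_recv_ctxt {
	struct ib_recv_wr	recv_wr;	/* sge/buffer setup omitted */
};

static struct my_recv_ctxt *alloc_recv_ctxt(void);	/* hypothetical */

static bool post_recv_batch(struct ib_qp *qp, unsigned int wanted)
{
	const struct ib_recv_wr *bad_wr = NULL;
	struct ib_recv_wr *chain = NULL;

	while (wanted--) {
		struct my_recv_ctxt *ctxt = alloc_recv_ctxt();

		if (!ctxt)
			break;
		ctxt->recv_wr.next = chain;	/* link into the batch */
		chain = &ctxt->recv_wr;
	}
	if (!chain)
		return false;

	/* One call posts the whole chain; the provider rings the
	 * doorbell once instead of once per WR. */
	return ib_post_recv(qp, chain, &bad_wr) == 0;
}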

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc_rdma.h         |    1 
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c |   82 ++++++++++++++++---------------
 2 files changed, 44 insertions(+), 39 deletions(-)

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 1e76ed688044..7c693b31965e 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -104,6 +104,7 @@ struct svcxprt_rdma {
 
 	wait_queue_head_t    sc_send_wait;	/* SQ exhaustion waitlist */
 	unsigned long	     sc_flags;
+	u32		     sc_pending_recvs;
 	struct list_head     sc_read_complete_q;
 	struct work_struct   sc_work;
 
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 7d14a74df716..ab0b7e9777bc 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -266,33 +266,46 @@ void svc_rdma_release_rqst(struct svc_rqst *rqstp)
 		svc_rdma_recv_ctxt_put(rdma, ctxt);
 }
 
-static int __svc_rdma_post_recv(struct svcxprt_rdma *rdma,
-				struct svc_rdma_recv_ctxt *ctxt)
+static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma,
+				   unsigned int wanted, bool temp)
 {
+	const struct ib_recv_wr *bad_wr = NULL;
+	struct svc_rdma_recv_ctxt *ctxt;
+	struct ib_recv_wr *recv_chain;
 	int ret;
 
-	trace_svcrdma_post_recv(ctxt);
-	ret = ib_post_recv(rdma->sc_qp, &ctxt->rc_recv_wr, NULL);
+	recv_chain = NULL;
+	while (wanted--) {
+		ctxt = svc_rdma_recv_ctxt_get(rdma);
+		if (!ctxt)
+			break;
+
+		trace_svcrdma_post_recv(ctxt);
+		ctxt->rc_temp = temp;
+		ctxt->rc_recv_wr.next = recv_chain;
+		recv_chain = &ctxt->rc_recv_wr;
+		rdma->sc_pending_recvs++;
+	}
+	if (!recv_chain)
+		return false;
+
+	ret = ib_post_recv(rdma->sc_qp, recv_chain, &bad_wr);
 	if (ret)
 		goto err_post;
-	return 0;
+	return true;
 
 err_post:
-	trace_svcrdma_rq_post_err(rdma, ret);
-	svc_rdma_recv_ctxt_put(rdma, ctxt);
-	return ret;
-}
-
-static int svc_rdma_post_recv(struct svcxprt_rdma *rdma)
-{
-	struct svc_rdma_recv_ctxt *ctxt;
+	while (bad_wr) {
+		ctxt = container_of(bad_wr, struct svc_rdma_recv_ctxt,
+				    rc_recv_wr);
+		bad_wr = bad_wr->next;
+		svc_rdma_recv_ctxt_put(rdma, ctxt);
+	}
 
-	if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags))
-		return 0;
-	ctxt = svc_rdma_recv_ctxt_get(rdma);
-	if (!ctxt)
-		return -ENOMEM;
-	return __svc_rdma_post_recv(rdma, ctxt);
+	trace_svcrdma_rq_post_err(rdma, ret);
+	/* Since we're destroying the xprt, no need to reset
+	 * sc_pending_recvs. */
+	return false;
 }
 
 /**
@@ -303,20 +316,7 @@ static int svc_rdma_post_recv(struct svcxprt_rdma *rdma)
  */
 bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma)
 {
-	struct svc_rdma_recv_ctxt *ctxt;
-	unsigned int i;
-	int ret;
-
-	for (i = 0; i < rdma->sc_max_requests; i++) {
-		ctxt = svc_rdma_recv_ctxt_get(rdma);
-		if (!ctxt)
-			return false;
-		ctxt->rc_temp = true;
-		ret = __svc_rdma_post_recv(rdma, ctxt);
-		if (ret)
-			return false;
-	}
-	return true;
+	return svc_rdma_refresh_recvs(rdma, rdma->sc_max_requests, true);
 }
 
 /**
@@ -324,8 +324,6 @@ bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma)
  * @cq: Completion Queue context
  * @wc: Work Completion object
  *
- * NB: The svc_xprt/svcxprt_rdma is pinned whenever it's possible that
- * the Receive completion handler could be running.
  */
 static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
 {
@@ -333,6 +331,8 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
 	struct ib_cqe *cqe = wc->wr_cqe;
 	struct svc_rdma_recv_ctxt *ctxt;
 
+	rdma->sc_pending_recvs--;
+
 	/* WARNING: Only wc->wr_cqe and wc->status are reliable */
 	ctxt = container_of(cqe, struct svc_rdma_recv_ctxt, rc_cqe);
 
@@ -340,9 +340,6 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
 	if (wc->status != IB_WC_SUCCESS)
 		goto flushed;
 
-	if (svc_rdma_post_recv(rdma))
-		goto post_err;
-
 	/* All wc fields are now known to be valid */
 	ctxt->rc_byte_len = wc->byte_len;
 	ib_dma_sync_single_for_cpu(rdma->sc_pd->device,
@@ -356,11 +353,18 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
 	spin_unlock(&rdma->sc_rq_dto_lock);
 	if (!test_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags))
 		svc_xprt_enqueue(&rdma->sc_xprt);
+
+	if (!test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags) &&
+	    rdma->sc_pending_recvs < rdma->sc_max_requests)
+		if (!svc_rdma_refresh_recvs(rdma, RPCRDMA_MAX_RECV_BATCH,
+					    false))
+			goto post_err;
+
 	return;
 
 flushed:
-post_err:
 	svc_rdma_recv_ctxt_put(rdma, ctxt);
+post_err:
 	set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
 	svc_xprt_enqueue(&rdma->sc_xprt);
 }




* [PATCH v1 2/2] svcrdma: DMA-sync the receive buffer in svc_rdma_recvfrom()
From: Chuck Lever @ 2021-01-21 20:53 UTC
  To: linux-nfs, linux-rdma

The Receive completion handler doesn't look at the contents of the
Receive buffer. The DMA sync isn't terribly expensive but it's one
less thing that needs to be done by the Receive completion handler,
which is single-threaded (per svc_xprt). This helps scalability.
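
The pattern, as a hedged sketch (struct recv_ctxt and handle_rpc()
are illustrative names, not svcrdma symbols): the completion handler
records only wc->byte_len, and the consumer syncs the buffer for CPU
access immediately before parsing it.

#include <rdma/ib_verbs.h>

struct recv_ctxt {
	u64	rc_dma_addr;	/* DMA address of the Receive buffer */
	u32	rc_byte_len;	/* saved from wc->byte_len at completion */
	void	*rc_buf;	/* kernel virtual address of the buffer */
};

static void consume_receive(struct ib_device *dev, struct recv_ctxt *ctxt)
{
	/* Make the device-written bytes visible to the CPU at the
	 * point of first use, not in the completion handler. */
	ib_dma_sync_single_for_cpu(dev, ctxt->rc_dma_addr,
				   ctxt->rc_byte_len, DMA_FROM_DEVICE);
	handle_rpc(ctxt->rc_buf, ctxt->rc_byte_len);	/* hypothetical */
}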

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c |    6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index ab0b7e9777bc..6d28f23ceb35 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -342,9 +342,6 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
 
 	/* All wc fields are now known to be valid */
 	ctxt->rc_byte_len = wc->byte_len;
-	ib_dma_sync_single_for_cpu(rdma->sc_pd->device,
-				   ctxt->rc_recv_sge.addr,
-				   wc->byte_len, DMA_FROM_DEVICE);
 
 	spin_lock(&rdma->sc_rq_dto_lock);
 	list_add_tail(&ctxt->rc_list, &rdma->sc_rq_dto_q);
@@ -851,6 +848,9 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 	spin_unlock(&rdma_xprt->sc_rq_dto_lock);
 	percpu_counter_inc(&svcrdma_stat_recv);
 
+	ib_dma_sync_single_for_cpu(rdma_xprt->sc_pd->device,
+				   ctxt->rc_recv_sge.addr, ctxt->rc_byte_len,
+				   DMA_FROM_DEVICE);
 	svc_rdma_build_arg_xdr(rqstp, ctxt);
 
 	/* Prevent svc_xprt_release from releasing pages in rq_pages




* Re: [PATCH v1 2/2] svcrdma: DMA-sync the receive buffer in svc_rdma_recvfrom()
From: Christoph Hellwig @ 2021-01-22  7:58 UTC
  To: Chuck Lever; +Cc: linux-nfs, linux-rdma

On Thu, Jan 21, 2021 at 03:53:12PM -0500, Chuck Lever wrote:
> The Receive completion handler doesn't look at the contents of the
> Receive buffer. The DMA sync isn't terribly expensive but it's one
> less thing that needs to be done by the Receive completion handler,
> which is single-threaded (per svc_xprt). This helps scalability.

On dma-noncoherent systems that have speculative execution (e.g. a lot
of ARM systems) it can be fairly expensive, so for those this is a
very good thing.

Reviewed-by: Christoph Hellwig <hch@lst.de>


* Re: [PATCH v1 2/2] svcrdma: DMA-sync the receive buffer in svc_rdma_recvfrom()
From: Tom Talpey @ 2021-01-22 14:37 UTC
  To: Chuck Lever, linux-nfs, linux-rdma

Is there an asynchronous version of ib_dma_sync? Because it flushes
DMA pipelines, I'm wondering if kicking it off early might improve
latency, getting it done before svc_rdma_recvfrom() needs to dig
into the contents.

Tom.

On 1/21/2021 3:53 PM, Chuck Lever wrote:
> The Receive completion handler doesn't look at the contents of the
> Receive buffer. The DMA sync isn't terribly expensive but it's one
> less thing that needs to be done by the Receive completion handler,
> which is single-threaded (per svc_xprt). This helps scalability.
> 
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
>   net/sunrpc/xprtrdma/svc_rdma_recvfrom.c |    6 +++---
>   1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
> index ab0b7e9777bc..6d28f23ceb35 100644
> --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
> +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
> @@ -342,9 +342,6 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
>   
>   	/* All wc fields are now known to be valid */
>   	ctxt->rc_byte_len = wc->byte_len;
> -	ib_dma_sync_single_for_cpu(rdma->sc_pd->device,
> -				   ctxt->rc_recv_sge.addr,
> -				   wc->byte_len, DMA_FROM_DEVICE);
>   
>   	spin_lock(&rdma->sc_rq_dto_lock);
>   	list_add_tail(&ctxt->rc_list, &rdma->sc_rq_dto_q);
> @@ -851,6 +848,9 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
>   	spin_unlock(&rdma_xprt->sc_rq_dto_lock);
>   	percpu_counter_inc(&svcrdma_stat_recv);
>   
> +	ib_dma_sync_single_for_cpu(rdma_xprt->sc_pd->device,
> +				   ctxt->rc_recv_sge.addr, ctxt->rc_byte_len,
> +				   DMA_FROM_DEVICE);
>   	svc_rdma_build_arg_xdr(rqstp, ctxt);
>   
>   	/* Prevent svc_xprt_release from releasing pages in rq_pages
> 
> 
> 


* Re: [PATCH v1 2/2] svcrdma: DMA-sync the receive buffer in svc_rdma_recvfrom()
From: Christoph Hellwig @ 2021-01-22 17:26 UTC
  To: Tom Talpey; +Cc: Chuck Lever, linux-nfs, linux-rdma

On Fri, Jan 22, 2021 at 09:37:02AM -0500, Tom Talpey wrote:
> Is there an asynchronous version of ib_dma_sync?

No.  These routines basically compile down to cache writeback and/or
invalidate instructions without much logic around them.
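
For reference, a simplified sketch of that path, loosely following
kernel/dma/direct.h (dev_is_dma_coherent() and arch_sync_dma_for_cpu()
are real hooks; the control flow here is abbreviated):

static inline void sync_single_for_cpu(struct device *dev,
				       dma_addr_t addr, size_t size,
				       enum dma_data_direction dir)
{
	phys_addr_t paddr = dma_to_phys(dev, addr);

	/* Coherent devices need nothing here; noncoherent ones get a
	 * cache invalidate (DMA_FROM_DEVICE) or writeback. */
	if (!dev_is_dma_coherent(dev))
		arch_sync_dma_for_cpu(paddr, size, dir);
}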

