Linux-RDMA Archive on lore.kernel.org
 help / color / Atom feed
From: Chuck Lever <chuck.lever@oracle.com>
To: bfields@fieldses.org
Cc: linux-nfs@vger.kernel.org, linux-rdma@vger.kernel.org
Subject: [PATCH v3 01/32] SUNRPC: Move xpt_mutex into socket xpo_sendto methods
Date: Thu, 21 May 2020 10:33:48 -0400
Message-ID: <20200521143348.3557.45005.stgit@klimt.1015granger.net> (raw)
In-Reply-To: <20200521141100.3557.17098.stgit@klimt.1015granger.net>

It appears that the RPC/RDMA transport does not need serialization
of calls to its xpo_sendto method. Move the mutex into the socket
methods that still need that serialization.

Tail latencies are unambiguously better with this patch applied.
fio randrw 8KB 70/30 on NFSv3, smaller numbers are better:

    clat percentiles (usec):

With xpt_mutex:
r    | 99.99th=[ 8848]
w    | 99.99th=[ 9634]

Without xpt_mutex:
r    | 99.99th=[ 8586]
w    | 99.99th=[ 8979]

Serializing the construction of RPC/RDMA transport headers is not
really necessary at this point, because the Linux NFS server
implementation never changes its credit grant on a connection. If
that should change, then svc_rdma_sendto will need to serialize
access to the transport's credit grant fields.

Reported-by: kbuild test robot <lkp@intel.com>
[ cel: fix uninitialized variable warning ]
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc_xprt.h            |    6 +++++
 net/sunrpc/svc_xprt.c                      |   12 ++--------
 net/sunrpc/svcsock.c                       |   25 ++++++++++++++++++++
 net/sunrpc/xprtrdma/svc_rdma_backchannel.c |   35 +++++++++++++---------------
 net/sunrpc/xprtrdma/svc_rdma_sendto.c      |   10 +++-----
 net/sunrpc/xprtsock.c                      |   12 ++++++++--
 6 files changed, 64 insertions(+), 36 deletions(-)

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 9e1e046de176..aca35ab5cff2 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -117,6 +117,12 @@ static inline int register_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u
 	return 0;
 }
 
+static inline bool svc_xprt_is_dead(const struct svc_xprt *xprt)
+{
+	return (test_bit(XPT_DEAD, &xprt->xpt_flags) != 0) ||
+		(test_bit(XPT_CLOSE, &xprt->xpt_flags) != 0);
+}
+
 int	svc_reg_xprt_class(struct svc_xprt_class *);
 void	svc_unreg_xprt_class(struct svc_xprt_class *);
 void	svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *,
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 2284ff038dad..07cdbf7d5764 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -914,16 +914,10 @@ int svc_send(struct svc_rqst *rqstp)
 		xb->page_len +
 		xb->tail[0].iov_len;
 	trace_svc_sendto(xb);
-
-	/* Grab mutex to serialize outgoing data. */
-	mutex_lock(&xprt->xpt_mutex);
 	trace_svc_stats_latency(rqstp);
-	if (test_bit(XPT_DEAD, &xprt->xpt_flags)
-			|| test_bit(XPT_CLOSE, &xprt->xpt_flags))
-		len = -ENOTCONN;
-	else
-		len = xprt->xpt_ops->xpo_sendto(rqstp);
-	mutex_unlock(&xprt->xpt_mutex);
+
+	len = xprt->xpt_ops->xpo_sendto(rqstp);
+
 	trace_svc_send(rqstp, len);
 	svc_xprt_release(rqstp);
 
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 023514e392b3..3e7b6445e317 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -506,6 +506,9 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
  * svc_udp_sendto - Send out a reply on a UDP socket
  * @rqstp: completed svc_rqst
  *
+ * xpt_mutex ensures @rqstp's whole message is written to the socket
+ * without interruption.
+ *
  * Returns the number of bytes sent, or a negative errno.
  */
 static int svc_udp_sendto(struct svc_rqst *rqstp)
@@ -531,6 +534,11 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
 
 	svc_set_cmsg_data(rqstp, cmh);
 
+	mutex_lock(&xprt->xpt_mutex);
+
+	if (svc_xprt_is_dead(xprt))
+		goto out_notconn;
+
 	err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
 	xdr_free_bvec(xdr);
 	if (err == -ECONNREFUSED) {
@@ -538,9 +546,15 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
 		err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
 		xdr_free_bvec(xdr);
 	}
+
+	mutex_unlock(&xprt->xpt_mutex);
 	if (err < 0)
 		return err;
 	return sent;
+
+out_notconn:
+	mutex_unlock(&xprt->xpt_mutex);
+	return -ENOTCONN;
 }
 
 static int svc_udp_has_wspace(struct svc_xprt *xprt)
@@ -1063,6 +1077,9 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
  * svc_tcp_sendto - Send out a reply on a TCP socket
  * @rqstp: completed svc_rqst
  *
+ * xpt_mutex ensures @rqstp's whole message is written to the socket
+ * without interruption.
+ *
  * Returns the number of bytes sent, or a negative errno.
  */
 static int svc_tcp_sendto(struct svc_rqst *rqstp)
@@ -1080,12 +1097,19 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
 
 	svc_release_skb(rqstp);
 
+	mutex_lock(&xprt->xpt_mutex);
+	if (svc_xprt_is_dead(xprt))
+		goto out_notconn;
 	err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, marker, &sent);
 	xdr_free_bvec(xdr);
 	if (err < 0 || sent != (xdr->len + sizeof(marker)))
 		goto out_close;
+	mutex_unlock(&xprt->xpt_mutex);
 	return sent;
 
+out_notconn:
+	mutex_unlock(&xprt->xpt_mutex);
+	return -ENOTCONN;
 out_close:
 	pr_notice("rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n",
 		  xprt->xpt_server->sv_name,
@@ -1093,6 +1117,7 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
 		  (err < 0) ? err : sent, xdr->len);
 	set_bit(XPT_CLOSE, &xprt->xpt_flags);
 	svc_xprt_enqueue(xprt);
+	mutex_unlock(&xprt->xpt_mutex);
 	return -EAGAIN;
 }
 
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index af7eb8d202ae..d9aab4504f2c 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -210,34 +210,31 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst)
 	return -ENOTCONN;
 }
 
-/* Send an RPC call on the passive end of a transport
- * connection.
+/**
+ * xprt_rdma_bc_send_request - Send a reverse-direction Call
+ * @rqst: rpc_rqst containing Call message to be sent
+ *
+ * Return values:
+ *   %0 if the message was sent successfully
+ *   %ENOTCONN if the message was not sent
  */
-static int
-xprt_rdma_bc_send_request(struct rpc_rqst *rqst)
+static int xprt_rdma_bc_send_request(struct rpc_rqst *rqst)
 {
 	struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt;
-	struct svcxprt_rdma *rdma;
+	struct svcxprt_rdma *rdma =
+		container_of(sxprt, struct svcxprt_rdma, sc_xprt);
 	int ret;
 
 	dprintk("svcrdma: sending bc call with xid: %08x\n",
 		be32_to_cpu(rqst->rq_xid));
 
-	mutex_lock(&sxprt->xpt_mutex);
-
-	ret = -ENOTCONN;
-	rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
-	if (!test_bit(XPT_DEAD, &sxprt->xpt_flags)) {
-		ret = rpcrdma_bc_send_request(rdma, rqst);
-		if (ret == -ENOTCONN)
-			svc_close_xprt(sxprt);
-	}
+	if (test_bit(XPT_DEAD, &sxprt->xpt_flags))
+		return -ENOTCONN;
 
-	mutex_unlock(&sxprt->xpt_mutex);
-
-	if (ret < 0)
-		return ret;
-	return 0;
+	ret = rpcrdma_bc_send_request(rdma, rqst);
+	if (ret == -ENOTCONN)
+		svc_close_xprt(sxprt);
+	return ret;
 }
 
 static void
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index b6c8643867f2..38e7c3c8c4a9 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -868,12 +868,10 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 	__be32 *p;
 	int ret;
 
-	/* Create the RDMA response header. xprt->xpt_mutex,
-	 * acquired in svc_send(), serializes RPC replies. The
-	 * code path below that inserts the credit grant value
-	 * into each transport header runs only inside this
-	 * critical section.
-	 */
+	ret = -ENOTCONN;
+	if (svc_xprt_is_dead(xprt))
+		goto err0;
+
 	ret = -ENOMEM;
 	sctxt = svc_rdma_send_ctxt_get(rdma);
 	if (!sctxt)
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 845d0be805ec..839c49330785 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2548,8 +2548,16 @@ static int bc_sendto(struct rpc_rqst *req)
 	return sent;
 }
 
-/*
- * The send routine. Borrows from svc_send
+/**
+ * bc_send_request - Send a backchannel Call on a TCP socket
+ * @req: rpc_rqst containing Call message to be sent
+ *
+ * xpt_mutex ensures @rqstp's whole message is written to the socket
+ * without interruption.
+ *
+ * Return values:
+ *   %0 if the message was sent successfully
+ *   %ENOTCONN if the message was not sent
  */
 static int bc_send_request(struct rpc_rqst *req)
 {


  reply index

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-05-21 14:33 [PATCH v3 00/32] Possible NFSD patches for v5.8 Chuck Lever
2020-05-21 14:33 ` Chuck Lever [this message]
2020-05-21 14:33 ` [PATCH v3 02/32] svcrdma: Clean up the tracing for rw_ctx_init errors Chuck Lever
2020-05-21 14:33 ` [PATCH v3 03/32] svcrdma: Clean up handling of get_rw_ctx errors Chuck Lever
2020-05-21 14:34 ` [PATCH v3 04/32] svcrdma: Trace page overruns when constructing RDMA Reads Chuck Lever
2020-05-21 14:34 ` [PATCH v3 05/32] svcrdma: trace undersized Write chunks Chuck Lever
2020-05-21 14:34 ` [PATCH v3 06/32] svcrdma: Fix backchannel return code Chuck Lever
2020-05-21 14:34 ` [PATCH v3 07/32] svcrdma: Remove backchannel dprintk call sites Chuck Lever
2020-05-21 14:34 ` [PATCH v3 08/32] svcrdma: Rename tracepoints that record header decoding errors Chuck Lever
2020-05-21 14:34 ` [PATCH v3 09/32] svcrdma: Remove the SVCRDMA_DEBUG macro Chuck Lever
2020-05-21 14:34 ` [PATCH v3 10/32] svcrdma: Displayed remote IP address should match stored address Chuck Lever
2020-05-21 14:34 ` [PATCH v3 11/32] svcrdma: Add tracepoints to report ->xpo_accept failures Chuck Lever
2020-05-21 14:34 ` [PATCH v3 12/32] SUNRPC: Remove kernel memory address from svc_xprt tracepoints Chuck Lever
2020-05-21 14:34 ` [PATCH v3 13/32] SUNRPC: Tracepoint to record errors in svc_xpo_create() Chuck Lever
2020-05-21 14:34 ` [PATCH v3 14/32] SUNRPC: Trace a few more generic svc_xprt events Chuck Lever
2020-05-21 14:35 ` [PATCH v3 15/32] SUNRPC: Remove "#include <trace/events/skb.h>" Chuck Lever
2020-05-21 14:35 ` [PATCH v3 16/32] SUNRPC: Add more svcsock tracepoints Chuck Lever
2020-05-21 14:35 ` [PATCH v3 17/32] SUNRPC: Replace dprintk call sites in TCP state change callouts Chuck Lever
2020-05-21 14:35 ` [PATCH v3 18/32] SUNRPC: Trace server-side rpcbind registration events Chuck Lever
2020-05-21 14:35 ` [PATCH v3 19/32] SUNRPC: Rename svc_sock::sk_reclen Chuck Lever
2020-05-21 14:35 ` [PATCH v3 20/32] SUNRPC: Restructure svc_tcp_recv_record() Chuck Lever
2020-05-21 14:35 ` [PATCH v3 21/32] SUNRPC: Replace dprintk() call sites in TCP receive path Chuck Lever
2020-05-21 14:35 ` [PATCH v3 22/32] SUNRPC: Refactor recvfrom path dealing with incomplete TCP receives Chuck Lever
2020-05-21 14:35 ` [PATCH v3 23/32] SUNRPC: Clean up svc_release_skb() functions Chuck Lever
2020-05-21 14:35 ` [PATCH v3 24/32] SUNRPC: Refactor svc_recvfrom() Chuck Lever
2020-05-21 14:35 ` [PATCH v3 25/32] SUNRPC: Restructure svc_udp_recvfrom() Chuck Lever
2020-05-21 14:36 ` [PATCH v3 26/32] SUNRPC: svc_show_status() macro should have enum definitions Chuck Lever
2020-05-21 14:36 ` [PATCH v3 27/32] NFSD: Add tracepoints to NFSD's duplicate reply cache Chuck Lever
2020-05-21 14:36 ` [PATCH v3 28/32] NFSD: Add tracepoints to the NFSD state management code Chuck Lever
2020-05-21 14:36 ` [PATCH v3 29/32] NFSD: Add tracepoints for monitoring NFSD callbacks Chuck Lever
2020-05-21 14:36 ` [PATCH v3 30/32] SUNRPC: Clean up request deferral tracepoints Chuck Lever
2020-05-21 14:36 ` [PATCH v3 31/32] NFSD: Squash an annoying compiler warning Chuck Lever
2020-05-21 14:36 ` [PATCH v3 32/32] NFSD: Fix improperly-formatted Doxygen comments Chuck Lever

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200521143348.3557.45005.stgit@klimt.1015granger.net \
    --to=chuck.lever@oracle.com \
    --cc=bfields@fieldses.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-RDMA Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-rdma/0 linux-rdma/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-rdma linux-rdma/ https://lore.kernel.org/linux-rdma \
		linux-rdma@vger.kernel.org
	public-inbox-index linux-rdma

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-rdma


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git